Improve matching and metadata extraction

This commit is contained in:
Jonathan Cremin 2015-01-27 21:32:28 +00:00
parent 7c1b8aa771
commit e4a856e228
6 changed files with 74 additions and 36 deletions

View file

@ -97,14 +97,17 @@ module.exports.lookupId = function(id, type) {
}; };
module.exports.search = function(data) { module.exports.search = function(data) {
/**
 * Remove ":", "?" and "&" from a name before it is placed in the search query.
 *
 * @param {string} str - Artist, album or track name.
 * @returns {string} The name with every ":", "?" and "&" removed.
 */
var cleanParam = function(str) {
  // The "g" flag is required: without it String#replace only removes the
  // first run of matching characters and later ones stay in the query.
  return str.replace(/[:?&]+/g, "");
};
var query, album; var query, album;
var type = data.type; var type = data.type;
if (type == "album") { if (type == "album") {
query = '"' + data.artist.name + '" "' + data.name + '"'; query = '"' + cleanParam(data.artist.name) + '" "' + cleanParam(data.name) + '"';
album = data.name; album = data.name;
} else if (type == "track") { } else if (type == "track") {
query = '"' + data.artist.name + '" "' + data.name + '"'; query = '"' + cleanParam(data.artist.name) + '" "' + cleanParam(data.name) + '"';
album = data.album.name album = data.album.name
} }
@ -112,21 +115,33 @@ module.exports.search = function(data) {
return request.get(apiRoot + path).promise().then(function(res) { return request.get(apiRoot + path).promise().then(function(res) {
if (!res.body.data[0]) { if (!res.body.data[0]) {
var matches = album.match(/^[^\(\[]+/); return {service: "beats"};
if (matches && matches[0] && matches[0] != album) { } else {
var cleanedData = JSON.parse(JSON.stringify(data)); var found;
if (type == "album") { var choppedAlbum = data.type == "album" ? cleanParam(data.name) : cleanParam(data.album.name);
cleanedData.name = matches[0].trim(); var choppedArtist = cleanParam(data.artist.name);
} else if (type == "track") {
cleanedData.album.name = matches[0].trim(); res.body.data.forEach(function(item) {
var matches = item.detail.match(/^[^\(\[]+/);
if(choppedArtist.indexOf(matches[0]) >= 0) {
found = item;
} }
return module.exports.search(cleanedData); });
} else {
if (!found && !choppedAlbum.length) {
return module.exports.lookupId(res.body.data[0].id, type);
}
res.body.data.forEach(function(item) {
var matches = item.related.display.match(/^[^\(\[]+/);
if(choppedAlbum.indexOf(matches[0]) >= 0) {
found = item;
}
});
if (!found) {
return {service: "beats"}; return {service: "beats"};
} }
} else { return module.exports.lookupId(found.id, type);
//insist on at least album or artist name being exactly right
return module.exports.lookupId(res.body.data[0].id, type);
} }
}); });
}; };

View file

@ -73,14 +73,17 @@ module.exports.lookupId = function(id, type) {
}; };
module.exports.search = function(data) { module.exports.search = function(data) {
/**
 * Drop the characters ":", "?" and "&" from a name used to build the
 * space-separated search query.
 *
 * @param {string} str - Artist, album or track name.
 * @returns {string} The input with all occurrences of ":", "?" and "&" removed.
 */
var cleanParam = function(str) {
  // A non-global regex would only strip the first run of these characters,
  // so the "g" flag is needed to clean the whole string.
  return str.replace(/[:?&]+/g, "");
};
var query, album; var query, album;
var type = data.type; var type = data.type;
if (type == "album") { if (type == "album") {
query = data.artist.name + " " + data.name; query = cleanParam(data.artist.name) + " " + cleanParam(data.name);
album = data.name; album = data.name;
} else if (type == "track") { } else if (type == "track") {
query = data.artist.name + " " + data.album.name + " " + data.name; query = cleanParam(data.artist.name) + " " + cleanParam(data.album.name) + " " + cleanParam(data.name);
album = data.album.name; album = data.album.name;
} }

View file

@ -65,32 +65,46 @@ module.exports.lookupId = function(id, type) {
} }
module.exports.search = function(data) { module.exports.search = function(data) {
/**
 * Prepare a name for use in a field-scoped search query.
 *
 * The string is cut at the first "&", "[" or "(" found after its first
 * character (each character is applied in that order to the already
 * shortened string), and then every ":" and "?" is removed.
 *
 * @param {string} str - Artist, album or track name.
 * @returns {string} The shortened, cleaned name. The result is not trimmed,
 *   so a trailing space left by the cut is kept.
 */
var cleanParam = function(str) {
  var chopChars = ['&', '[', '('];
  chopChars.forEach(function(chr) {
    // Look the character up in the string being cleaned, not in some other
    // field: a missing character gives -1, and substring(0, -1) would
    // otherwise collapse the whole string to "".
    var cutAt = str.indexOf(chr);
    // "> 0" keeps names that start with one of these characters intact
    // instead of reducing them to an empty string.
    if (cutAt > 0) {
      str = str.substring(0, cutAt);
    }
  });
  // Global flag so that every ":" and "?" is removed, not only the first run.
  return str.replace(/[:?]+/g, "");
};
var query, album; var query, album;
var type = data.type; var type = data.type;
if (type == "album") { if (type == "album") {
query = "artist:" + data.artist.name.replace(":", "") + " album:" + data.name.replace(":", ""); query = "artist:" + cleanParam(data.artist.name) + " album:" + cleanParam(data.name);
album = data.name; album = data.name;
} else if (type == "track") { } else if (type == "track") {
query = "artist:" + data.artist.name.replace(":", "") + " track:" + data.name.replace(":", "") + ( data.album.name.length > 0 ? " album: " + data.album.name.replace(":", ""): ""); query = "artist:" + cleanParam(data.artist.name) + " track:" + cleanParam(data.name) + ( cleanParam(data.album.name).length > 0 ? " album:" + cleanParam(data.album.name): "");
album = data.album.name; album = data.album.name;
} }
return spotify.searchAsync({query: query, type: type}).then(function(results) { return spotify.searchAsync({query: query, type: type}).then(function(results) {
if (!results[type + "s"].items[0]) { if (!results[type + "s"].items[0]) {
var matches = album.match(/^[^\(\[]+/); return {service: "spotify"};
if (matches && matches[0] && matches[0] != album) { } else {
var cleanedData = JSON.parse(JSON.stringify(data)); var found;
if (type == "album") { var choppedAlbum = data.type == "album" ? cleanParam(data.name) : cleanParam(data.album.name);
cleanedData.name = matches[0].trim(); if (!choppedAlbum.length) {
} else if (type == "track") { return module.exports.lookupId(results[type + "s"].items[0].id, type);
cleanedData.album.name = matches[0].trim(); }
results[type + "s"].items.forEach(function(item) {
var albumName = data.type == "album" ? item.name : item.album.name;
var matches = albumName.match(/^[^\(\[]+/);
if(choppedAlbum.indexOf(matches[0]) >= 0) {
found = item;
} }
return module.exports.search(cleanedData); });
} else { if (!found) {
return {service: "spotify"}; return {service: "spotify"};
} }
} else {
return module.exports.lookupId(results[type + "s"].items[0].id, type); return module.exports.lookupId(results[type + "s"].items[0].id, type);
} }

View file

@ -82,17 +82,19 @@ module.exports.lookupId = function(id, type) {
}; };
module.exports.search = function(data) { module.exports.search = function(data) {
/**
 * Strip ":", "?" and "&" from a name before it is joined into the search query.
 *
 * @param {string} str - Artist, album or track name.
 * @returns {string} A copy of the name without any ":", "?" or "&".
 */
var cleanParam = function(str) {
  // Use a global regex; a non-global one stops after the first match and
  // leaves any later ":", "?" or "&" in place.
  return str.replace(/[:?&]+/g, "");
};
var query, album; var query, album;
var type = data.type; var type = data.type;
if (type == "album") { if (type == "album") {
query = data.artist.name + " " + data.name; query = cleanParam(data.artist.name.substring(0, data.artist.name.indexOf('&'))) + " " + cleanParam(data.name);
album = data.name; album = data.name;
} else if (type == "track") { } else if (type == "track") {
query = data.artist.name + " " + data.name; query = cleanParam(data.artist.name.substring(0, data.artist.name.indexOf('&'))) + " " + cleanParam(data.name);
album = data.album.name album = data.album.name
} }
return getAccessToken().then(function(access_token){ return getAccessToken().then(function(access_token){
var path = "/music/search?q=" + encodeURIComponent(query) + "&filters=" + type + "s"; var path = "/music/search?q=" + encodeURIComponent(query) + "&filters=" + type + "s";
return request.get(apiRoot + path).set("Authorization", "Bearer " + access_token).promise().then(function(res) { return request.get(apiRoot + path).set("Authorization", "Bearer " + access_token).promise().then(function(res) {

View file

@ -2,6 +2,7 @@
var parse = require('url').parse; var parse = require('url').parse;
var freebase = require('./freebase'); var freebase = require('./freebase');
var querystring = require('querystring'); var querystring = require('querystring');
var moment = require('moment');
var Promise = require('bluebird'); var Promise = require('bluebird');
var request = require('superagent'); var request = require('superagent');
require('superagent-bluebird-promise'); require('superagent-bluebird-promise');
@ -37,7 +38,7 @@ module.exports.parseUrl = function(url) {
module.exports.lookupId = function(id, type) { module.exports.lookupId = function(id, type) {
var path = "/videos?part=snippet%2CtopicDetails&id=" + id + "&key=" + credentials.key; var path = "/videos?part=snippet%2CtopicDetails%2CcontentDetails&id=" + id + "&key=" + credentials.key;
return request.get(apiRoot + path).promise().then(function(res) { return request.get(apiRoot + path).promise().then(function(res) {
var item = res.body.items[0]; var item = res.body.items[0];
@ -65,11 +66,13 @@ module.exports.lookupId = function(id, type) {
} else if (topic.property["/type/object/type"].values.some(function(value) { } else if (topic.property["/type/object/type"].values.some(function(value) {
return value.text == "Musical Recording"; return value.text == "Musical Recording";
})) { })) {
if (!match.name) { //if (moment.duration(item.contentDetails.duration).asSeconds() < 900) {
match.album = {name: topic.property["/music/recording/releases"].values[0].text};
match.name = topic.property["/type/object/name"].values[0].text; match.name = topic.property["/type/object/name"].values[0].text;
match.type = "track"; if (topic.property["/music/recording/releases"]) {
} match.type = "album";
match.album.name = topic.property["/music/recording/releases"].values[0].text;
}
//}
} else if (topic.property["/type/object/type"].values.some(function(value) { } else if (topic.property["/type/object/type"].values.some(function(value) {
return value.text == "Musical Album"; return value.text == "Musical Album";
})) { })) {

View file

@ -35,6 +35,7 @@
"express": "~4.10.6", "express": "~4.10.6",
"express-session": "^1.9.2", "express-session": "^1.9.2",
"helmet": "^0.5.2", "helmet": "^0.5.2",
"moment": "^2.9.0",
"morgan": "~1.5.0", "morgan": "~1.5.0",
"node-jsx": "^0.12.4", "node-jsx": "^0.12.4",
"node-uuid": "^1.4.2", "node-uuid": "^1.4.2",