Improve matching and metadata extraction

This commit is contained in:
Jonathan Cremin 2015-01-27 21:32:28 +00:00
parent 7c1b8aa771
commit e4a856e228
6 changed files with 74 additions and 36 deletions

View file

@ -97,14 +97,17 @@ module.exports.lookupId = function(id, type) {
};
module.exports.search = function(data) {
// Strip characters that upset the search API's query parsing (':', '?', '&').
// Every occurrence is removed, not only the first run, so names such as
// "Vol. 1: Live: Part 2" are cleaned completely. Whitespace is left untouched
// because callers compare the result against untrimmed name prefixes.
var cleanParam = function(str) {
  return str.replace(/[:?&]+/g, "");
};
var query, album;
var type = data.type;
if (type == "album") {
query = '"' + data.artist.name + '" "' + data.name + '"';
query = '"' + cleanParam(data.artist.name) + '" "' + cleanParam(data.name) + '"';
album = data.name;
} else if (type == "track") {
query = '"' + data.artist.name + '" "' + data.name + '"';
query = '"' + cleanParam(data.artist.name) + '" "' + cleanParam(data.name) + '"';
album = data.album.name
}
@ -112,21 +115,33 @@ module.exports.search = function(data) {
return request.get(apiRoot + path).promise().then(function(res) {
if (!res.body.data[0]) {
var matches = album.match(/^[^\(\[]+/);
if (matches && matches[0] && matches[0] != album) {
var cleanedData = JSON.parse(JSON.stringify(data));
if (type == "album") {
cleanedData.name = matches[0].trim();
} else if (type == "track") {
cleanedData.album.name = matches[0].trim();
return {service: "beats"};
} else {
var found;
var choppedAlbum = data.type == "album" ? cleanParam(data.name) : cleanParam(data.album.name);
var choppedArtist = cleanParam(data.artist.name);
res.body.data.forEach(function(item) {
var matches = item.detail.match(/^[^\(\[]+/);
if(choppedArtist.indexOf(matches[0]) >= 0) {
found = item;
}
return module.exports.search(cleanedData);
} else {
});
if (!found && !choppedAlbum.length) {
return module.exports.lookupId(res.body.data[0].id, type);
}
res.body.data.forEach(function(item) {
var matches = item.related.display.match(/^[^\(\[]+/);
if(choppedAlbum.indexOf(matches[0]) >= 0) {
found = item;
}
});
if (!found) {
return {service: "beats"};
}
} else {
//insist on at least album or artist name being exactly right
return module.exports.lookupId(res.body.data[0].id, type);
return module.exports.lookupId(found.id, type);
}
});
};

View file

@ -73,14 +73,17 @@ module.exports.lookupId = function(id, type) {
};
module.exports.search = function(data) {
// Remove ':', '?' and '&' from a name before it is placed in the search
// query. The global flag makes sure all occurrences are removed rather than
// just the first run of such characters.
var cleanParam = function(str) {
  return str.replace(/[:?&]+/g, "");
};
var query, album;
var type = data.type;
if (type == "album") {
query = data.artist.name + " " + data.name;
query = cleanParam(data.artist.name) + " " + cleanParam(data.name);
album = data.name;
} else if (type == "track") {
query = data.artist.name + " " + data.album.name + " " + data.name;
query = cleanParam(data.artist.name) + " " + cleanParam(data.album.name) + " " + cleanParam(data.name);
album = data.album.name;
}

View file

@ -65,32 +65,46 @@ module.exports.lookupId = function(id, type) {
}
module.exports.search = function(data) {
// Prepare a name for use inside a field query ("artist:", "album:", "track:").
//
// 1. Cut the string at the first '&', '[' or '(' so that collaborators and
//    edition suffixes ("(Deluxe)", "[Remastered]") do not narrow the search.
//    Each character is looked up in the string being cleaned; a match at
//    index 0 is ignored so a name that starts with a bracket is not reduced
//    to an empty string.
// 2. Remove every ':' and '?', which would otherwise be read as query syntax.
//
// The result is intentionally not trimmed: callers compare it against
// untrimmed prefixes of result names.
var cleanParam = function(str) {
  var chopChars = ['&', '[', '('];
  var cleaned = str;
  chopChars.forEach(function(chr) {
    var idx = cleaned.indexOf(chr);
    if (idx > 0) {
      cleaned = cleaned.substring(0, idx);
    }
  });
  return cleaned.replace(/[:?]+/g, "");
};
var query, album;
var type = data.type;
if (type == "album") {
query = "artist:" + data.artist.name.replace(":", "") + " album:" + data.name.replace(":", "");
query = "artist:" + cleanParam(data.artist.name) + " album:" + cleanParam(data.name);
album = data.name;
} else if (type == "track") {
query = "artist:" + data.artist.name.replace(":", "") + " track:" + data.name.replace(":", "") + ( data.album.name.length > 0 ? " album: " + data.album.name.replace(":", ""): "");
query = "artist:" + cleanParam(data.artist.name) + " track:" + cleanParam(data.name) + ( cleanParam(data.album.name).length > 0 ? " album:" + cleanParam(data.album.name): "");
album = data.album.name;
}
return spotify.searchAsync({query: query, type: type}).then(function(results) {
if (!results[type + "s"].items[0]) {
var matches = album.match(/^[^\(\[]+/);
if (matches && matches[0] && matches[0] != album) {
var cleanedData = JSON.parse(JSON.stringify(data));
if (type == "album") {
cleanedData.name = matches[0].trim();
} else if (type == "track") {
cleanedData.album.name = matches[0].trim();
return {service: "spotify"};
} else {
var found;
var choppedAlbum = data.type == "album" ? cleanParam(data.name) : cleanParam(data.album.name);
if (!choppedAlbum.length) {
return module.exports.lookupId(results[type + "s"].items[0].id, type);
}
results[type + "s"].items.forEach(function(item) {
var albumName = data.type == "album" ? item.name : item.album.name;
var matches = albumName.match(/^[^\(\[]+/);
if(choppedAlbum.indexOf(matches[0]) >= 0) {
found = item;
}
return module.exports.search(cleanedData);
} else {
});
if (!found) {
return {service: "spotify"};
}
} else {
return module.exports.lookupId(results[type + "s"].items[0].id, type);
}

View file

@ -82,17 +82,19 @@ module.exports.lookupId = function(id, type) {
};
module.exports.search = function(data) {
// Drop ':', '?' and '&' from a search term. All occurrences are removed (the
// pattern is global); previously only the first run was stripped, leaving
// later ones in the query.
var cleanParam = function(str) {
  return str.replace(/[:?&]+/g, "");
};
var query, album;
var type = data.type;
if (type == "album") {
query = data.artist.name + " " + data.name;
query = cleanParam(data.artist.name.substring(0, data.artist.name.indexOf('&'))) + " " + cleanParam(data.name);
album = data.name;
} else if (type == "track") {
query = data.artist.name + " " + data.name;
query = cleanParam(data.artist.name.substring(0, data.artist.name.indexOf('&'))) + " " + cleanParam(data.name);
album = data.album.name
}
return getAccessToken().then(function(access_token){
var path = "/music/search?q=" + encodeURIComponent(query) + "&filters=" + type + "s";
return request.get(apiRoot + path).set("Authorization", "Bearer " + access_token).promise().then(function(res) {

View file

@ -2,6 +2,7 @@
var parse = require('url').parse;
var freebase = require('./freebase');
var querystring = require('querystring');
var moment = require('moment');
var Promise = require('bluebird');
var request = require('superagent');
require('superagent-bluebird-promise');
@ -37,7 +38,7 @@ module.exports.parseUrl = function(url) {
module.exports.lookupId = function(id, type) {
var path = "/videos?part=snippet%2CtopicDetails&id=" + id + "&key=" + credentials.key;
var path = "/videos?part=snippet%2CtopicDetails%2CcontentDetails&id=" + id + "&key=" + credentials.key;
return request.get(apiRoot + path).promise().then(function(res) {
var item = res.body.items[0];
@ -65,11 +66,13 @@ module.exports.lookupId = function(id, type) {
} else if (topic.property["/type/object/type"].values.some(function(value) {
return value.text == "Musical Recording";
})) {
if (!match.name) {
match.album = {name: topic.property["/music/recording/releases"].values[0].text};
//if (moment.duration(item.contentDetails.duration).asSeconds() < 900) {
match.name = topic.property["/type/object/name"].values[0].text;
match.type = "track";
}
if (topic.property["/music/recording/releases"]) {
match.type = "album";
match.album.name = topic.property["/music/recording/releases"].values[0].text;
}
//}
} else if (topic.property["/type/object/type"].values.some(function(value) {
return value.text == "Musical Album";
})) {

View file

@ -35,6 +35,7 @@
"express": "~4.10.6",
"express-session": "^1.9.2",
"helmet": "^0.5.2",
"moment": "^2.9.0",
"morgan": "~1.5.0",
"node-jsx": "^0.12.4",
"node-uuid": "^1.4.2",