From e4a856e228839426d86d21e36993dafcb9278387 Mon Sep 17 00:00:00 2001 From: Jonathan Cremin Date: Tue, 27 Jan 2015 21:32:28 +0000 Subject: [PATCH] Improve matching and metadata extraction --- lib/services/beats/index.js | 43 +++++++++++++++++++++++------------ lib/services/deezer/index.js | 7 ++++-- lib/services/spotify/index.js | 38 +++++++++++++++++++++---------- lib/services/xbox/index.js | 8 ++++--- lib/services/youtube/index.js | 13 +++++++---- package.json | 1 + 6 files changed, 74 insertions(+), 36 deletions(-) diff --git a/lib/services/beats/index.js b/lib/services/beats/index.js index 46496e9..338339e 100644 --- a/lib/services/beats/index.js +++ b/lib/services/beats/index.js @@ -97,14 +97,17 @@ module.exports.lookupId = function(id, type) { }; module.exports.search = function(data) { + var cleanParam = function(str) { + return str.replace(/[\:\?\&]+/, ""); + } var query, album; var type = data.type; if (type == "album") { - query = '"' + data.artist.name + '" "' + data.name + '"'; + query = '"' + cleanParam(data.artist.name) + '" "' + cleanParam(data.name) + '"'; album = data.name; } else if (type == "track") { - query = '"' + data.artist.name + '" "' + data.name + '"'; + query = '"' + cleanParam(data.artist.name) + '" "' + cleanParam(data.name) + '"'; album = data.album.name } @@ -112,21 +115,33 @@ module.exports.search = function(data) { return request.get(apiRoot + path).promise().then(function(res) { if (!res.body.data[0]) { - var matches = album.match(/^[^\(\[]+/); - if (matches && matches[0] && matches[0] != album) { - var cleanedData = JSON.parse(JSON.stringify(data)); - if (type == "album") { - cleanedData.name = matches[0].trim(); - } else if (type == "track") { - cleanedData.album.name = matches[0].trim(); + return {service: "beats"}; + } else { + var found; + var choppedAlbum = data.type == "album" ? cleanParam(data.name) : cleanParam(data.album.name); + var choppedArtist = cleanParam(data.artist.name); + + res.body.data.forEach(function(item) { + var matches = item.detail.match(/^[^\(\[]+/); + if(choppedArtist.indexOf(matches[0]) >= 0) { + found = item; } - return module.exports.search(cleanedData); - } else { + }); + + if (!found && !choppedAlbum.length) { + return module.exports.lookupId(res.body.data[0].id, type); + } + + res.body.data.forEach(function(item) { + var matches = item.related.display.match(/^[^\(\[]+/); + if(choppedAlbum.indexOf(matches[0]) >= 0) { + found = item; + } + }); + if (!found) { return {service: "beats"}; } - } else { - //insist on at least album or artist name being exactly right - return module.exports.lookupId(res.body.data[0].id, type); + return module.exports.lookupId(found.id, type); } }); }; diff --git a/lib/services/deezer/index.js b/lib/services/deezer/index.js index 50ab04e..30adcaa 100644 --- a/lib/services/deezer/index.js +++ b/lib/services/deezer/index.js @@ -73,14 +73,17 @@ module.exports.lookupId = function(id, type) { }; module.exports.search = function(data) { + var cleanParam = function(str) { + return str.replace(/[\:\?\&]+/, ""); + } var query, album; var type = data.type; if (type == "album") { - query = data.artist.name + " " + data.name; + query = cleanParam(data.artist.name) + " " + cleanParam(data.name); album = data.name; } else if (type == "track") { - query = data.artist.name + " " + data.album.name + " " + data.name; + query = cleanParam(data.artist.name) + " " + cleanParam(data.album.name) + " " + cleanParam(data.name); album = data.album.name; } diff --git a/lib/services/spotify/index.js b/lib/services/spotify/index.js index 3f4abd3..3e3b855 100644 --- a/lib/services/spotify/index.js +++ b/lib/services/spotify/index.js @@ -65,32 +65,46 @@ module.exports.lookupId = function(id, type) { } module.exports.search = function(data) { + var cleanParam = function(str) { + var chopChars = ['&', '[', '(']; + chopChars.forEach(function(chr) { + if (data.artist.name.indexOf('&') > 0) { + str = str.substring(0, data.artist.name.indexOf(chr)); + } + }) + return str.replace(/[\:\?]+/, ""); + } var query, album; var type = data.type; if (type == "album") { - query = "artist:" + data.artist.name.replace(":", "") + " album:" + data.name.replace(":", ""); + query = "artist:" + cleanParam(data.artist.name) + " album:" + cleanParam(data.name); album = data.name; } else if (type == "track") { - query = "artist:" + data.artist.name.replace(":", "") + " track:" + data.name.replace(":", "") + ( data.album.name.length > 0 ? " album: " + data.album.name.replace(":", ""): ""); + query = "artist:" + cleanParam(data.artist.name) + " track:" + cleanParam(data.name) + ( cleanParam(data.album.name).length > 0 ? " album:" + cleanParam(data.album.name): ""); album = data.album.name; } return spotify.searchAsync({query: query, type: type}).then(function(results) { if (!results[type + "s"].items[0]) { - var matches = album.match(/^[^\(\[]+/); - if (matches && matches[0] && matches[0] != album) { - var cleanedData = JSON.parse(JSON.stringify(data)); - if (type == "album") { - cleanedData.name = matches[0].trim(); - } else if (type == "track") { - cleanedData.album.name = matches[0].trim(); + return {service: "spotify"}; + } else { + var found; + var choppedAlbum = data.type == "album" ? cleanParam(data.name) : cleanParam(data.album.name); + if (!choppedAlbum.length) { + return module.exports.lookupId(results[type + "s"].items[0].id, type); + } + + results[type + "s"].items.forEach(function(item) { + var albumName = data.type == "album" ? item.name : item.album.name; + var matches = albumName.match(/^[^\(\[]+/); + if(choppedAlbum.indexOf(matches[0]) >= 0) { + found = item; } - return module.exports.search(cleanedData); - } else { + }); + if (!found) { return {service: "spotify"}; } - } else { return module.exports.lookupId(results[type + "s"].items[0].id, type); } diff --git a/lib/services/xbox/index.js b/lib/services/xbox/index.js index d0c276b..1b43174 100644 --- a/lib/services/xbox/index.js +++ b/lib/services/xbox/index.js @@ -82,17 +82,19 @@ module.exports.lookupId = function(id, type) { }; module.exports.search = function(data) { + var cleanParam = function(str) { + return str.replace(/[\:\?\&]+/, ""); + } var query, album; var type = data.type; if (type == "album") { - query = data.artist.name + " " + data.name; + query = cleanParam(data.artist.name.substring(0, data.artist.name.indexOf('&'))) + " " + cleanParam(data.name); album = data.name; } else if (type == "track") { - query = data.artist.name + " " + data.name; + query = cleanParam(data.artist.name.substring(0, data.artist.name.indexOf('&'))) + " " + cleanParam(data.name); album = data.album.name } - return getAccessToken().then(function(access_token){ var path = "/music/search?q=" + encodeURIComponent(query) + "&filters=" + type + "s"; return request.get(apiRoot + path).set("Authorization", "Bearer " + access_token).promise().then(function(res) { diff --git a/lib/services/youtube/index.js b/lib/services/youtube/index.js index f2c1b81..f273697 100644 --- a/lib/services/youtube/index.js +++ b/lib/services/youtube/index.js @@ -2,6 +2,7 @@ var parse = require('url').parse; var freebase = require('./freebase'); var querystring = require('querystring'); +var moment = require('moment'); var Promise = require('bluebird'); var request = require('superagent'); require('superagent-bluebird-promise'); @@ -37,7 +38,7 @@ module.exports.parseUrl = function(url) { module.exports.lookupId = function(id, type) { - var path = "/videos?part=snippet%2CtopicDetails&id=" + id + "&key=" + credentials.key; + var path = "/videos?part=snippet%2CtopicDetails%2CcontentDetails&id=" + id + "&key=" + credentials.key; return request.get(apiRoot + path).promise().then(function(res) { var item = res.body.items[0]; @@ -65,11 +66,13 @@ module.exports.lookupId = function(id, type) { } else if (topic.property["/type/object/type"].values.some(function(value) { return value.text == "Musical Recording"; })) { - if (!match.name) { - match.album = {name: topic.property["/music/recording/releases"].values[0].text}; + //if (moment.duration(item.contentDetails.duration).asSeconds() < 900) { match.name = topic.property["/type/object/name"].values[0].text; - match.type = "track"; - } + if (topic.property["/music/recording/releases"]) { + match.type = "album"; + match.album.name = topic.property["/music/recording/releases"].values[0].text; + } + //} } else if (topic.property["/type/object/type"].values.some(function(value) { return value.text == "Musical Album"; })) { diff --git a/package.json b/package.json index ee0c740..aea8340 100644 --- a/package.json +++ b/package.json @@ -35,6 +35,7 @@ "express": "~4.10.6", "express-session": "^1.9.2", "helmet": "^0.5.2", + "moment": "^2.9.0", "morgan": "~1.5.0", "node-jsx": "^0.12.4", "node-uuid": "^1.4.2",