Created
March 19, 2012 02:41
-
-
Save mindsocket/2091580 to your computer and use it in GitHub Desktop.
Use beets' acoustid plugin, musicbrainz lookup and lastfm api to determine playcounts for a collection
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/home/roger/.virtualenvs/lastfm/bin/python -u | |
""" | |
File: lastmatchwithnums.py | |
Author: Roger Barnes | |
A simple program for using acoustid to fingerprint and look up metadata (esp. play counts) | |
for MP3 files via lastfm. Usage: | |
$ python lastmatchwithnums.py [folder] ... | |
All mp3s in all folders (recursive) will be fingerprinted with the beets | |
acoustid plugin, then looked up (if possible) from last.fm for play and | |
listener counts. Data is written into redis. | |
Finally, the data can be extracted and reported. Feed into a symlink thusly: | |
$ ./lastmatchwithnums.py | xargs -d '\n' -Ixxx ln -sf xxx ~/audiolinks/ | |
Requirements, all pip installable (+ some dependent system packages, YMMV): | |
beets | |
redis | |
pyacoustid | |
Technology note: The generator-pipeline-driven programming style in Matcher was inspired by
David Beazley's presentations on generators - http://www.dabeaz.com/generators/
TODO - multiprocessing? | |
""" | |
import sys | |
import os | |
import pylast | |
from beets.autotag import mb | |
from redis import Redis | |
from beetsplug import chroma | |
from pprint import pprint | |
# Last.fm API key.  The built-in default was registered specifically for this
# script (http://last.fm/api/account).  Set the LASTFM_API_KEY environment
# variable to use a different key without editing this file.
API_KEY = os.environ.get('LASTFM_API_KEY', 'faf408096c145277a0e01e712ae4a5f2')

# pylast errors that are treated as "this lookup failed" rather than as fatal.
PYLAST_EXCEPTIONS = (
    pylast.WSError,
    pylast.MalformedResponseError,
    pylast.NetworkError,
)
import fnmatch | |
def gen_find(filepat, top):
    """Lazily yield the path of every file below *top* matching *filepat*.

    *filepat* is an fnmatch-style pattern (e.g. "*.mp3") that is applied to
    the bare file names only.  The tree rooted at *top* is walked with
    os.walk, and each hit is yielded as the directory path joined with the
    file name.
    """
    for dirpath, _subdirs, filenames in os.walk(top):
        matching = fnmatch.filter(filenames, filepat)
        for filename in matching:
            yield os.path.join(dirpath, filename)
class Matcher(object):
    """Fingerprint MP3 files and record their Last.fm statistics in redis.

    Redis keys used by this class:

    * ``lastfmpaths``          - set of every ``lastfm:<path>`` key seen so far
    * ``lastfmdirs``           - set of ``lastfm:<directory>`` names
    * ``lastfmdir:<dir name>`` - set of the ``lastfm:<path>`` keys in that directory
    * ``mbid_for:<path>``      - cached recording id for a file ("" = no match)
    * ``matchrec:<id>``        - hash with ``id``, ``artist`` and ``title``
    * ``lastfm:<path>``        - hash with ``id``, ``artist``, ``title``,
                                 ``playcount`` and ``listener_count``
    * ``lastfmscore`` / ``lastfmscore:<dir name>`` - sorted sets rebuilt by the
                                 reporting methods
    """

    def __init__(self, **kwargs):
        """Connect to redis and create the Last.fm client.

        Recognised keyword arguments: ``redis_host`` (default 'localhost'),
        ``redis_port`` (default 6379) and ``redis_db`` (default 0).
        """
        self.redis = Redis(
            host=kwargs.get('redis_host', 'localhost'),
            port=int(kwargs.get('redis_port', 6379)),
            db=int(kwargs.get('redis_db', 0))
        )
        self.network = pylast.LastFMNetwork(api_key=API_KEY)

    @staticmethod
    def _log(*parts):
        """Write *parts*, space separated, as one line on stderr."""
        sys.stderr.write(" ".join(str(part) for part in parts) + "\n")

    @staticmethod
    def _path_from_key(key):
        """Strip the ``lastfm:`` style prefix from a redis key.

        Only the first colon is treated as the separator so that paths which
        themselves contain colons are returned intact.
        """
        return key.split(':', 1)[1]

    @staticmethod
    def _ratio(play, listen):
        """Return plays per listener, or 0.0 when there are no listeners."""
        listeners = float(listen)
        if not listeners:
            return 0.0
        return float(play) / listeners

    def processpath(self, path):
        """Process every ``*.mp3`` below *path*; return how many were recorded.

        Each stage is a generator feeding the next one, so files flow through
        the pipeline one at a time.
        """
        mp3files = gen_find("*.mp3", path)
        fullpaths = self._getfullpath(mp3files)
        # Anything already registered in redis is dropped here.
        newpaths = (fullpath for fullpath in fullpaths if self._addpath(fullpath))
        matches = self._getmatches(newpaths)
        matchrecs = self._getmatchrec(matches)
        return sum(self._lastfmlookup(matchrecs))

    def _getfullpath(self, files):
        """Yield the absolute, user-expanded form of each path in *files*."""
        for relfile in files:
            yield os.path.abspath(os.path.expanduser(relfile))

    def _addpath(self, path):
        """Register *path* in redis (this stage is not a generator).

        Returns the result of adding the key to ``lastfmpaths``: truthy when
        the path is new, falsy when it was already present (in which case the
        stored record is logged to stderr).
        """
        key = "lastfm:" + path
        dirname = os.path.dirname(key)
        self.redis.sadd("lastfmdirs", dirname)
        self.redis.sadd("lastfmdir:" + dirname, key)
        result = self.redis.sadd("lastfmpaths", key)
        if not result:
            self._log(path, "exists", self.redis.hgetall(key))
        return result

    def _fingerprint(self, path):
        """Return the acoustid match id for *path*, or None if there is none.

        Errors raised while matching are logged and reported as "no match".
        """
        try:
            match = chroma.acoustid_match(path)
        except (EOFError, AttributeError, IOError) as err:
            self._log("ERROR matching", path, err)
            return None
        return match[0] if match else None

    def _getmatches(self, files):
        """Yield ``(path, matchid)`` for every file that has an acoustid match.

        Results are cached under ``mbid_for:<path>``.  A failed lookup is
        cached as the empty string: storing None would either be persisted as
        the truthy text "None" or be rejected outright, depending on the
        redis client version.
        """
        for path in files:
            cachekey = "mbid_for:" + path
            matchid = self.redis.get(cachekey)
            if matchid is None:
                matchid = self._fingerprint(path)
                self.redis.set(cachekey, matchid or "")
            # "None" is what earlier runs of this script may have cached for
            # a failed lookup; treat it as "no match" as well.
            if not matchid or matchid == "None":
                continue
            yield (path, matchid)

    def _getmatchrec(self, pairs):
        """Yield ``(path, matchrec)`` for each ``(path, matchid)`` pair.

        *matchrec* is a dict with ``id``, ``artist`` and ``title``, read from
        the ``matchrec:<id>`` cache or built from a MusicBrainz lookup.  Pairs
        whose lookup result has no artist/title are dropped; a falsy
        *matchid* yields an empty dict.
        """
        for path, matchid in pairs:
            if not matchid:
                yield (path, {})
                continue
            reckey = "matchrec:" + matchid
            matchrec = self.redis.hgetall(reckey)
            if not matchrec:
                mbtrack = mb.track_for_id(matchid)
                try:
                    matchrec = {
                        'id': matchid,
                        'artist': mbtrack.artist,
                        'title': mbtrack.title,
                    }
                except AttributeError:
                    # Presumably track_for_id returned None (unknown id).
                    continue
                self.redis.hmset(reckey, matchrec)
            yield (path, matchrec)

    def _lastfmlookup(self, matchrecs):
        """Store Last.fm play/listener counts; yield 1 per recorded file.

        The track is looked up by id first, falling back to artist + title.
        Files whose Last.fm lookup fails are skipped (nothing is yielded);
        records without an ``id`` are stored with id "NOMATCH".
        """
        for path, matchrec in matchrecs:
            key = "lastfm:" + path
            if 'id' in matchrec:
                try:
                    track = self.network.get_track_by_mbid(matchrec['id'])
                except PYLAST_EXCEPTIONS:
                    if 'artist' in matchrec and 'title' in matchrec:
                        track = self.network.get_track(matchrec['artist'], matchrec['title'])
                    else:
                        continue
                # A cached record may lack artist/title; store what exists.
                for field in ('id', 'artist', 'title'):
                    if field in matchrec:
                        self.redis.hset(key, field, matchrec[field])
                try:
                    self.redis.hset(key, 'playcount', track.get_playcount())
                    self.redis.hset(key, 'listener_count', track.get_listener_count())
                except PYLAST_EXCEPTIONS:
                    continue
            else:
                self.redis.hset(key, 'id', "NOMATCH")
            self._log(path, "new", self.redis.hgetall(key))
            yield 1

    def _printsorted(self, identifier, allkeys, calc=None, min_playcount=1000):
        """Rebuild the sorted set *identifier* and return it, best score first.

        Every key in *allkeys* whose hash has both counts and a playcount
        above *min_playcount* is scored with ``calc(playcount,
        listener_count)``.  *calc* defaults to plays per listener (0.0 when
        there are no listeners).  Returns a list of ``(key, score)`` pairs.
        """
        if calc is None:
            calc = self._ratio
        self.redis.delete(identifier)
        for key in allkeys:
            rec = self.redis.hgetall(key)
            if 'playcount' not in rec or 'listener_count' not in rec:
                continue
            if int(rec['playcount']) > min_playcount:
                # NOTE(review): (name, member, score) is the argument order of
                # the legacy redis-py Redis.zadd - confirm against the
                # installed client version.
                self.redis.zadd(identifier, key, calc(rec['playcount'], rec['listener_count']))
        scored = self.redis.zrange(identifier, 0, -1, withscores=True)
        scored.reverse()
        return scored

    def printsortedresults(self, min_score=5):
        """Print the path of every known file scoring above *min_score*."""
        allkeys = self.redis.smembers("lastfmpaths")
        for key, score in self._printsorted("lastfmscore", allkeys):
            if score > min_score:
                print(self._path_from_key(key))

    def printsortedresultsbydir(self, per_dir=2):
        """Print the *per_dir* most played files of every known directory."""
        for path in self.redis.smembers("lastfmdirs"):
            allkeys = self.redis.smembers("lastfmdir:" + path)
            scored = self._printsorted(
                "lastfmscore:" + path,
                allkeys,
                calc=lambda play, listen: float(play)
            )
            for key, _score in scored[:per_dir]:
                print(self._path_from_key(key))
if __name__ == '__main__':
    # Every command-line argument is a folder to scan; results accumulate in
    # the local redis instance.
    matcher = Matcher(redis_host='localhost', redis_port=6379, redis_db=0)
    total = sum(matcher.processpath(folder) for folder in sys.argv[1:])
    # `total` holds the number of newly recorded files; it is not reported.
    # Report the top tracks of each directory.  matcher.printsortedresults()
    # is the alternative, collection-wide report.
    matcher.printsortedresultsbydir()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment