mindsocket · March 19, 2012 02:41
diff --git a/lastmatchwithnums.py b/lastmatchwithnums.py
 #!/home/roger/.virtualenvs/lastfm/bin/python -u
 """
 File: lastmatchwithnums.py
 Author: Roger Barnes

 A simple program for using acoustid to fingerprint and look up metadata (esp. play counts)
 for MP3 files via lastfm. Usage:

    $ python lastmatchwithnums.py [folder] ...
    
 All mp3s in all folders (recursive) will be fingerprinted with the beets 
 acoustid plugin, then looked up (if possible) from last.fm for play and 
 listener counts.  Data is written into redis.

 Finally, the data can be extracted and reported.  Feed into a symlink thusly:

    $ ./lastmatchwithnums.py | xargs -d '\n' -Ixxx ln -sf xxx ~/audiolinks/
    
 Requirements, all pip installable (+ some dependent system packages, YMMV):
    beets
    redis
    pyacoustid
    
 Technology note: The generator-pipeline-driven programming style in Matcher was inspired by
 David Beazley's presentations on generators. - http://www.dabeaz.com/generators/
 TODO - multiprocessing?

 """
 import sys
 import os
 import pylast
 from beets.autotag import mb
 from redis import Redis
 from beetsplug import chroma
 from pprint import pprint

 # This API key is specifically for this script.
 # http://last.fm/api/account
 # It is handed to pylast.LastFMNetwork in Matcher.__init__.
 API_KEY = 'faf408096c145277a0e01e712ae4a5f2'

 # The pylast error types that Matcher._lastfmlookup catches, both when
 # resolving a track and when fetching its play/listener counts.
 PYLAST_EXCEPTIONS = (
    pylast.WSError,
    pylast.MalformedResponseError,
    pylast.NetworkError,
 )

 import fnmatch

 def gen_find(filepat,top):
    """Lazily yield the path of every file below ``top`` whose name matches ``filepat``.

    ``filepat`` is a shell-style wildcard (fnmatch syntax) that is applied to
    bare file names only; ``top`` is walked recursively with os.walk and each
    hit is returned joined onto the directory it was found in.
    """
    for folder, _subfolders, names in os.walk(top):
        hits = (os.path.join(folder, name)
                for name in names
                if fnmatch.fnmatch(name, filepat))
        for hit in hits:
            yield hit

 class Matcher(object):
    """Fingerprint MP3 files, look up their last.fm counts and keep it all in redis.

    Redis keys written by this class:
      lastfmpaths            set of "lastfm:<absolute path>" keys, one per file
      lastfmdirs             set of "lastfm:<absolute directory>" entries
      lastfmdir:<dir entry>  set of the file keys that live in that directory
      lastfm:<path>          hash: id, artist, title, playcount, listener_count
      mbid_for:<path>        cached match ID of the file ("" means no match)
      matchrec:<match id>    hash: id, artist, title
      lastfmscore[:<dir>]    sorted sets, rebuilt each time a report is printed
    """

    def __init__(self, **kwargs):
        """Open the redis connection and create the last.fm client.

        Required keyword arguments: ``redis_host``, ``redis_port`` and
        ``redis_db`` (the latter two are passed through int()).
        """
        self.redis = Redis(host=kwargs['redis_host'], port=int(kwargs['redis_port']), db=int(kwargs['redis_db']))
        self.network = pylast.LastFMNetwork(api_key=API_KEY)

    @staticmethod
    def _log(*parts):
        """Write ``parts`` to stderr, space separated, followed by a newline.

        Stands in for ``print >> sys.stderr, ...`` so the class parses under
        both Python 2 and Python 3.
        """
        sys.stderr.write(" ".join(str(part) for part in parts) + "\n")

    @staticmethod
    def _pathfromkey(key):
        """Strip the "lastfm:" style prefix from a redis key.

        Only the first colon is treated as the separator, so file paths that
        themselves contain a colon come back intact.
        """
        return key.split(':', 1)[1]

    def processpath(self, path):
        """Run the lookup pipeline over every *.mp3 found below ``path``.

        Every stage is a generator feeding the next one.  Returns the number
        of files that came out of the final last.fm stage.
        """
        mp3files = gen_find("*.mp3", path)
        fullpaths = self._getfullpath(mp3files)
        # _addpath() is falsy for files already registered in redis, which
        # short-circuits the rest of the pipeline for them.
        newpaths = (fullpath for fullpath in fullpaths if self._addpath(fullpath))
        matches = self._getmatches(newpaths)
        matchrecs = self._getmatchrec(matches)
        return sum(self._lastfmlookup(matchrecs))

    def _getfullpath(self, files):
        """Yield the user-expanded, absolute form of each path in ``files``."""
        for relfile in files:
            yield os.path.abspath(os.path.expanduser(relfile))

    def _addpath(self, path):
        """Register ``path`` in the redis path/directory sets (not a generator).

        Returns the result of adding the file key to "lastfmpaths", which is
        falsy when the file had been registered before; in that case the
        stored record is logged to stderr.
        """
        key = "lastfm:" + path
        dirkey = os.path.dirname(key)
        self.redis.sadd("lastfmdirs", dirkey)
        self.redis.sadd("lastfmdir:" + dirkey, key)
        result = self.redis.sadd("lastfmpaths", key)
        if not result:
            self._log(path, "exists", self.redis.hgetall(key))
        return result

    def _getmatches(self, files):
        """Yield ``(path, matchid)`` for every file that has an acoustid match.

        The match ID is cached under "mbid_for:<path>".  A failed or empty
        match is cached as an empty string instead of passing ``None`` to
        redis; a cached "" (or the text "None" left by earlier runs) marks a
        known miss and the file is skipped without being fingerprinted again.
        """
        for path in files:
            cachekey = "mbid_for:" + path
            matchid = self.redis.get(cachekey)
            if matchid is None:
                # Nothing cached yet: fingerprint the file now.
                try:
                    match = chroma.acoustid_match(path)
                    matchid = match[0] if match else None
                except (EOFError, AttributeError, IOError) as err:
                    self._log("ERROR matching", path, err)
                    matchid = None
                self.redis.set(cachekey, matchid or "")
            if not matchid or matchid == "None":
                continue
            yield (path, matchid)

    def _getmatchrec(self, pairs):
        """Yield ``(path, matchrec)`` for each ``(path, matchid)`` pair.

        ``matchrec`` is the cached "matchrec:<id>" hash when there is one;
        otherwise it is built from ``mb.track_for_id`` and cached.  Pairs whose
        track lookup yields no artist/title attributes are dropped.  A falsy
        ``matchid`` produces an empty record.
        """
        for path, matchid in pairs:
            if not matchid:
                yield (path, {})
                continue

            reckey = "matchrec:" + matchid
            matchrec = self.redis.hgetall(reckey)
            if not matchrec:
                mbtrack = mb.track_for_id(matchid)
                try:
                    matchrec = {
                        'id': matchid,
                        'artist': mbtrack.artist,
                        'title': mbtrack.title,
                    }
                except AttributeError:
                    # The lookup gave back nothing with artist/title on it.
                    continue
                self.redis.hmset(reckey, matchrec)
            yield (path, matchrec)

    def _lastfmlookup(self, matchrecs):
        """Store last.fm data for each ``(path, matchrec)`` and yield 1 per stored file.

        The track is resolved by ID first and, if pylast raises, by artist and
        title.  Files whose counts cannot be fetched keep their id/artist/title
        fields but are not counted.  Records without an 'id' are stored with
        the id "NOMATCH".
        """
        for path, matchrec in matchrecs:
            key = "lastfm:" + path

            if 'id' in matchrec:
                try:
                    track = self.network.get_track_by_mbid(matchrec['id'])
                except PYLAST_EXCEPTIONS:
                    if 'artist' in matchrec and 'title' in matchrec:
                        track = self.network.get_track(matchrec['artist'], matchrec['title'])
                    else:
                        continue

                self.redis.hset(key, 'id', matchrec['id'])
                self.redis.hset(key, 'artist', matchrec['artist'])
                self.redis.hset(key, 'title', matchrec['title'])
                try:
                    self.redis.hset(key, 'playcount', track.get_playcount())
                    self.redis.hset(key, 'listener_count', track.get_listener_count())
                except PYLAST_EXCEPTIONS:
                    continue
            else:
                self.redis.hset(key, 'id', "NOMATCH")

            self._log(path, "new", self.redis.hgetall(key))
            yield 1

    def _printsorted(self, identifier, allkeys, calc=lambda play,listen:float(play) / float(listen)):
        """Rebuild the sorted set ``identifier`` and return it, highest score first.

        Only records that have both counts and a playcount above 1000 are
        scored.  ``calc(playcount, listener_count)`` produces the score; the
        default is plays per listener.  A record whose score cannot be
        computed because of a zero division is left out instead of aborting
        the report.  Returns a list of ``(key, score)`` pairs.
        """
        self.redis.delete(identifier)
        for key in allkeys:
            rec = self.redis.hgetall(key)
            if 'playcount' not in rec or 'listener_count' not in rec:
                continue
            if int(rec['playcount']) <= 1000:
                continue
            try:
                score = calc(rec['playcount'], rec['listener_count'])
            except ZeroDivisionError:
                continue
            self.redis.zadd(identifier, key, score)

        scored = self.redis.zrange(identifier, 0, -1, withscores=True)
        scored.reverse()
        return scored

    def printsortedresults(self):
        """Print, best first, the path of every file scoring above 5 plays per listener."""
        allkeys = self.redis.smembers("lastfmpaths")
        for key, score in self._printsorted("lastfmscore", allkeys):
            if score > 5:
                sys.stdout.write(self._pathfromkey(key) + "\n")

    def printsortedresultsbydir(self):
        """Print the paths of the two most-played files of every known directory."""
        for dirkey in self.redis.smembers("lastfmdirs"):
            allkeys = self.redis.smembers("lastfmdir:" + dirkey)
            scored = self._printsorted("lastfmscore:" + dirkey, allkeys, calc=lambda play,listen:float(play))
            for key, _score in scored[:2]:
                sys.stdout.write(self._pathfromkey(key) + "\n")


 if __name__ == '__main__':
    # Every command-line argument is a folder to scan.
    args = sys.argv[1:]
    matcher = Matcher(redis_host='localhost', redis_port=6379, redis_db=0)

    # processpath() returns how many files of that folder were looked up;
    # the grand total is kept for optional reporting but not printed.
    total = sum(matcher.processpath(patharg) for patharg in args)

    # Report the top files of each directory on stdout.  The alternative
    # report over all files is matcher.printsortedresults().
    matcher.printsortedresultsbydir()
	#!/home/roger/.virtualenvs/lastfm/bin/python -u
	"""
	File: lastmatchwithnums.py
	Author: Roger Barnes

	A simple program for using acoustid to fingerprint and look up metadata (esp. play counts)
	for MP3 files via lastfm. Usage:

	$ python lastmatchwithnums.py [folder] ...

	All mp3s in all folders (recursive) will be fingerprinted with the beets
	acoustid plugin, then looked up (if possible) from last.fm for play and
	listener counts. Data is written into redis.

	Finally, the data can be extracted and reported. Feed into a symlink thusly:

	$ ./lastmatchwithnums.py | xargs -d '\n' -Ixxx ln -sf xxx ~/audiolinks/

	Requirements, all pip installable (+ some dependent system packages, YMMV):
	beets
	redis
	pyacoustid

	Technology note: The generator-pipeline-driven programming style in Matcher was inspired by
	David Beazley's presentations on generators. - http://www.dabeaz.com/generators/
	TODO - multiprocessing?

	"""
	import sys
	import os
	import pylast
	from beets.autotag import mb
	from redis import Redis
	from beetsplug import chroma
	from pprint import pprint

	# This API key is specifically for this script.
	# http://last.fm/api/account
	# It is handed to pylast.LastFMNetwork in Matcher.__init__.
	API_KEY = 'faf408096c145277a0e01e712ae4a5f2'

	# The pylast error types that Matcher._lastfmlookup catches, both when
	# resolving a track and when fetching its play/listener counts.
	PYLAST_EXCEPTIONS = (
	pylast.WSError,
	pylast.MalformedResponseError,
	pylast.NetworkError,
	)

	import fnmatch

	def gen_find(filepat,top):
	    """Lazily yield the path of every file below ``top`` whose name matches ``filepat``.

	    ``filepat`` is a shell-style wildcard (fnmatch syntax) that is applied to
	    bare file names only; ``top`` is walked recursively with os.walk and each
	    hit is returned joined onto the directory it was found in.
	    """
	    for folder, _subfolders, names in os.walk(top):
	        hits = (os.path.join(folder, name)
	                for name in names
	                if fnmatch.fnmatch(name, filepat))
	        for hit in hits:
	            yield hit

	class Matcher(object):
	    """Fingerprint MP3 files, look up their last.fm counts and keep it all in redis.

	    Redis keys written by this class:
	      lastfmpaths            set of "lastfm:<absolute path>" keys, one per file
	      lastfmdirs             set of "lastfm:<absolute directory>" entries
	      lastfmdir:<dir entry>  set of the file keys that live in that directory
	      lastfm:<path>          hash: id, artist, title, playcount, listener_count
	      mbid_for:<path>        cached match ID of the file ("" means no match)
	      matchrec:<match id>    hash: id, artist, title
	      lastfmscore[:<dir>]    sorted sets, rebuilt each time a report is printed
	    """

	    def __init__(self, **kwargs):
	        """Open the redis connection and create the last.fm client.

	        Required keyword arguments: ``redis_host``, ``redis_port`` and
	        ``redis_db`` (the latter two are passed through int()).
	        """
	        self.redis = Redis(host=kwargs['redis_host'], port=int(kwargs['redis_port']), db=int(kwargs['redis_db']))
	        self.network = pylast.LastFMNetwork(api_key=API_KEY)

	    @staticmethod
	    def _log(*parts):
	        """Write ``parts`` to stderr, space separated, followed by a newline.

	        Stands in for ``print >> sys.stderr, ...`` so the class parses under
	        both Python 2 and Python 3.
	        """
	        sys.stderr.write(" ".join(str(part) for part in parts) + "\n")

	    @staticmethod
	    def _pathfromkey(key):
	        """Strip the "lastfm:" style prefix from a redis key.

	        Only the first colon is treated as the separator, so file paths that
	        themselves contain a colon come back intact.
	        """
	        return key.split(':', 1)[1]

	    def processpath(self, path):
	        """Run the lookup pipeline over every *.mp3 found below ``path``.

	        Every stage is a generator feeding the next one.  Returns the number
	        of files that came out of the final last.fm stage.
	        """
	        mp3files = gen_find("*.mp3", path)
	        fullpaths = self._getfullpath(mp3files)
	        # _addpath() is falsy for files already registered in redis, which
	        # short-circuits the rest of the pipeline for them.
	        newpaths = (fullpath for fullpath in fullpaths if self._addpath(fullpath))
	        matches = self._getmatches(newpaths)
	        matchrecs = self._getmatchrec(matches)
	        return sum(self._lastfmlookup(matchrecs))

	    def _getfullpath(self, files):
	        """Yield the user-expanded, absolute form of each path in ``files``."""
	        for relfile in files:
	            yield os.path.abspath(os.path.expanduser(relfile))

	    def _addpath(self, path):
	        """Register ``path`` in the redis path/directory sets (not a generator).

	        Returns the result of adding the file key to "lastfmpaths", which is
	        falsy when the file had been registered before; in that case the
	        stored record is logged to stderr.
	        """
	        key = "lastfm:" + path
	        dirkey = os.path.dirname(key)
	        self.redis.sadd("lastfmdirs", dirkey)
	        self.redis.sadd("lastfmdir:" + dirkey, key)
	        result = self.redis.sadd("lastfmpaths", key)
	        if not result:
	            self._log(path, "exists", self.redis.hgetall(key))
	        return result

	    def _getmatches(self, files):
	        """Yield ``(path, matchid)`` for every file that has an acoustid match.

	        The match ID is cached under "mbid_for:<path>".  A failed or empty
	        match is cached as an empty string instead of passing ``None`` to
	        redis; a cached "" (or the text "None" left by earlier runs) marks a
	        known miss and the file is skipped without being fingerprinted again.
	        """
	        for path in files:
	            cachekey = "mbid_for:" + path
	            matchid = self.redis.get(cachekey)
	            if matchid is None:
	                # Nothing cached yet: fingerprint the file now.
	                try:
	                    match = chroma.acoustid_match(path)
	                    matchid = match[0] if match else None
	                except (EOFError, AttributeError, IOError) as err:
	                    self._log("ERROR matching", path, err)
	                    matchid = None
	                self.redis.set(cachekey, matchid or "")
	            if not matchid or matchid == "None":
	                continue
	            yield (path, matchid)

	    def _getmatchrec(self, pairs):
	        """Yield ``(path, matchrec)`` for each ``(path, matchid)`` pair.

	        ``matchrec`` is the cached "matchrec:<id>" hash when there is one;
	        otherwise it is built from ``mb.track_for_id`` and cached.  Pairs whose
	        track lookup yields no artist/title attributes are dropped.  A falsy
	        ``matchid`` produces an empty record.
	        """
	        for path, matchid in pairs:
	            if not matchid:
	                yield (path, {})
	                continue

	            reckey = "matchrec:" + matchid
	            matchrec = self.redis.hgetall(reckey)
	            if not matchrec:
	                mbtrack = mb.track_for_id(matchid)
	                try:
	                    matchrec = {
	                        'id': matchid,
	                        'artist': mbtrack.artist,
	                        'title': mbtrack.title,
	                    }
	                except AttributeError:
	                    # The lookup gave back nothing with artist/title on it.
	                    continue
	                self.redis.hmset(reckey, matchrec)
	            yield (path, matchrec)

	    def _lastfmlookup(self, matchrecs):
	        """Store last.fm data for each ``(path, matchrec)`` and yield 1 per stored file.

	        The track is resolved by ID first and, if pylast raises, by artist and
	        title.  Files whose counts cannot be fetched keep their id/artist/title
	        fields but are not counted.  Records without an 'id' are stored with
	        the id "NOMATCH".
	        """
	        for path, matchrec in matchrecs:
	            key = "lastfm:" + path

	            if 'id' in matchrec:
	                try:
	                    track = self.network.get_track_by_mbid(matchrec['id'])
	                except PYLAST_EXCEPTIONS:
	                    if 'artist' in matchrec and 'title' in matchrec:
	                        track = self.network.get_track(matchrec['artist'], matchrec['title'])
	                    else:
	                        continue

	                self.redis.hset(key, 'id', matchrec['id'])
	                self.redis.hset(key, 'artist', matchrec['artist'])
	                self.redis.hset(key, 'title', matchrec['title'])
	                try:
	                    self.redis.hset(key, 'playcount', track.get_playcount())
	                    self.redis.hset(key, 'listener_count', track.get_listener_count())
	                except PYLAST_EXCEPTIONS:
	                    continue
	            else:
	                self.redis.hset(key, 'id', "NOMATCH")

	            self._log(path, "new", self.redis.hgetall(key))
	            yield 1

	    def _printsorted(self, identifier, allkeys, calc=lambda play,listen:float(play) / float(listen)):
	        """Rebuild the sorted set ``identifier`` and return it, highest score first.

	        Only records that have both counts and a playcount above 1000 are
	        scored.  ``calc(playcount, listener_count)`` produces the score; the
	        default is plays per listener.  A record whose score cannot be
	        computed because of a zero division is left out instead of aborting
	        the report.  Returns a list of ``(key, score)`` pairs.
	        """
	        self.redis.delete(identifier)
	        for key in allkeys:
	            rec = self.redis.hgetall(key)
	            if 'playcount' not in rec or 'listener_count' not in rec:
	                continue
	            if int(rec['playcount']) <= 1000:
	                continue
	            try:
	                score = calc(rec['playcount'], rec['listener_count'])
	            except ZeroDivisionError:
	                continue
	            self.redis.zadd(identifier, key, score)

	        scored = self.redis.zrange(identifier, 0, -1, withscores=True)
	        scored.reverse()
	        return scored

	    def printsortedresults(self):
	        """Print, best first, the path of every file scoring above 5 plays per listener."""
	        allkeys = self.redis.smembers("lastfmpaths")
	        for key, score in self._printsorted("lastfmscore", allkeys):
	            if score > 5:
	                sys.stdout.write(self._pathfromkey(key) + "\n")

	    def printsortedresultsbydir(self):
	        """Print the paths of the two most-played files of every known directory."""
	        for dirkey in self.redis.smembers("lastfmdirs"):
	            allkeys = self.redis.smembers("lastfmdir:" + dirkey)
	            scored = self._printsorted("lastfmscore:" + dirkey, allkeys, calc=lambda play,listen:float(play))
	            for key, _score in scored[:2]:
	                sys.stdout.write(self._pathfromkey(key) + "\n")


	if __name__ == '__main__':
	    # Every command-line argument is a folder to scan.
	    args = sys.argv[1:]
	    matcher = Matcher(redis_host='localhost', redis_port=6379, redis_db=0)

	    # processpath() returns how many files of that folder were looked up;
	    # the grand total is kept for optional reporting but not printed.
	    total = sum(matcher.processpath(patharg) for patharg in args)

	    # Report the top files of each directory on stdout.  The alternative
	    # report over all files is matcher.printsortedresults().
	    matcher.printsortedresultsbydir()
No results found