Skip to content

Instantly share code, notes, and snippets.

@jwheare
Created April 24, 2010 19:56
Show Gist options
  • Save jwheare/377902 to your computer and use it in GitHub Desktop.
Save jwheare/377902 to your computer and use it in GitHub Desktop.
Fetch all a user's Last.fm scrobbles by paging through their recent tracks
#!/usr/bin/env python
"""
Fetch all a user's Last.fm scrobbles by paging through their recent tracks
Usage: ./fetch.py <username> [<start_page> [<end_page>]]
Be aware: you may end up with duplicated data if the user is scrobbling
while you are fetching their tracks. Make sure you check for dupes when
you process the XML later.
"""
import urllib, sys, os, errno
# Change these if you like
# Base directory for the downloaded XML. The user name is appended directly to
# this string to form the per-user output directory, so keep the trailing slash.
OUTDIR = '../data/_ignore/scrobbles/'
# API key sent as the api_key parameter of every Last.fm request
API_KEY = 'b25b959554ed76058ac220b7b2e0a026' # Good ole test key
def get_page(user, page=1):
    """
    Fetch one page of a user's recent tracks from the Last.fm web service.

    user -- Last.fm username whose recent tracks are requested
    page -- page number to request (defaults to 1); 200 tracks per page
            are asked for via the limit parameter

    Returns the raw response body exactly as read from the server.
    The request is attempted twice; if the second attempt also fails the
    IOError propagates to the caller.
    """
    # Imported locally so this function works on both Python 2 and Python 3
    # without depending on which urllib layout the module-level import found.
    try:
        from urllib import urlencode, urlopen  # Python 2
    except ImportError:
        from urllib.parse import urlencode  # Python 3
        from urllib.request import urlopen

    query = urlencode(dict(
        method='user.getrecenttracks',
        user=user,
        api_key=API_KEY,
        limit=200,
        page=page
    ))
    url = 'http://ws.audioscrobbler.com/2.0/?%s' % query
    try:
        tracks = urlopen(url)
    except IOError:
        # Try once more in case of flaky connections
        tracks = urlopen(url)
    # Always release the connection, even if reading the body fails
    try:
        return tracks.read()
    finally:
        tracks.close()
# Script body: work out which pages to fetch for the given user, make sure the
# output directory exists, then download each page and store it as
# <OUTDIR><user>/<page>.xml
args = sys.argv[1:]
if not args:
    # Without a username there is nothing to fetch; print the usage line
    # instead of dying with an IndexError traceback
    sys.exit('Usage: ./fetch.py <username> [<start_page> [<end_page>]]')
user = args[0]

# Get start and end from arguments
if len(args) > 1:
    start = int(args[1])
else:
    start = 1
if len(args) > 2:
    end = int(args[2])
else:
    # Fetch a page and use the <recenttracks totalPages> attribute
    import xml.etree.ElementTree as ET
    tree = ET.fromstring(get_page(user))
    recent = tree.find('recenttracks')
    if recent is None:
        # The response did not contain a <recenttracks> element, so there is
        # no totalPages attribute to read
        sys.exit('could not determine the number of pages for %s' % user)
    end = int(recent.get('totalPages'))

# Single parenthesised argument: valid as a print statement on Python 2 and as
# a function call on Python 3
print('fetching pages %s till %s for %s' % (start, end, user))

# Create the output directory if needed
outdir = '%s%s' % (OUTDIR, user)
try:
    os.makedirs(outdir)
except OSError as exc:
    # An already existing directory is fine; anything else is a real error
    if exc.errno != errno.EEXIST:
        raise

# Loop through the pages, fetch XML and write to disk
for page in range(start, end + 1):
    print('%s: %s of %s' % (user, page, end))
    # Fetch first so a failed request does not leave an empty file behind
    data = get_page(user, page)
    # Binary mode writes the response bytes unchanged; the with-block closes
    # the file even if the write fails
    with open('%s/%s.xml' % (outdir, page), 'wb') as out:
        out.write(data)
print('done fetching for %s' % user)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment