Skip to content

Instantly share code, notes, and snippets.

@zmsmith
Created September 16, 2012 16:00
Show Gist options
  • Save zmsmith/3732960 to your computer and use it in GitHub Desktop.
Save zmsmith/3732960 to your computer and use it in GitHub Desktop.
Find the year an album was published
import requests
import pyquery
import re
import sys
import urllib
SEARCH_URL = 'http://ajax.googleapis.com/ajax/services/search/web'
def unquote_all(url):
    """Percent-decode *url* repeatedly until decoding no longer changes it.

    A URL that has been encoded more than once (for example ``%2522``,
    which decodes to ``%22`` and then to ``"``) needs several passes
    before all escapes are resolved.

    Args:
        url: The (possibly multiply) percent-encoded string.

    Returns:
        The fully decoded string.
    """
    # ``unquote`` lives in ``urllib.parse`` on Python 3 and directly in
    # ``urllib`` on Python 2; importing locally keeps the function usable
    # on both without touching the file-level imports.
    try:
        from urllib.parse import unquote
    except ImportError:
        from urllib import unquote

    # Iterate instead of recursing: same fixed-point result, no dependence
    # on the interpreter's recursion limit.
    while True:
        decoded = unquote(url)
        if decoded == url:
            return decoded
        url = decoded
def get_year(album):
    """Return the first four-digit year found on an album's wiki page.

    Runs a web search for ``"<album> album wiki"`` against ``SEARCH_URL``,
    fetches the first result, and extracts the first run of four digits
    from the text of the elements with CSS class ``published``
    (presumably the release-date field of the Wikipedia infobox -- this
    depends on the page markup, which is not visible from here).

    Args:
        album: The album title to search for.

    Returns:
        The year as a four-character string, e.g. ``'1977'``.

    Raises:
        ValueError: If the search returns no results, or the fetched page
            contains no four-digit number in its ``.published`` text.
    """
    query = '{0} album wiki'.format(album)
    res = requests.get(SEARCH_URL, params={'q': query, 'v': '1.0'})

    # ``Response.json`` was a property in requests < 1.0 and is a method in
    # later releases; accept either so the script works with both.
    payload = res.json() if callable(res.json) else res.json

    # Guard each level: a failed search can yield a missing/None
    # ``responseData`` or an empty ``results`` list, which previously
    # surfaced as an opaque TypeError/IndexError.
    response_data = (payload or {}).get('responseData') or {}
    results = response_data.get('results') or []
    if not results:
        raise ValueError('no search results for {0!r}'.format(query))
    url = results[0]['url']

    # Hack to deal with the David Bowie album '"Heroes"':
    # special characters in the url were breaking the script.
    url = unquote_all(url)

    # Parenthesised single-argument form prints identically on Python 2
    # and Python 3.
    print("Looking up on wiki at {}".format(url))

    text = pyquery.PyQuery(requests.get(url).text).find('.published').text()
    # ``text`` may be empty/None when the page has no ``.published`` element.
    match = re.search(r'(\d{4})', text or '')
    if match is None:
        raise ValueError('no publication year found at {0}'.format(url))
    return match.group(1)
if __name__ == '__main__':
    # Command-line entry point: all arguments are joined with spaces to
    # form the album title, so the title does not need to be quoted.
    if len(sys.argv) < 2:
        # Without a title the search query would be meaningless; exit with
        # a usage message (written to stderr, exit status 1) instead.
        sys.exit('usage: {0} ALBUM TITLE'.format(sys.argv[0]))
    # Parenthesised single-argument print works on Python 2 and Python 3.
    print(get_year(' '.join(sys.argv[1:])))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment