Created
September 16, 2012 16:00
-
-
Save zmsmith/3732960 to your computer and use it in GitHub Desktop.
Find the year an album was published
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
import pyquery | |
import re | |
import sys | |
import urllib | |
# Web-search endpoint queried by get_year() to locate an album's wiki page.
# NOTE(review): this looks like Google's legacy AJAX Search API, which is
# believed to have been retired -- confirm the endpoint still responds.
SEARCH_URL = 'http://ajax.googleapis.com/ajax/services/search/web'
def unquote_all(url):
    """Percent-decode ``url`` repeatedly until decoding stops changing it.

    A URL may have been quoted more than once (e.g. ``%2522`` -> ``%22``
    -> ``"``), so a single pass of ``unquote`` is not always enough.

    Args:
        url: The (possibly multiply) percent-encoded string.

    Returns:
        The fully decoded string; ``url`` unchanged if nothing was encoded.
    """
    # ``unquote`` lives in ``urllib.parse`` on Python 3 and directly in
    # ``urllib`` on Python 2; import locally so either interpreter works.
    try:
        from urllib.parse import unquote
    except ImportError:
        from urllib import unquote

    decoded = unquote(url)
    # Iterate rather than recurse so heavily nested quoting cannot hit the
    # interpreter's recursion limit.
    while decoded != url:
        url, decoded = decoded, unquote(decoded)
    return decoded
def get_year(album):
    """Look up the year an album was published.

    Searches ``SEARCH_URL`` for "<album> album wiki", fetches the first
    result, and returns the first four-digit number found in the page
    element(s) carrying the ``published`` CSS class.

    Args:
        album: Album title (free text) to search for.

    Returns:
        The year as a four-character string.

    Raises:
        IndexError: If the search response contains no results.
        ValueError: If no four-digit year is found on the fetched page.
    """
    query = '{0} album wiki'.format(album)
    res = requests.get(SEARCH_URL, params={'q': query, 'v': '1.0'})

    # requests < 1.0 exposed ``json`` as a property; 1.0+ made it a method.
    payload = res.json() if callable(res.json) else res.json
    response_data = (payload or {}).get('responseData') or {}
    results = response_data.get('results') or []
    if not results:
        raise IndexError('no search results for {0!r}'.format(query))

    # Hack to deal with the David Bowie album '"Heroes"': special
    # characters left percent-encoded in the url were breaking the script.
    url = unquote_all(results[0]['url'])
    # Single-argument parenthesised print works on both Python 2 and 3.
    print("Looking up on wiki at {}".format(url))

    text = pyquery.PyQuery(requests.get(url).text).find('.published').text()
    match = re.search(r'(\d{4})', text or '')
    if match is None:
        raise ValueError('no publication year found at {0}'.format(url))
    return match.group(1)
if __name__ == '__main__':
    # All command-line words together form the album title.
    album_name = ' '.join(sys.argv[1:])
    if not album_name.strip():
        # Without a title the search would run for just " album wiki".
        sys.exit('usage: {0} ALBUM NAME'.format(sys.argv[0]))
    # Single-argument parenthesised print works on both Python 2 and 3.
    print(get_year(album_name))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment