Last active
August 29, 2015 14:13
-
-
Save jilljenn/a130e60aa29ccdd7a79f to your computer and use it in GitHub Desktop.
Data retriever for Oscar nominees
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json
import os
import re
from urllib.parse import urlencode
from urllib.request import urlopen

from bs4 import BeautifulSoup
# Get your API key at http://developer.rottentomatoes.com
# The key is taken from the RT_API_KEY environment variable when it is set,
# so it does not have to be written into this file; otherwise it stays empty.
RT_API_KEY = os.environ.get("RT_API_KEY", "")
def get_oscars_nominees():
    """Return the nominee titles from the first nominee row of the Oscars page.

    Downloads http://oscar.go.com/nominees, locates the first
    ``div.nomineeRowContainer`` and collects the text of every
    ``.nomineesList li .title`` element inside it.

    Returns:
        A list of title strings, in page order. The list is empty when the
        page contains no ``nomineeRowContainer`` element.
    """
    # The context manager closes the HTTP response once the body is read.
    with urlopen('http://oscar.go.com/nominees') as response:
        # Naming the parser avoids depending on whichever one is installed.
        oscars = BeautifulSoup(response.read(), 'html.parser')
    # NOTE(review): the first row is presumably the Best Picture category.
    best_picture = oscars.find('div', 'nomineeRowContainer')
    if best_picture is None:
        return []
    return [movie.text
            for movie in best_picture.select('.nomineesList li .title')]
def get_rt_data(title, min_year=2014):
    """Look up a movie's critics score and runtime on Rotten Tomatoes.

    Queries the ``movies.json`` search endpoint with ``RT_API_KEY`` and
    ``title`` and inspects the returned ``movies`` list in order.

    Args:
        title: Search string sent as the ``q`` parameter.
        min_year: Smallest ``year`` value a result may have to be accepted.

    Returns:
        A dict with keys ``'score'`` (the result's
        ``ratings.critics_score``) and ``'runtime'`` for the first result
        whose year is at least ``min_year``, or ``None`` when no result
        qualifies.
    """
    query = urlencode({'apikey': RT_API_KEY, 'q': title})
    # The context manager closes the HTTP response once the body is read.
    with urlopen(
            'http://api.rottentomatoes.com/api/public/v1.0/movies.json?'
            + query) as response:
        payload = json.loads(response.read().decode('utf-8'))
    for movie in payload['movies']:
        year = movie.get('year')
        # A missing or non-integer year cannot be compared with an int in
        # Python 3, so such results are skipped instead of raising.
        if isinstance(year, int) and year >= min_year:
            return {'score': movie['ratings']['critics_score'],
                    'runtime': movie['runtime']}
    return None
def get_wiki_release(title):
    """Find a film's French release date on French Wikipedia.

    Runs an ``opensearch`` query on fr.wikipedia.org for ``title``. Each
    hit whose name contains ``'film'`` (or the sole hit, when there is only
    one) is downloaded and its ``#mw-content-text ul ul li`` items are
    scanned for one whose first ``span`` has ``data-sort-value="France"``.

    Args:
        title: Search string sent to the opensearch API.

    Returns:
        The text of that item's ``span.date-lien`` element, or ``None`` when
        no candidate page yields one. In the ``None`` case the title and the
        search results are printed to help diagnose the miss.
    """
    query = urlencode({'action': 'opensearch', 'limit': 15, 'namespace': 0,
                       'format': 'json', 'search': title})
    # The context manager closes the HTTP response once the body is read.
    with urlopen('https://fr.wikipedia.org/w/api.php?' + query) as response:
        _, names, _, urls = json.loads(response.read().decode('utf-8'))
    for name, url in zip(names, urls):
        if len(names) == 1 or 'film' in name:
            with urlopen(url) as page:
                # Naming the parser avoids depending on whichever is installed.
                wiki = BeautifulSoup(page.read(), 'html.parser')
            for line in wiki.select('#mw-content-text ul ul li'):
                span = line.find('span')
                if span is None or span.get('data-sort-value') != 'France':
                    continue
                date = line.find('span', 'date-lien')
                # A France row without a date span is skipped, not a crash.
                if date is not None:
                    return date.text
    print(title, names, urls)
    return None
# Print one line per nominee: critics score, runtime and French release date.
print('Nominations aux Oscars (% Rotten Tomatoes, durée, sortie française) :')
for title in get_oscars_nominees():
    # Strip an alternative-title suffix of the form " or (...)" before
    # searching; the raw string keeps "\(" a regex escape, not a string one.
    safe_title = re.sub(r' or \(.*\)', '', title)
    data = get_rt_data(safe_title)
    release_date = get_wiki_release(safe_title)
    # Explicit checks rather than assert, which is removed under "python -O".
    if data is None:
        raise RuntimeError(
            'No Rotten Tomatoes data found for %r' % safe_title)
    if release_date is None:
        raise RuntimeError(
            'No French release date found for %r' % safe_title)
    hours, minutes = divmod(data['runtime'], 60)
    # Minutes are zero-padded so 125 minutes prints as "2 h 05".
    print('- %s (%d %%, %d h %02d, %s)'
          % (safe_title, data['score'], hours, minutes, release_date))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment