Created
September 4, 2015 03:39
-
-
Save DomNomNom/e37cb08b16bfd415800f to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from bs4 import BeautifulSoup as bs | |
import urllib.request as request | |
import urllib.parse as parse | |
import pprint | |
def prepare(html):
    """Select the sections of the HTML that we are interested in.

    Parses *html* with the 'html.parser' backend and returns a dict holding
    the raw lookup results (not yet converted to plain values):

    * 'name', 'price', 'currency' -- result of ``find`` for a single div
      ('price' and 'currency' come from the same price div),
    * 'tags' -- result of ``find_all`` for the 'app_tag' anchors,
    * 'rating' -- dict with the 'ratingValue' meta under 'count' and the
      'reviewCount' meta under 'total'.
    """
    document = bs(html, 'html.parser')

    def first_div(css_class):
        return document.find('div', {'class': css_class})

    def first_meta(itemprop):
        return document.find('meta', {'itemprop': itemprop})

    # Price and currency are both read from the same element later on.
    price_node = first_div('game_purchase_price price')
    rating_nodes = {
        'count': first_meta('ratingValue'),
        'total': first_meta('reviewCount'),
    }
    return {
        'name': first_div('apphub_AppName'),
        'price': price_node,
        'currency': price_node,
        'tags': document.find_all('a', {'class': 'app_tag'}),
        'rating': rating_nodes,
    }
def cook(game):
    """Convert the raw selections in *game* into plain Python values.

    *game* is a dict with the keys 'name', 'price', 'currency', 'tags' and a
    nested 'rating' dict ('total', 'count').  The name/price/currency values
    are expected to expose ``.text``, 'tags' an iterable of such objects, and
    the rating entries to support ``['content']`` lookup.

    Every value is run through its processor and then coerced to the type of
    the matching entry in ``defaults``.  If processing or coercion fails for a
    key (for example because the value is None), that key's default is used.

    Returns a new dict with the same shape as ``defaults``.
    """
    defaults = {
        'name': '',
        'price': 0.0,
        'currency': '',
        'tags': [],
        'rating': {
            'total': 0,  # if this were to be changed to 0.0, it would be parsed as a float
            'count': 0,
        },
    }

    def processCurrency(currencyTag):
        """Return the first whitespace-separated token, or '' for free games."""
        text = currencyTag.text
        if 'Free to Play' in text:
            return ''
        return text.strip().split()[0]

    processors = {
        'name': lambda x: x.text.strip(),
        # NOTE(review): this takes the same first token as 'currency'.  For a
        # text such as 'NZ$ 12.99' the float conversion fails and the default
        # 0.0 is used -- confirm which token was intended.
        'price': lambda x: x.text.strip().split()[0],
        'currency': processCurrency,
        'tags': lambda x: [tag.text.strip() for tag in x],
        'rating': {
            'total': lambda x: x['content'],
            'count': lambda x: x['content'],
        },
    }

    # Runs the processing functions in processors on the game data.
    # It infers the target type from the type of the matching default.
    def process(game, processors, defaults):
        assert all(gamekey in processors and gamekey in defaults for gamekey in game)
        newGame = {}
        for key, value in processors.items():
            if isinstance(value, dict):
                # Nested section: recurse with the matching sub-dicts.
                newGame[key] = process(game[key], value, defaults[key])
            elif callable(value):
                try:
                    newValue = value(game[key])
                    defaultType = type(defaults[key])
                    if type(newValue) is not defaultType:
                        newValue = defaultType(newValue)
                    newGame[key] = newValue
                except Exception:
                    # Best effort: any ordinary failure falls back to the
                    # default.  KeyboardInterrupt/SystemExit are deliberately
                    # not caught so the program can still be stopped.
                    assert key in defaults, 'default not defined for ' + repr(key)
                    newGame[key] = defaults[key]
            else:
                raise Exception('processors dict contains something unexpected: ' + repr(value))
        return newGame

    return process(game, processors, defaults)
def getHTML(url):
    """Fetch *url* and return the response body decoded as UTF-8 text.

    The request is sent with a browser-like User-Agent header.  The response
    is closed as soon as it has been read.

    Raises whatever ``urllib.request.urlopen`` raises on failure, and
    ``UnicodeDecodeError`` if the body is not valid UTF-8.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:38.0) Gecko/20100101 Firefox/38.0',
        # NOTE(review): 'utf-8' is a charset, not a content coding; this was
        # possibly meant to be Accept-Charset.  Left unchanged -- confirm.
        'Accept-Encoding': "utf-8",
    }
    # Query-string parameters (currently none); they are appended to the URL.
    body = {}
    data = parse.urlencode(body)
    req = request.Request(url + '?' + data, headers=headers)
    # The context manager closes the connection even if read/decode fails.
    with request.urlopen(req) as resp:
        return resp.read().decode('utf-8')
# Download, select, convert and pretty-print the data for each app id.
for app in (220, 440):
    url = 'http://store.steampowered.com/app/' + str(app)
    html = getHTML(url)
    selected = prepare(html)
    cooked = cook(selected)
    pprint.pprint(cooked)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment