Skip to content

Instantly share code, notes, and snippets.

@DomNomNom
Created September 4, 2015 03:39
Show Gist options
  • Save DomNomNom/e37cb08b16bfd415800f to your computer and use it in GitHub Desktop.
Save DomNomNom/e37cb08b16bfd415800f to your computer and use it in GitHub Desktop.
from bs4 import BeautifulSoup as bs
import urllib.request as request
import urllib.parse as parse
import pprint
# Selects sections of html that we are interested in
def prepare(html):
    """Pick out the raw page elements that describe a game.

    Parses *html* with the 'html.parser' backend and returns a dict of
    still-unprocessed lookup results (whatever ``find`` / ``find_all``
    return, including ``None`` for a missing element):

    - 'name':     the 'apphub_AppName' div
    - 'price':    the 'game_purchase_price price' div
    - 'currency': the same div as 'price'
    - 'tags':     every 'app_tag' anchor
    - 'rating':   {'count': ratingValue meta, 'total': reviewCount meta}
    """
    soup = bs(html, 'html.parser')

    # Price and currency are both read from the same element, so look it
    # up once and share the result.
    price_div = soup.find('div', {'class': 'game_purchase_price price'})

    rating = {
        'count': soup.find('meta', {'itemprop': 'ratingValue'}),
        'total': soup.find('meta', {'itemprop': 'reviewCount'}),
    }

    return {
        'name': soup.find('div', {'class': 'apphub_AppName'}),
        'price': price_div,
        'currency': price_div,
        'tags': soup.find_all('a', {'class': 'app_tag'}),
        'rating': rating,
    }
def cook(game):
    """Turn the raw elements collected by ``prepare`` into plain values.

    *game* is a dict with the keys 'name', 'price', 'currency', 'tags' and
    'rating' (itself a dict with 'count' and 'total').  Each value is run
    through a matching processor; the result is coerced to the type of the
    corresponding default.  If processing or coercion fails for a field
    (for example because the element was missing and the value is
    ``None``), that field falls back to its default.

    Returns a new dict with the same shape:
    ``{'name': str, 'price': float, 'currency': str, 'tags': list,
    'rating': {'total': int, 'count': int}}``.

    Raises AssertionError if *game* contains a key that has no processor
    or no default.
    """
    import re  # only needed by the price parser below

    defaults = {
        'name': '',
        'price': 0.0,
        'currency': '',
        'tags': [],
        'rating': {
            'total': 0,  # if this were to be changed to 0.0, it would be parsed as a float
            'count': 0,
        },
    }

    def processCurrency(currencyTag):
        text = currencyTag.text
        if 'Free to Play' in text:
            return ''
        return text.strip().split()[0]

    def processPrice(priceTag):
        # Previously this took the first whitespace-separated token, which
        # is the currency marker (or a token with the marker glued on), so
        # float() always failed and the price silently became 0.0.
        # Pull out the numeric amount instead.
        found = re.search(r'\d+(?:[.,]\d+)*', priceTag.text)
        if found is None:
            # e.g. "Free to Play": no amount -> caller falls back to default
            raise ValueError('no numeric price in ' + repr(priceTag.text))
        number = found.group(0)
        # The last separator is the decimal mark only when it is followed
        # by one or two digits; otherwise every separator groups thousands.
        cut = max(number.rfind('.'), number.rfind(','))
        if cut != -1 and len(number) - cut - 1 <= 2:
            whole, fraction = number[:cut], number[cut + 1:]
        else:
            whole, fraction = number, ''
        whole = whole.replace('.', '').replace(',', '')
        return float(whole + '.' + fraction) if fraction else float(whole)

    processors = {
        'name': lambda x: x.text.strip(),
        'price': processPrice,
        'currency': processCurrency,
        'tags': lambda x: [tag.text.strip() for tag in x],
        'rating': {
            'total': lambda x: x['content'],
            'count': lambda x: x['content'],
        },
    }

    # Runs the processing functions in processors on the game data.
    # It infers the target type from the type of the matching default.
    def process(game, processors, defaults):
        assert all(gamekey in processors and gamekey in defaults for gamekey in game)
        newGame = {}
        for key, processor in processors.items():
            if isinstance(processor, dict):
                # Nested section: recurse with the matching sub-dicts.
                newGame[key] = process(game[key], processor, defaults[key])
            elif callable(processor):
                try:
                    newValue = processor(game[key])
                    defaultType = type(defaults[key])
                    if type(newValue) is not defaultType:
                        newValue = defaultType(newValue)
                    newGame[key] = newValue
                except Exception:
                    # Best effort: any parsing failure yields the default.
                    # (Not a bare except, so Ctrl-C / SystemExit still propagate.)
                    assert key in defaults, 'default not defined for ' + repr(key)
                    newGame[key] = defaults[key]
            else:
                raise Exception('processors dict contains something unexpected: ' + repr(processor))
        return newGame

    return process(game, processors, defaults)
def getHTML(url):
    """Download *url* with a browser-like User-Agent and return its text.

    An (empty) query string is url-encoded and appended after '?'.  The
    response body is read in full and decoded as UTF-8.  The response is
    closed before returning, even if reading or decoding fails.

    Errors raised by ``urlopen`` or by decoding are not caught here.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:38.0) Gecko/20100101 Firefox/38.0',
        # NOTE(review): 'utf-8' is a charset, not a content coding; this
        # header is kept as-is to leave the request unchanged - confirm
        # whether 'identity' (or no header) was intended.
        'Accept-Encoding': "utf-8",
    }
    # request body
    body = {}
    data = parse.urlencode(body)
    req = request.Request(url + '?' + data, headers=headers)
    # Context manager closes the connection instead of leaking it.
    with request.urlopen(req) as resp:
        return resp.read().decode('utf-8')
# Fetch, parse and pretty-print the store page of each listed app id.
for app in (220, 440):
    url = 'http://store.steampowered.com/app/' + str(app)
    html = getHTML(url)
    raw = prepare(html)
    pprint.pprint(cook(raw))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment