Last active
December 30, 2015 14:59
-
-
Save netguy204/7845412 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
import contextlib
import csv
import json
import os
import pickle
import re
import sys
import urllib

from bs4 import BeautifulSoup
def cachename(appid):
    """Return the path of the cache file used for *appid*.

    *appid* is converted with ``str``, so both numeric app ids and string
    keys such as ``'page3'`` are accepted. The path is relative to the
    current working directory.
    """
    return os.path.join('steamcache', str(appid))


def read_cache(appid):
    """Return the value cached for *appid*, or ``None`` if no cache file exists."""
    name = cachename(appid)
    if not os.path.exists(name):
        return None
    # NOTE: pickle.load can execute arbitrary code. This is only acceptable
    # because the cache files are produced by write_cache(); never point the
    # cache directory at files from an untrusted source.
    # Binary mode keeps the pickle stream byte-exact on every platform and
    # must match the mode used by write_cache().
    with open(name, 'rb') as f:
        return pickle.load(f)


def write_cache(appid, value):
    """Pickle *value* into the cache file for *appid*.

    The cache directory is created on first use so that a fresh checkout
    does not fail with an IOError on the very first write.
    """
    name = cachename(appid)
    directory = os.path.dirname(name)
    if not os.path.isdir(directory):
        os.makedirs(directory)
    with open(name, 'wb') as f:
        pickle.dump(value, f)
def extractor(data):
    """Yield the ``'name'`` of every entry in ``data['applist']['apps']['app']``."""
    apps = data['applist']['apps']['app']
    for app in apps:
        yield app['name']
def printer(gen):
    """Print each name from *gen* on its own line, dropping non-ASCII characters."""
    for title in gen:
        ascii_title = title.encode('ascii', errors='ignore')
        # A single parenthesized argument prints the same under Python 2's
        # print statement.
        print(ascii_title)
def getdata(appid):
    """Summarize the store page of *appid*, downloading it if it is not cached.

    The raw page source is looked up with ``read_cache``; on a miss it is
    fetched from ``http://store.steampowered.com/app/<appid>/`` and stored
    with ``write_cache``.

    Returns a dict with the keys ``'price'``, ``'mac'``, ``'win'`` and
    ``'lin'``. ``'price'`` is:

    * ``'?'`` when the page has no ``div.price`` element or its text is empty,
    * ``0`` when the price text starts with ``'Free'``,
    * a float when the text after its first character parses as a number,
    * otherwise the raw price text.

    The three platform values are booleans telling whether the matching
    ``span.platform_img`` element is present on the page.
    """
    pagesrc = read_cache(appid)
    if not pagesrc:
        url = 'http://store.steampowered.com/app/%d/' % appid
        # closing() releases the connection even if read() raises.
        with contextlib.closing(urllib.urlopen(url)) as response:
            pagesrc = response.read()
        write_cache(appid, pagesrc)

    soup = BeautifulSoup(pagesrc)

    price = '?'
    priceel = soup.select('div.price')
    if priceel:
        pricestr = priceel[0].text.strip()
        # Same price handling as getsearchpage(): free titles and unparsable
        # text must not abort the whole lookup with a ValueError.
        if pricestr.startswith('Free'):
            price = 0
        elif pricestr:
            try:
                # Skip the first character (presumably a currency symbol).
                price = float(pricestr[1:])
            except ValueError:
                price = pricestr

    ismac = bool(soup.select('span.platform_img.mac'))
    iswin = bool(soup.select('span.platform_img.win'))
    islin = bool(soup.select('span.platform_img.linux'))

    return {'price': price, 'mac': ismac, 'win': iswin, 'lin': islin}
# Captures the digits that follow "app/" in a store link.
idre = re.compile(r'app/([0-9]+)')

# appid -> ASCII name table; stays None until id2name() loads it.
iddict = None


def id2name(appid):
    """Return the ASCII-encoded name registered for *appid*, or ``None``.

    On the first call the table is loaded from ``steam.json`` in the current
    working directory, reading the entries under
    ``['applist']['apps']['app']``; later calls reuse the loaded table.
    Non-ASCII characters are dropped from the names.
    """
    global iddict
    # Compare against None explicitly: an empty (but loaded) table is a valid
    # cache and must not trigger a re-read of the file on every call.
    if iddict is None:
        with open('steam.json') as f:
            data = json.load(f)
        # Build the table completely before publishing it, so a failure while
        # loading never leaves a half-filled table behind for later calls.
        iddict = {
            item['appid']: item['name'].encode('ascii', errors='ignore')
            for item in data['applist']['apps']['app']
        }
    return iddict.get(appid)
def getsearchpage(page=1):
    """Yield one dict per app listed on search-results page *page*.

    The page source is looked up with ``read_cache`` under the key
    ``'page<N>'``; on a miss it is downloaded from the store search URL and
    stored with ``write_cache``.

    Each yielded dict has the keys ``'price'``, ``'win'``, ``'lin'``,
    ``'mac'``, ``'metascore'``, ``'appid'`` and ``'name'``:

    * ``'price'`` is ``''`` when no price text is available, ``0`` when the
      text starts with ``'Free'``, a float when the text after its first
      character parses as a number, otherwise the raw price text.
    * ``'metascore'`` is an int, or ``None`` when the row shows no score.
    * ``'name'`` is ASCII-encoded with other characters dropped.

    Rows whose link does not match ``idre`` or that have no ``h4`` title
    text are skipped.
    """
    cachekey = 'page%d' % page
    pagesrc = read_cache(cachekey)
    if not pagesrc:
        baseurl = 'http://store.steampowered.com/search/results'
        query = urllib.urlencode([
            ('sort_by', 'Name'), ('sort_order', 'ASC'), ('category1', '99'),
            ('cc', 'us'), ('v5', '1'), ('page', str(page)),
        ])
        url = '%s?%s' % (baseurl, query)
        # closing() releases the connection even if read() raises.
        with contextlib.closing(urllib.urlopen(url)) as response:
            pagesrc = response.read()
        write_cache(cachekey, pagesrc)

    soup = BeautifulSoup(pagesrc)
    for row in soup.select('a.search_result_row'):
        matches = idre.search(row['href'])
        if not matches:
            continue
        appid = int(matches.group(1))

        price = ''
        # Guard the [0]: a row without a price element should yield an empty
        # price rather than abort the whole page with an IndexError.
        price_elms = row.select('div.search_price')
        pricestr = price_elms[0].string if price_elms else None
        if pricestr:
            pricestr = pricestr.strip()
        if pricestr:
            if pricestr.startswith('Free'):
                price = 0
            else:
                try:
                    # Skip the first character (presumably a currency symbol).
                    price = float(pricestr[1:])
                except ValueError:
                    # Keep the unparsed text so the caller can still see it.
                    price = pricestr

        iswin = bool(row.select('span.platform_img.win'))
        islin = bool(row.select('span.platform_img.linux'))
        ismac = bool(row.select('span.platform_img.mac'))

        name_elms = row.select('h4')
        name = name_elms[0].string if name_elms else None
        if not name:
            continue
        name = name.encode('ascii', errors='ignore')

        metascore = None
        metascore_elm = row.select('div.search_metascore')
        if metascore_elm:
            metascore_str = metascore_elm[0].text.strip()
            if metascore_str:
                metascore = int(metascore_str)

        yield {'price': price, 'win': iswin, 'lin': islin, 'mac': ismac,
               'metascore': metascore, 'appid': appid, 'name': name}
def main(argv=None):
    """Run the script.

    With no argument, walk the search-result pages starting at page 1 and
    write one CSV row per app to stdout. With an argument that names an
    existing file, load it as JSON and print the app names it contains.
    Any other argument prints a short message.

    *argv* defaults to ``sys.argv``; element 1, if present, is the file name.
    """
    if argv is None:
        argv = sys.argv
    fname = argv[1] if len(argv) >= 2 else None

    if not fname:
        writer = csv.writer(sys.stdout)
        writer.writerow(['Name', 'Mac', 'Win', 'Lin', 'AppId', 'Score', 'Price'])
        page = 1
        timeszero = 0
        while True:
            count = 0
            for item in getsearchpage(page):
                writer.writerow([item['name'], item['mac'], item['win'], item['lin'],
                                 item['appid'], item['metascore'], item['price']])
                count += 1

            # sometimes pages have only things we don't care about. if
            # that happens too much then we must have reached the end
            if count == 0:
                timeszero += 1
            else:
                timeszero = 0
            if timeszero == 3:
                break
            page += 1
    elif os.path.exists(fname):
        # Use a with-block so the input file is closed deterministically.
        with open(fname) as f:
            data = json.load(f)
        printer(extractor(data))
    else:
        print('dont know what to do')


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment