#!/usr/bin/env python
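"""Scrape the Steam store.

With no arguments, crawl the store search result pages and write a CSV of
name/platforms/appid/metascore/price to stdout. With a JSON app list dump
(the structure matches the ISteamApps/GetAppList web API response) as the
only argument, print every app name it contains. Fetched pages are cached
in ./steamcache/ so reruns don't re-hit the store.
"""
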
import json
import os
import sys
import urllib
import pickle
import re
import csv
from bs4 import BeautifulSoup

def cachename(appid):
    return os.path.join('steamcache', str(appid))

def read_cache(appid):
    # return the cached value for appid, or None if it hasn't been fetched yet
    name = cachename(appid)
    if os.path.exists(name):
        with open(name) as f:
            return pickle.load(f)

def write_cache(appid, value):
    # create the cache directory on first use so a fresh checkout works
    if not os.path.isdir('steamcache'):
        os.makedirs('steamcache')
    with open(cachename(appid), 'w') as f:
        pickle.dump(value, f)

def extractor(data):
    for item in data['applist']['apps']['app']:
        yield item['name']

def printer(gen):
    for name in gen:
        print name.encode('ascii', errors='ignore')

def getdata(appid):
    # scrape price and platform support from a single app's store page
    pagesrc = read_cache(appid)
    if not pagesrc:
        url = 'http://store.steampowered.com/app/%d/' % appid
        f = urllib.urlopen(url)
        pagesrc = f.read()
        write_cache(appid, pagesrc)
    price = '?'
    soup = BeautifulSoup(pagesrc, 'html.parser')
    priceel = soup.select('div.price')
    if priceel:
        # price text looks like "$9.99"; drop the leading currency symbol
        pricestr = priceel[0].text.strip()
        price = float(pricestr[1:])
    ismac = bool(soup.select('span.platform_img.mac'))
    iswin = bool(soup.select('span.platform_img.win'))
    islin = bool(soup.select('span.platform_img.linux'))
    return {'price': price, 'mac': ismac, 'win': iswin, 'lin': islin}

idre = re.compile('app/([0-9]+)')
iddict = None

def id2name(appid):
    # lazily build the appid -> name table from the GetAppList dump on first call
    global iddict
    if not iddict:
        with open('steam.json') as f:
            data = json.load(f)
        iddict = {}
        for item in data['applist']['apps']['app']:
            iddict[item['appid']] = item['name'].encode('ascii', errors='ignore')
    return iddict.get(appid)

def getsearchpage(page=1):
    # yield one record per result row on a page of Steam store search results
    cachekey = 'page%d' % page
    pagesrc = read_cache(cachekey)
    if not pagesrc:
        baseurl = 'http://store.steampowered.com/search/results'
        query = urllib.urlencode([('sort_by', 'Name'), ('sort_order', 'ASC'), ('category1', '99'),
                                  ('cc', 'us'), ('v5', '1'), ('page', str(page))])
        url = '%s?%s' % (baseurl, query)
        pagesrc = urllib.urlopen(url).read()
        write_cache(cachekey, pagesrc)
    soup = BeautifulSoup(pagesrc, 'html.parser')
    for row in soup.select('a.search_result_row'):
        # each result row links to its app page; the appid is embedded in the URL
        link = row['href']
        matches = idre.search(link)
        if not matches:
            continue
        appid = int(matches.group(1))
        pricestr = row.select('div.search_price')[0].string
        if pricestr:
            pricestr = pricestr.strip()
        price = ''
        if pricestr:
            if pricestr.startswith('Free'):
                price = 0
            else:
                # prices look like "$9.99"; anything unparsable passes through verbatim
                try:
                    price = float(pricestr[1:])
                except ValueError:
                    price = pricestr
        iswin = bool(row.select('span.platform_img.win'))
        islin = bool(row.select('span.platform_img.linux'))
        ismac = bool(row.select('span.platform_img.mac'))
        name = row.select('h4')[0].string
        if not name:
            continue
        name = name.encode('ascii', errors='ignore')
        metascore_elm = row.select('div.search_metascore')
        metascore = None
        if metascore_elm:
            metascore_str = metascore_elm[0].text.strip()
            if metascore_str:
                metascore = int(metascore_str)
        yield {'price': price, 'win': iswin, 'lin': islin, 'mac': ismac,
               'metascore': metascore, 'appid': appid, 'name': name}

if __name__ == '__main__':
    fname = None
    if len(sys.argv) >= 2:
        fname = sys.argv[1]
    if not fname:
        # no argument: crawl the store search pages and emit CSV on stdout
        page = 1
        writer = csv.writer(sys.stdout)
        writer.writerow(['Name', 'Mac', 'Win', 'Lin', 'AppId', 'Score', 'Price'])
        timeszero = 0
        while True:
            count = 0
            for item in getsearchpage(page):
                writer.writerow([item['name'], item['mac'], item['win'], item['lin'],
                                 item['appid'], item['metascore'], item['price']])
                count = count + 1
            # sometimes pages have only things we don't care about. if that
            # happens too many times in a row, we must have reached the end
            if count == 0:
                timeszero = timeszero + 1
            else:
                timeszero = 0
            if timeszero == 3:
                break
            page = page + 1
    elif os.path.exists(fname):
        # argument is an app list JSON dump: print every app name it contains
        printer(extractor(json.loads(open(fname).read())))
    else:
        print "don't know what to do"
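
# Usage sketch (assumes this file is saved as steam.py; the JSON file, if
# given, is an app list dump shaped like the ISteamApps/GetAppList response):
#
#   python steam.py > games.csv    # crawl the store search pages into a CSV
#   python steam.py steam.json     # print every app name in the dump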