-
-
Save advicebanana/80f862135639b6672821 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json | |
from bs4 import BeautifulSoup | |
import re | |
import os.path | |
from itertools import count | |
from time import strftime,strptime | |
# original: https://gist.github.com/dmn001/6390139c037949f7ea4b | |
#months = ['Jan', 'Feb', 'Mar', 'Apr','May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'] | |
# Date layout the market uses for your locale, written as a strptime pattern.
# The parser appends " %Y" to it, so do not include a year here.
#   %d - day of the month
#   %b - abbreviated month name, e.g. "Dec"
# Examples: "%d %b" matches "1 Dec"; "%b %d" matches "Dec 1".
dateformatstring = "%d %b"
def preamble():
    """Write the CSV header line to the module-level output file ``of``.

    ``of`` is opened in binary mode by the script body, so the header is
    encoded to UTF-8 before writing, exactly like the data rows written by
    ``extract``. Writing the bare text string only worked on Python 2 and
    raises ``TypeError`` on Python 3.
    """
    header = "txid;name;date;price;action;gamename;accountid\n"
    of.write(header.encode('utf-8'))
def _resolve_listing_day(date_text, year, previous_day):
    """Parse a year-less listing date and infer which year it belongs to.

    The date is first parsed with *year* (a four-digit string). If it then
    falls later in the year than *previous_day*, the listing is taken to
    belong to the year before, and the date is parsed again with that year.
    This relies on rows being processed newest-first and on at least one
    transaction having happened in every year.

    Returns a ``(struct_time, year)`` tuple; ``year`` is the possibly
    decremented year string to carry into the next call.
    """
    stamp_format = dateformatstring + " %Y"
    day = strptime(date_text + " " + year, stamp_format)
    if previous_day.tm_yday < day.tm_yday:
        year = str(int(year) - 1)
        day = strptime(date_text + " " + year, stamp_format)
    return day, year


def extract(accountid):
    """Append all market transactions of *accountid* to the output CSV.

    Reads ``<accountid>_001.json``, ``<accountid>_002.json``, ... from the
    current directory, stopping at the first number that has no file. The
    ``results_html`` entry of each file is parsed with BeautifulSoup, and one
    ``;``-separated, UTF-8 encoded line per transaction is written to the
    module-level file object ``of`` with the columns
    ``txid;name;date;price;action;gamename;accountid``.

    A row whose text contains "Buyer:" is recorded with action ``s``; a row
    containing "Seller:" is recorded with action ``b``; any other row is
    skipped. Commas in item names become underscores, the euro sign is
    stripped from prices and their decimal comma becomes a dot.

    Each name is printed to stdout as its file is processed.
    """
    # Compile the row classifiers once instead of once per row.
    buyer_label = re.compile("Buyer:")
    seller_label = re.compile("Seller:")

    year = strftime("%Y")
    # Sentinel "previous day": the last day of the current year, so the first
    # row can never trigger a year rollback. Built from a numeric pattern so
    # it does not depend on the locale's month abbreviations.
    oldday = strptime(year + "-12-31", "%Y-%m-%d")

    for file_num in count(1):
        filename = "%s_%03d.json" % (accountid, file_num)
        if not os.path.isfile(filename):
            break
        # Single-argument call form prints the same on Python 2 and 3.
        print(filename)
        # Binary mode lets json decode the bytes itself instead of relying on
        # the locale's default text encoding (needs Python 2 or 3.6+).
        with open(filename, "rb") as data_file:
            data = json.load(data_file)

        doc = BeautifulSoup(data['results_html'], "lxml")
        rows = doc.find_all('div', class_="market_listing_row market_recent_listing_row")
        for item in rows:
            if item.find(text=buyer_label):
                action = "s"
            elif item.find(text=seller_label):
                action = "b"
            else:
                # Neither a purchase nor a sale row: nothing to record.
                continue

            txid = item['id'].split('_')[2]
            name = item.find('span', class_="market_listing_item_name").text.strip()
            name = name.replace(',', '_')
            date = item.find('div', class_="market_listing_right_cell market_listing_listed_date").text.strip()
            newday, year = _resolve_listing_day(date, year, oldday)
            oldday = newday
            sold_price = item.find('span', class_="market_listing_price").text.strip().replace(u"\u20AC", "")
            sold_price = sold_price.replace(',', '.')
            gamename = item.find('span', class_="market_listing_game_name").text.strip()

            line = "%s;%s;%s;%s;%s;%s;%s\n" % (
                txid, name, strftime("%Y-%m-%d", newday), sold_price,
                action, gamename, accountid,
            )
            of.write(line.encode('utf-8'))
# Collect the transactions of every account into one CSV file. ``of`` stays a
# module-level name because preamble() and extract() write through it; the
# ``with`` block closes the file even if one of them raises.
with open("all_output.csv", "wb") as of:
    preamble()
    for accid in [0, 1, 2]:
        extract(accid)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment