Created
November 30, 2018 12:06
-
-
Save kykosic/cbf9559424da9470a3407fa89f2d4169 to your computer and use it in GitHub Desktop.
Scrape current Artifact card prices from Steam Market
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
"""
Scrape current Artifact card prices from the Steam Community Market and
upload them to an S3 bucket as a timestamped CSV file.
"""
from __future__ import division | |
import os | |
import time | |
import logging | |
from io import StringIO | |
from datetime import datetime | |
import boto3 | |
import requests | |
import pandas as pd | |
# Destination bucket and key prefix ("folder") for the uploaded CSV snapshots.
S3_BUCKET = "artifact-pricing"
DIR_NAME = "daily_prices"

# Steam Market search endpoint returning JSON, 100 listings per page.
# The trailing %d is filled in with the paging offset (the `start` parameter).
URL = (
    "https://steamcommunity.com/market/search/render/"
    "?appid=583950&norender=1&count=100&start=%d"
)

# How many times the scrape is re-run after a failure before giving up.
MAX_RETRIES = 3
def set_logging():
    """ Configure root logging: INFO level, "[LEVEL timestamp] message" lines """
    logging.basicConfig(
        level=logging.INFO,
        format="[%(levelname)s %(asctime)s] %(message)s",
    )
def write_output(df):
    """ Write a table to output folder in S3 bucket, indexed by date

    The table is serialised to CSV (without the index column) in memory and
    uploaded to ``S3_BUCKET`` under the key ``DIR_NAME/<timestamp>.csv``.
    The timestamp comes from ``datetime.now()``, i.e. naive local time.

    :param df: table to upload; must provide ``to_csv`` (a pandas DataFrame)
    """
    # Create target key. S3 keys always use '/' as the separator, so join
    # explicitly instead of os.path.join, which would emit '\' on Windows.
    filename = datetime.now().strftime('%Y_%m_%d_%H_%M_%S') + '.csv'
    target_file = '/'.join((DIR_NAME, filename))
    # Log the full key (including the DIR_NAME prefix) that is actually written
    logging.info("Loading to S3 bucket %s/%s", S3_BUCKET, target_file)

    # Write file to memory buffer
    mem_buffer = StringIO()
    df.to_csv(mem_buffer, index=False)

    # Upload the buffered CSV text as the object body
    s3 = boto3.resource('s3')
    s3.Object(S3_BUCKET, target_file).put(Body=mem_buffer.getvalue())
def get_prices(timeout=30):
    """ Iteratively generate card prices from URL

    Requests the search endpoint page by page (offset 0, 100, 200, ...) and
    yields one DataFrame per page with the columns ``card`` (listing name)
    and ``price`` (``sell_price`` divided by 100). Iteration stops at the
    first page whose ``results`` list is empty.

    :param timeout: seconds to wait for each HTTP request before
        ``requests`` raises a timeout error, so a stalled connection cannot
        hang the scrape forever
    :raises requests.exceptions.RequestException: on connection problems,
        timeouts or a non-2xx HTTP status
    """
    idx = 0
    while True:
        response = requests.get(url=URL % idx, timeout=timeout)
        # Fail with a clear HTTP error (for example when the server rejects
        # the request) instead of trying to parse an error page as JSON.
        response.raise_for_status()
        data = response.json()

        results = data['results']
        if not results:
            return
        # Page size is fixed by the `count=100` query parameter in URL
        idx += 100

        # NOTE(review): sell_price is presumably in cents, hence the / 100.
        yield pd.DataFrame({
            'card': [res['name'] for res in results],
            'price': [res['sell_price'] / 100. for res in results]
        })
def run():
    """ Fetch every price page, merge them into one table and upload it """
    logging.info("Fetching...")

    # Collect all pages first, then stack them and renumber the rows 0..n-1
    pages = list(get_prices())
    stacked = pd.concat(pages)
    price_df = stacked.reset_index(drop=True)

    logging.info("%d cards found", len(price_df.index))
    write_output(price_df)
    logging.info("Done.")
def main(try_num=0):
    """ Wrap main for retries

    Calls ``run()``; when it raises, logs the error, waits 5 seconds and
    tries again. Once the retry counter has reached ``MAX_RETRIES`` the
    failure is logged and the last exception is re-raised to the caller.

    :param try_num: number of retries already used (0 for a fresh start)
    :raises Exception: whatever ``run()`` raised on the final attempt
    """
    attempt = try_num
    while True:
        try:
            run()
            return
        except Exception as e:
            if attempt >= MAX_RETRIES:
                logging.error("Maximum retries exceeded")
                raise
            logging.error("Caught exception: %s, retrying...", e)
        # Wait and retry outside the except block: a loop (rather than
        # recursing from inside the handler) keeps the stack flat and avoids
        # chaining every earlier failure onto the final traceback.
        time.sleep(5)
        attempt += 1
if __name__ == "__main__":
    # Script entry point: set up log formatting before the scrape starts so
    # that every message it emits uses the configured format.
    set_logging()
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment