Skip to content

Instantly share code, notes, and snippets.

@kykosic
Created November 30, 2018 12:06
Show Gist options
  • Save kykosic/cbf9559424da9470a3407fa89f2d4169 to your computer and use it in GitHub Desktop.
Save kykosic/cbf9559424da9470a3407fa89f2d4169 to your computer and use it in GitHub Desktop.
Scrape current Artifact card prices from Steam Market
#!/usr/bin/env python
"""
Scrape artifact card prices
"""
from __future__ import division
import os
import time
import logging
from io import StringIO
from datetime import datetime
import boto3
import requests
import pandas as pd
# Name of the S3 bucket that write_output() uploads the price CSV into.
S3_BUCKET = 'artifact-pricing'
# Folder (key prefix) inside the bucket; each upload is stored under it
# with a timestamped file name.
DIR_NAME = 'daily_prices'
# Steam Community Market search endpoint polled by get_prices(). It asks for
# 100 listings per request; the trailing %d is filled with the start offset.
# NOTE(review): appid 583950 is presumably Artifact's Steam app id -- confirm.
URL = 'https://steamcommunity.com/market/search/render/?appid=583950&norender=1&count=100&start=%d'
# Upper bound on the retry counter in main(): a failed run is retried until
# the counter reaches this value, after which the exception is re-raised.
MAX_RETRIES = 3
def set_logging(level=logging.INFO):
    """ Set logging format and threshold on the root logger.

    Args:
        level: Minimum severity handed to ``logging.basicConfig``. Defaults
            to ``logging.INFO``, the value that used to be hard-coded, so
            existing ``set_logging()`` calls behave exactly as before.

    Note:
        ``logging.basicConfig`` is a no-op when the root logger already has
        handlers, so call this before anything else configures logging.
    """
    fmt = "[%(levelname)s %(asctime)s] %(message)s"
    logging.basicConfig(format=fmt, level=level)
def write_output(df):
    """ Write a table to output folder in S3 bucket, indexed by date.

    The table is serialised to CSV in memory (no index column) and uploaded
    to ``S3_BUCKET`` under the key ``<DIR_NAME>/<timestamp>.csv``, where the
    timestamp is ``datetime.now()`` formatted as ``YYYY_mm_dd_HH_MM_SS``.

    Args:
        df: Table to upload. Only its ``to_csv`` method is used here.
    """
    # Create target key. S3 keys always use '/' as the separator, so join
    # explicitly: os.path.join would produce a backslash on Windows and the
    # object would land outside the intended folder.
    filename = datetime.now().strftime('%Y_%m_%d_%H_%M_%S') + '.csv'
    target_key = '/'.join((DIR_NAME, filename))
    # Log the full key (including the folder) so the message names the
    # object that is actually written.
    logging.info("Loading to S3 bucket %s/%s", S3_BUCKET, target_key)

    # Write file to memory buffer
    mem_buffer = StringIO()
    df.to_csv(mem_buffer, index=False)

    s3 = boto3.resource('s3')
    s3.Object(S3_BUCKET, target_key).put(Body=mem_buffer.getvalue())
def get_prices(timeout=30):
    """ Iteratively generate card prices from URL.

    Requests the search endpoint page by page, advancing the start offset by
    100 each time, and stops at the first page whose ``results`` list is
    empty.

    Args:
        timeout: Seconds to wait on each HTTP request before giving up.

    Yields:
        pandas.DataFrame: One frame per page with a ``card`` column (the
        listing ``name``) and a ``price`` column (``sell_price`` divided by
        100 -- presumably cents to currency units; confirm against the API).

    Raises:
        requests.RequestException: On timeout, connection failure, or an
            HTTP error status.
        KeyError: If a response body has no ``results`` entry.
    """
    idx = 0
    while True:
        # Without a timeout a stalled connection blocks forever, so the
        # caller's retry logic would never get a chance to run.
        response = requests.get(url=URL % idx, timeout=timeout)
        # Surface 4xx/5xx responses here instead of as a confusing
        # JSON-decoding or KeyError failure further down.
        response.raise_for_status()
        results = response.json()['results']
        idx += 100
        if not results:
            return
        yield pd.DataFrame({
            'card': [res['name'] for res in results],
            'price': [res['sell_price'] / 100. for res in results],
        })
def run():
    """ Fetch all price pages, merge them into one table and upload it """
    logging.info("Fetching...")
    # Drain the generator first, then stack the per-page frames and give the
    # combined table a fresh 0..n-1 index.
    pages = list(get_prices())
    combined = pd.concat(pages)
    price_df = combined.reset_index(drop=True)
    logging.info("%d cards found", len(price_df))
    write_output(price_df)
    logging.info("Done.")
def main(try_num=0):
    """ Wrap main for retries.

    Calls run(); when it raises, logs the error, waits 5 seconds and tries
    again. Once the retry counter has reached MAX_RETRIES the failure is
    logged and the exception is re-raised.

    Args:
        try_num: Number of retries already used up. The default of 0 allows
            the initial attempt plus MAX_RETRIES retries.

    Raises:
        Exception: Whatever run() raised on the final attempt.
    """
    # Loop instead of recursing from inside the except block: recursion there
    # nests one stack frame per attempt and chains every earlier failure onto
    # the final traceback as "during handling of the above exception...".
    while True:
        try:
            run()
            return
        except Exception as e:
            if try_num >= MAX_RETRIES:
                logging.error("Maximum retries exceeded")
                raise
            logging.error("Caught exception: %s, retrying...", e)
        # Back off briefly before the next attempt.
        time.sleep(5)
        try_num += 1
if __name__ == "__main__":
    # Script entry point: configure logging before the first message is
    # emitted, then start the scrape with its retry wrapper.
    set_logging()
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment