Created
June 26, 2020 10:01
-
-
Save lotka/e342a60afbc607bc649051f8021fd7f2 to your computer and use it in GitHub Desktop.
prices
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pickle
from datetime import date
from datetime import timedelta
from time import sleep
from urllib.parse import unquote_plus

import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup
from google.cloud import storage
# Seconds to wait for any single HTTP response before giving up.
_REQUEST_TIMEOUT = 30


def _median_price(printings, price_key):
    """Return the median ``price_key`` price over ``printings``, or None.

    ``printings`` is the ``data`` list of a Scryfall search response; each
    entry is expected to carry a ``prices`` dict whose values are either
    null or something ``float()`` accepts. None is returned when no
    printing has a price under ``price_key``, so callers never see NaN.
    """
    quoted = (printing.get('prices', {}).get(price_key) for printing in printings)
    values = [float(value) for value in quoted if value is not None]
    if not values:
        return None
    return float(pd.Series(values).median())


def _parse_deck_line(line):
    """Split a ``'<count> <name> (<set>) ...'`` line into ``(count, name)``.

    Returns None for lines that do not start with an integer count followed
    by a non-empty name (blank lines, section headers and the like).
    """
    card = line.split('(')[0].strip()
    count_text, _, name = card.partition(' ')
    name = name.strip()
    if not count_text.isdecimal() or not name:
        return None
    return int(count_text), name


def _summarise_deck(deck_name, url, cards):
    """Build one summary-table row from a deck's priced card list."""
    frame = pd.DataFrame(cards)
    prices = frame['price']
    return {
        'name': deck_name,
        # Weight each price by the number of copies so multiples are counted.
        'total_usd_price': np.round((prices * frame['count']).sum(), 2),
        'cards_with_no_cost': int((prices == 0.0).sum()),
        'min_usd_price': np.round(prices.min(), 2),
        'max_usd_price': np.round(prices.max(), 2),
        'url': url,
        'count': len(cards),
    }


def main(_):
    """Price every configured deck and return the result as an HTML table.

    Deck pages are downloaded from tappedout.net / mtgvault.com, each
    non-basic-land card is priced through the Scryfall search API, and the
    per-day price cache is read from and written back to the ``mtg-budget``
    Google Cloud Storage bucket.

    Args:
        _: Unused. Presumably the request object of an HTTP-triggered
            handler -- confirm against the deployment.

    Returns:
        str: HTML table of decks sorted by ascending total USD price,
        followed by a short explanatory footer.
    """
    basic_lands = {'forest', 'swamp', 'plains', 'island', 'mountain'}
    deck_urls = {
        'kiora': 'https://tappedout.net/mtg-decks/simic-thievery-2',
        'zarek': 'http://tappedout.net/mtg-decks/ral-zarek-tapz',
        'gw-tokens': 'https://www.mtgvault.com/henrydrake/decks/green-white-tokens/',
        'sah': 'http://tappedout.net/mtg-decks/saheeli-oathbreaker-20',
        'ajani': 'https://tappedout.net/mtg-decks/ajani-oathbreaker-15/',
        'sorin': 'https://tappedout.net/mtg-decks/sorin-budget-oathbreaker-rev-1',
        'domri julius': 'https://tappedout.net/mtg-decks/08-07-19-domri-oathbreaker',
        'domri luka': 'https://tappedout.net/mtg-decks/domri-oathbreaker-15',
        'sorin ross': 'https://www.mtgvault.com/rossenheimer/decks/sorin-oathbreaker/',
        'uwflyers ross': 'https://www.mtgvault.com/rossenheimer/decks/uw-flying-oathbreaker/',
        'angrath': 'https://www.mtgvault.com/rossenheimer/decks/stealing-angrath-mkii/',
        'kiora behemoth': 'http://tappedout.net/mtg-decks/ug-oathbreaker-2/',
        'ashiok mill': 'http://tappedout.net/mtg-decks/ashiok-mill-oathbreaker-15/',
        'gideon': 'https://tappedout.net/mtg-decks/getting-giddy-with-the-gid/',
        'dav': 'https://tappedout.net/mtg-decks/budget-oathbreaker-davriel-discard-deck',
        'ajani weenie': 'https://www.mtgvault.com/zitzewitze/decks/ajani-budget-weeni-oathbreaker/',
        'ashiok rouge': 'https://www.mtgvault.com/rossenheimer/decks/ashioks-rogues/',
        'ashiok luka': 'http://tappedout.net/mtg-decks/bashiok-cream-lender-15-oathbreaker/',
        'ashiok henry': 'https://tappedout.net/mtg-decks/ashiok-self-millsurveil-deck/',
        'dovin': 'https://tappedout.net/mtg-decks/29-07-19-dovin-oathbreaker/',
        'huatli': 'https://tappedout.net/mtg-decks/03-07-19-huatli-oathbreaker/',
        'ral': 'https://tappedout.net/mtg-decks/ral-storm-conduit-oathbreaker-4/',
        'bg dead': 'https://tappedout.net/mtg-decks/bg-dead-copy',
        'ob nixilis': 'https://tappedout.net/mtg-decks/26-06-19-ob-nixilis-oathbreaker/',
    }
    deck_data = {}
    # Card name -> price. Keyed by name only, so it must not be shared
    # between currencies; every caller in this function prices in USD.
    cache = {}

    def get_deck_list(url):
        """Download ``url`` and return the page parsed as BeautifulSoup."""
        page = requests.get(url, timeout=_REQUEST_TIMEOUT)
        return BeautifulSoup(page.content, 'html.parser')

    def get_price(name, currency='usd'):
        """Return the median price of ``name`` across all its printings.

        Falls back to the median foil price when no regular price exists
        (not attempted for ``'eur'``), and to 0.0 when nothing is priced or
        the lookup fails. Successful lookups are stored in ``cache``.
        """
        cached = cache.get(name)
        # A NaN entry can only come from a cache written before NaN medians
        # were filtered out; treat it as a miss and look the card up again.
        if cached is not None and not pd.isna(cached):
            return cached

        # `params` lets requests escape the card name (quotes, '&', '+', ...).
        resp = requests.get(
            'https://api.scryfall.com/cards/search',
            params={'q': f'!"{name}"', 'unique': 'prints'},
            timeout=_REQUEST_TIMEOUT,
        )
        sleep(0.1)  # brief pause between consecutive API calls
        if resp.status_code != 200:
            print('Failed:', resp.url)
            # Not cached, so a later run can retry the lookup.
            return 0.0

        printings = resp.json().get('data', [])
        price = _median_price(printings, currency)
        if price:
            cache[name] = price
            return price
        if currency == 'eur':
            return 0.0
        price = _median_price(printings, currency + '_foil') or 0.0
        cache[name] = price
        return price

    def format_tappedout_deck_list(deck_list_soup):
        """Extract priced, non-basic-land cards from a tappedout.net page.

        Reads the ``mtga-textarea`` export box; returns an empty list when
        the page does not contain one.
        """
        textarea = deck_list_soup.find(name='textarea', attrs={'id': 'mtga-textarea'})
        if textarea is None:
            print('No mtga-textarea found on page')
            return []
        formatted_deck_list = []
        for line in textarea.text.splitlines():
            parsed = _parse_deck_line(line)
            if parsed is None:
                continue
            count, name = parsed
            if name.lower() not in basic_lands:
                formatted_deck_list.append(
                    {'name': name, 'count': count, 'price': get_price(name, currency='usd')}
                )
        return formatted_deck_list

    def format_mtgvault_deck_list(soup):
        """Extract priced, non-basic-land cards from an mtgvault.com page.

        Card names are recovered from the page's TCGplayer mass-entry link;
        returns an empty list when no such link is present.
        """
        marker = 'http://store.tcgplayer.com/list/selectproductmagic.aspx?'
        href = next(
            (a['href'] for a in soup.find_all('a', href=True) if marker in a['href']),
            None,
        )
        if href is None:
            print('No TCGplayer link found on page')
            return []
        # Everything after the first space is a run of 'Name+Of+Card|...'
        # tokens. NOTE(review): per-card quantities are not recovered from
        # the link, so every card is recorded with a count of 1.
        names = [unquote_plus(token.split('|')[0]) for token in href.split(' ')[1:]]
        return [
            {'name': card, 'count': 1, 'price': get_price(card, currency='usd')}
            for card in names
            if card.lower().strip() not in basic_lands
        ]

    storage_client = storage.Client(project='nifty-beast-realm')
    bucket = storage_client.get_bucket('mtg-budget')
    s = f'prices-v0.1/{date.today()}.pkl'
    blob = bucket.blob(s)
    # NOTE(security): pickle.loads runs arbitrary code from its input; this
    # is only safe while write access to the bucket is fully trusted.
    if blob.exists():
        print(f'Cache exists, downloading {s}...')
        cache.update(pickle.loads(blob.download_as_string()))
    else:
        print('No cache found!')
        yesterday_blob = bucket.blob(f'prices-v0.1/{date.today() - timedelta(1)}.pkl')
        if yesterday_blob.exists():
            # Only yesterday's card names are reused; prices are re-fetched.
            card_name_cache = pickle.loads(yesterday_blob.download_as_string()).keys()
            print('Adding yesterdays cards to the cache..')
            for card in card_name_cache:
                get_price(card)

    for deck_name, url in deck_urls.items():
        print('Downloading', deck_name)
        deck_data[deck_name] = {'data': get_deck_list(url), 'url': url}

    deck_lists = {}
    for deck_name, deck in deck_data.items():
        print('Pricing', deck_name)
        if 'tappedout' in deck['url']:
            deck_lists[deck_name] = format_tappedout_deck_list(deck['data'])
        elif 'mtgvault' in deck['url']:
            deck_lists[deck_name] = format_mtgvault_deck_list(deck['data'])

    blob.upload_from_string(pickle.dumps(cache))

    summary = []
    for deck, cards in deck_lists.items():
        print('Formatting', deck)
        if cards:
            summary.append(_summarise_deck(deck, deck_data[deck]['url'], cards))

    df = pd.DataFrame(summary)
    if not df.empty:
        # Sorting an empty frame would fail on the missing column.
        df = df.sort_values(by='total_usd_price').reset_index(drop=True)
    return df.to_html() + '<br> Prices are scraped from Scryfall. <br> Card prices are updated every day at midnight GMT. <br> If a card not in any other deck is added, the price will be the most recent price from Scryfall.'
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment