Created
September 11, 2017 23:32
-
-
Save skwerlman/882c2192fa55ab19fddb1814ba8dd7f2 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
import csv
import json
import re
import sys

import requests
from bs4 import BeautifulSoup
from zenlog import log
# Maps a lower-cased ship name as it appears in the ship matrix to the
# lower-cased name under which the same ship is listed in the price list.
# Only ships whose names differ between the two sources need an entry.
NAME_CORRECTION_DICT = {
    'm50 interceptor': 'm50',
    'vanduul scythe': 'scythe',
    'javelin-class destroyer': 'javelin',
    'reliant kore - mini hauler': 'reliant kore',
    'reliant mako - news van': 'reliant mako',
    'reliant sen - researcher': 'reliant sen',
    'reliant tana - skirmisher': 'reliant tana',
    'esperia glaive': 'glaive',
    'esperia vanduul blade': 'blade',
    'mpuv personnel': 'mpuv-1p',
    'mpuv cargo': 'mpuv-1c',
    'esperia prowler': 'prowler',
    'razor': 'misc razor',
    'origin 600i touring': '600i touring',
    'origin 600i explorer': '600i explorer',
}

# Page whose embedded JSON describes every ship (parsed by get_matrix_data).
SHIP_MATRIX_URL = "https://robertsspaceindustries.com/ship-specs"
# Wiki page whose tables list ship prices (parsed by get_price_data).
SHIP_PRICE_LIST_URL = "http://starcitizen.wikia.com/wiki/List_of_ship_and_vehicle_prices"
# Tab-separated file written by main().
OUT_FILE = "ship-data.tsv"
def get_price_data(soup):
    """Extract ship prices from the parsed price-list page.

    Every ``table.article-table`` in *soup* is scanned row by row. Rows that
    have no ``td`` cells (header rows) or fewer than three cells are skipped.

    Args:
        soup: Parsed document exposing a CSS ``select`` method; main() passes
            a ``BeautifulSoup`` object.

    Returns:
        dict mapping the stripped, lower-cased text of each row's first cell
        to the integer parsed from its third cell. A ``--`` placeholder is
        stored as 0. If a name occurs more than once the last row wins.

    Raises:
        ValueError: If a price cell is not an integer once ``$`` and
            thousands separators have been removed.
    """
    data = {}
    for table in soup.select('table.article-table'):
        for row in table.select('tr'):
            cols = row.select('td')
            if len(cols) < 3:
                # Header rows contain no <td> cells, and a row with fewer
                # than three cells has no price column to read.
                continue
            name = cols[0].get_text().strip().lower()
            raw_price = cols[2].get_text().strip()
            # Drop the currency sign and thousands separators; '--' marks a
            # ship without a listed price and is recorded as 0.
            price = raw_price.replace('$', '').replace(',', '').replace('--', '0')
            try:
                data[name] = int(price)
            except ValueError as exc:
                raise ValueError(
                    f'Unparseable price {raw_price!r} for {name!r}'
                ) from exc
    return data
def get_matrix_data(soup, price_data):
    """Build one record per ship from the JSON embedded in the ship-matrix page.

    Args:
        soup: Parsed ship-matrix page. Only its string form is used: it is
            searched for a ``data: [{...}]`` JSON array.
        price_data: dict mapping lower-cased ship names to integer prices,
            as returned by get_price_data().

    Returns:
        list of dicts, one per ship, with the keys ``classification``,
        ``focus1``, ``focus2``, ``manufacturer``, ``name`` and ``price`` (in
        that order). ``focus2`` is ``''`` when the ship has a single focus.
        ``price`` is 0, and a warning is logged, when no price is known.

    Raises:
        ValueError: If no ``data: [{...}]`` array is found in the page.
    """
    match = re.search(r'data: (\[\{.*\}\])', str(soup))
    if match is None:
        raise ValueError('Could not find ship data in the ship matrix page')
    matrix = json.loads(match.group(1))

    data = []
    for ship in matrix:
        # A focus such as "A / B" is split into two columns; anything after
        # the second part is ignored.
        focus = ship['focus']
        focuses = focus.split('/') if focus else ['']

        name = ship['name']
        key = name.lower()
        # Prefer the corrected price-list name, but fall back to the matrix
        # name so a stale correction entry cannot raise KeyError.
        price = price_data.get(NAME_CORRECTION_DICT.get(key, key))
        if price is None:
            price = price_data.get(key)
        if price is None:
            log.warn(f'No price info for {name}')
            price = 0

        # Insertion order defines the column order of the output file.
        data.append({
            'classification': ship['classification'],
            'focus1': focuses[0].strip(),
            'focus2': focuses[1].strip() if len(focuses) > 1 else '',
            'manufacturer': ship['manufacturer']['code'],
            'name': name,
            'price': price,
        })
    return data
def _fetch_soup(url, description):
    """Download *url* and return it parsed; exit with status 1 on failure."""
    try:
        # A timeout keeps the script from hanging forever on a stalled server.
        response = requests.get(url, timeout=30)
    except requests.RequestException as exc:
        log.critical(f'{description} failed to download: {exc}')
        sys.exit(1)
    if response.status_code != 200:
        log.critical(f'{description} failed to download with code {response.status_code}')
        sys.exit(1)
    return BeautifulSoup(response.text, 'html.parser')


def main():
    """Download the price list and ship matrix and write them to OUT_FILE.

    The price list is parsed with get_price_data(), merged into the ship
    matrix by get_matrix_data(), and the result is written as a
    tab-separated file with a header row.

    Exits with status 1 if either download fails or returns a non-200
    status, or if the matrix yields no ships.
    """
    price_data = get_price_data(_fetch_soup(SHIP_PRICE_LIST_URL, 'Ship prices'))
    data = get_matrix_data(_fetch_soup(SHIP_MATRIX_URL, 'Ship matrix'), price_data)
    if not data:
        # The header row is taken from the first record, so there must be one.
        log.critical('Ship matrix contained no ships')
        sys.exit(1)

    # newline='' stops the text layer from translating the '\n' terminator,
    # as the csv module requires; utf-8 makes the output locale-independent.
    with open(OUT_FILE, 'w', newline='', encoding='utf-8') as output_file:
        dict_writer = csv.DictWriter(
            output_file, data[0].keys(), delimiter='\t', lineterminator='\n'
        )
        dict_writer.writeheader()
        dict_writer.writerows(data)
if __name__ == '__main__':
    # Exceptions are deliberately left unhandled so that a failure prints
    # its full traceback.
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment