Last active
September 13, 2016 20:58
-
-
Save sleepygarden/0e81aff4bea3b93510779aa808d8c995 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
# Scrapes breeding combinations and monster stats from the Siralim wiki and writes them to two CSV files.
import csv
import re

import requests
from bs4 import BeautifulSoup
# --- Breeding combinations ---------------------------------------------------
# Fetch the wiki's breeding page and write one CSV row per table row to
# Breeding_Combinations.csv, prefixed with the family heading it is paired with.
print('Starting Breeding Combinations write...')
# A timeout keeps the script from hanging forever on an unresponsive server,
# and raise_for_status() stops an HTTP error page from being parsed as data.
response = requests.get('http://siralim.gamewiki.tips/doku.php?id=breeding', timeout=30)
response.raise_for_status()
html = response.text
print('Fetched html, parsing...')
soup = BeautifulSoup(html, 'lxml')
# Creature families
families = soup.find_all('h3', class_=re.compile('sectionedit'))
# Tables containing breeding combinations
tables = soup.find_all('table', class_='inline')
# Headings and tables are paired purely by position; zip() would silently
# drop the surplus, so make a mismatch visible.
if len(families) != len(tables):
    print('Warning: found %d family headings but %d tables; '
          'unmatched entries are skipped.' % (len(families), len(tables)))
# The output file is opened only after the page was fetched and parsed, so a
# network failure no longer truncates a previously generated CSV.
with open('Breeding_Combinations.csv', 'w', newline='', encoding='utf-8') as csvfile:
    writer = csv.writer(csvfile, quoting=csv.QUOTE_MINIMAL)
    writer.writerow(["Family", "Offspring", "Parent", "Mate"])
    for family, table in zip(families, tables):
        # Last non-empty offspring cell seen in this table.
        offspring = None
        # The first row of each table is skipped (treated as the header row).
        for r in table.find_all('tr')[1:]:
            cells = [c.text for c in r.find_all(['th', 'td'])]
            if not cells:
                # A row without any cells has no offspring column to inspect.
                continue
            cols = [family.text] + cells
            if not cols[1].strip():
                # fill in empty offspring column
                cols[1] = offspring
            else:
                offspring = cols[1]
            writer.writerow(cols)
print('Finished.')
# --- Monster stats -----------------------------------------------------------
# Convert every table in a locally saved copy of the bestiary page into
# Monster_Stats.csv.
print('Starting Monster Stats write...')
# the bestiary table is filled dynamically, either we're missing cookies or something, or it's filled in with JS after page load
# just go there, save page as bestiary.html, and place it next to this file
# The page is read in binary mode so BeautifulSoup detects its encoding instead
# of relying on the platform default, and the handle is closed once parsed.
with open("./bestiary.html", "rb") as bestiary:
    soup = BeautifulSoup(bestiary, 'lxml')
# Table containing monster stats
tables = soup.find_all('table')
# The output file is opened only after the input was read successfully, so a
# missing bestiary.html no longer leaves behind an empty CSV.
with open('Monster_Stats.csv', 'w', newline='', encoding='utf-8') as csvfile:
    writer = csv.writer(csvfile, quoting=csv.QUOTE_MINIMAL)
    # NOTE: the "Intellegence" spelling is kept as-is because existing
    # consumers of the CSV may match on this header.
    writer.writerow(["Family", "Name", "Class", "Health", "Mana", "Attack", "Intellegence", "Defense", "Speed"])
    for table in tables:
        for r in table.find_all('tr'):
            cols = [c.text for c in r.find_all('td')]
            if not cols:
                # Rows without <td> cells (e.g. header-only rows) would
                # otherwise be written as blank lines.
                continue
            writer.writerow(cols)
print('Finished.')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment