Skip to content

Instantly share code, notes, and snippets.

@sleepygarden
Last active September 13, 2016 20:58
Show Gist options
  • Save sleepygarden/0e81aff4bea3b93510779aa808d8c995 to your computer and use it in GitHub Desktop.
Save sleepygarden/0e81aff4bea3b93510779aa808d8c995 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
# Scrapes breeding combinations from the Siralim wiki and monster stats from a locally saved
# copy of the bestiary page, writing each data set to its own CSV file
import csv
import requests
import re
from bs4 import BeautifulSoup
# --- Breeding combinations -------------------------------------------------
# Download the wiki's breeding page and write one CSV row per breeding
# combination: Family, Offspring, Parent, Mate.
print('Starting Breeding Combinations write...')
# A timeout keeps the script from hanging forever on a stalled connection, and
# raise_for_status() stops us from parsing an HTTP error page as if it were data.
response = requests.get('http://siralim.gamewiki.tips/doku.php?id=breeding', timeout=30)
response.raise_for_status()
html = response.text
print('Fetched html, parsing...')
soup = BeautifulSoup(html, 'lxml')
# Creature families
families = soup.find_all('h3', class_=re.compile('sectionedit'))
# Tables containing breeding combinations
tables = soup.find_all('table', class_='inline')
# The output file is opened only after the page has been fetched and parsed, so
# a failed download does not truncate a CSV left over from an earlier run.
with open('Breeding_Combinations.csv', 'w', newline='') as csvfile:
    writer = csv.writer(csvfile, quoting=csv.QUOTE_MINIMAL)
    writer.writerow(["Family", "Offspring", "Parent", "Mate"])
    # Headings and tables are paired by position in the document.
    for family, table in zip(families, tables):
        offspring = None
        # The first row of each table is skipped (treated as the header row).
        for r in table.find_all('tr')[1:]:
            cols = [family.text] + [c.text for c in r.find_all(['th', 'td'])]
            if len(cols) < 2:
                # Row without any cells: nothing to record, and cols[1] would
                # raise IndexError.
                continue
            if not cols[1].strip():
                # fill in empty offspring column with the last offspring seen
                cols[1] = offspring
            else:
                offspring = cols[1]
            writer.writerow(cols)
    print('Finished.')
# --- Monster stats ---------------------------------------------------------
# Convert the locally saved bestiary page into a CSV with one row per table row.
print('Starting Monster Stats write...')
# the bestiary table is filled dynamically, either we're missing cookies or something, or it's filled in with JS after page load
# just go there, save page as bestiary.html, and place it next to this file
# Opened in binary mode so BeautifulSoup detects the page's encoding itself
# instead of relying on the platform default; the context manager closes the
# handle, which the previous bare open() never did.
with open("./bestiary.html", "rb") as bestiary:
    soup = BeautifulSoup(bestiary, 'lxml')
# Table containing monster stats
tables = soup.find_all('table')
# The output file is opened only after the input has been read, so a missing
# bestiary.html does not truncate a CSV left over from an earlier run.
with open('Monster_Stats.csv', 'w', newline='') as csvfile:
    writer = csv.writer(csvfile, quoting=csv.QUOTE_MINIMAL)
    writer.writerow(["Family", "Name", "Class", "Health", "Mana", "Attack", "Intellegence", "Defense", "Speed"])
    for table in tables:
        for r in table.find_all('tr'):
            cols = [c.text for c in r.find_all('td')]
            if not cols:
                # Rows with no <td> cells (e.g. header rows built from <th>)
                # would otherwise be written as blank lines in the CSV.
                continue
            writer.writerow(cols)
    print('Finished.')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment