Skip to content

Instantly share code, notes, and snippets.

@zduymz
Created May 18, 2022 01:31
Show Gist options
  • Save zduymz/c45007064b1b56093716d7b0399f91c7 to your computer and use it in GitHub Desktop.
Save zduymz/c45007064b1b56093716d7b0399f91c7 to your computer and use it in GitHub Desktop.
Crawling the Elden Ring dataset from gamerguides.com
import csv

import requests
from bs4 import BeautifulSoup
# Database section slugs; each one is both a URL suffix and a CSV file name.
paths = [
    "armors",
    "ashes-of-war",
    "bosses",
    "enemies",
    "items",
    "locations",
    "magic-spells",
    "npcs",
    "shields",
    "skills",
    "spirit-ashes",
    "weapons",
]
def download(path):
    """Scrape one Elden Ring database section and save it as a CSV file.

    Fetches ``https://www.gamerguides.com/elden-ring/database/{path}``,
    reads the second ``<table>`` element on the page and writes its rows to
    ``/tmp/{path}.csv``. Column names are the text of the ``<th>`` cells in
    the table head; every body cell is matched to a column through its
    ``data-title`` attribute. A cell whose ``data-title`` is "icon"
    (case-insensitive) stores the ``src`` of its ``<img>`` instead of text.

    Args:
        path: Section slug appended to the database URL and used as the
            CSV file name.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
        IndexError: If the page contains fewer than two tables.
    """
    output = f'/tmp/{path}.csv'

    # A timeout keeps a stalled connection from hanging the whole crawl, and
    # the status check stops us from parsing an error page as if it were data.
    response = requests.get(
        f'https://www.gamerguides.com/elden-ring/database/{path}',
        timeout=30,
    )
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    # Look the tables up once; the data lives in the second one.
    tables = soup.find_all('table')
    if len(tables) < 2:
        raise IndexError(
            f'expected at least 2 tables for {path!r}, found {len(tables)}'
        )
    table = tables[1]

    # header
    headers = [th.text for th in table.find('thead').find('tr').find_all('th')]

    # data
    data = []
    for tr in table.find('tbody').find_all('tr'):
        line = {}
        for td in tr.find_all('td'):
            index = td.get('data-title')
            if index is None:
                # Without a data-title the cell cannot be mapped to a column.
                continue
            if index.lower() == 'icon':
                img = td.find('img')
                value = img.get('src', '') if img is not None else ''
            else:
                value = td.text
            line[index] = value
        data.append(line)

    # The csv module quotes commas, quotes and newlines inside cells.
    # restval/extrasaction tolerate rows whose cells do not line up exactly
    # with the header row; lineterminator keeps plain "\n" line endings.
    with open(output, 'w', newline='', encoding='utf-8') as f:
        writer = csv.DictWriter(
            f,
            fieldnames=headers,
            restval='',
            extrasaction='ignore',
            lineterminator='\n',
        )
        writer.writeheader()
        writer.writerows(data)
    print("Done")
# Scrape every database section in turn, announcing each before it starts.
for section in paths:
    print(f'Downloading {section}')
    download(section)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment