zduymz · May 18, 2022 01:31
diff --git a/eldenring-dataset.py b/eldenring-dataset.py
 import requests
 from bs4 import BeautifulSoup

 # Database section slugs to fetch; each one is handed to download() in turn.
 paths = [
     "armors",
     "ashes-of-war",
     "bosses",
     "enemies",
     "items",
     "locations",
     "magic-spells",
     "npcs",
     "shields",
     "skills",
     "spirit-ashes",
     "weapons",
 ]

 def download(path):
     """Scrape one database table and save it as ``/tmp/{path}.csv``.

     Fetches ``https://www.gamerguides.com/elden-ring/database/{path}``,
     reads the second ``<table>`` on the page, and writes its header row
     followed by one CSV row per ``<tbody>`` row. Cells are matched to
     columns by their ``data-title`` attribute; a cell whose ``data-title``
     is ``icon`` (case-insensitive) stores the ``src`` of its ``<img>``
     instead of its text.

     Args:
         path: Database section slug, e.g. ``"weapons"``. It is
             interpolated into both the URL and the output filename.

     Raises:
         requests.HTTPError: If the server answers with an error status.
         IndexError: If the page contains fewer than two tables.
         AttributeError: If the table has no ``<thead>``/``<tr>``/``<tbody>``.
     """
     # Local import keeps this function self-contained without touching
     # the file-level import block.
     import csv

     output = f'/tmp/{path}.csv'

     # A timeout stops a stalled connection from hanging the whole run, and
     # raise_for_status() surfaces HTTP errors instead of parsing an error page.
     response = requests.get(
         f'https://www.gamerguides.com/elden-ring/database/{path}',
         timeout=30,
     )
     response.raise_for_status()
     soup = BeautifulSoup(response.text, 'html.parser')

     # Look the table up once; it is needed for both the header and the body.
     table = soup.find_all('table')[1]

     # header
     headers = [th.text for th in table.find('thead').find('tr').find_all('th')]

     # data
     data = []
     for tr in table.find('tbody').find_all('tr'):
         line = {}
         for td in tr.find_all('td'):
             index = td.get('data-title')
             if index is None:
                 # A cell without a data-title cannot be mapped to a column.
                 continue
             if index.lower() == 'icon':
                 img = td.find('img')
                 value = img.get('src') if img is not None else ''
             else:
                 value = td.text
             line[index] = value
         data.append(line)

     # The csv module quotes values containing commas, quotes or newlines,
     # which hand-built ','.join / str.format rows silently corrupted.
     # restval='' fills columns a row lacks; extrasaction='ignore' drops
     # cells whose data-title matches no header (as str.format(**line) did).
     with open(output, 'w', encoding='utf-8', newline='') as f:
         writer = csv.DictWriter(
             f,
             fieldnames=headers,
             restval='',
             extrasaction='ignore',
             lineterminator='\n',
         )
         writer.writeheader()
         writer.writerows(data)
     print("Done")

 # Driver: runs at module level, so it executes whenever this file is run
 # (or imported). Sections are fetched one after another in list order;
 # an exception raised by download() stops the remaining sections.
 for path in paths:
    # Announce the section before the network request starts.
    print(f'Downloading {path}')
    download(path)
	import requests
	from bs4 import BeautifulSoup

	# Database section slugs to fetch; each one is handed to download() in turn.
	paths = [
	    "armors",
	    "ashes-of-war",
	    "bosses",
	    "enemies",
	    "items",
	    "locations",
	    "magic-spells",
	    "npcs",
	    "shields",
	    "skills",
	    "spirit-ashes",
	    "weapons",
	]

	def download(path):
	    """Scrape one database table and save it as ``/tmp/{path}.csv``.

	    Fetches ``https://www.gamerguides.com/elden-ring/database/{path}``,
	    reads the second ``<table>`` on the page, and writes its header row
	    followed by one CSV row per ``<tbody>`` row. Cells are matched to
	    columns by their ``data-title`` attribute; a cell whose ``data-title``
	    is ``icon`` (case-insensitive) stores the ``src`` of its ``<img>``
	    instead of its text.

	    Args:
	        path: Database section slug, e.g. ``"weapons"``. It is
	            interpolated into both the URL and the output filename.

	    Raises:
	        requests.HTTPError: If the server answers with an error status.
	        IndexError: If the page contains fewer than two tables.
	        AttributeError: If the table has no ``<thead>``/``<tr>``/``<tbody>``.
	    """
	    # Local import keeps this function self-contained without touching
	    # the file-level import block.
	    import csv

	    output = f'/tmp/{path}.csv'

	    # A timeout stops a stalled connection from hanging the whole run, and
	    # raise_for_status() surfaces HTTP errors instead of parsing an error page.
	    response = requests.get(
	        f'https://www.gamerguides.com/elden-ring/database/{path}',
	        timeout=30,
	    )
	    response.raise_for_status()
	    soup = BeautifulSoup(response.text, 'html.parser')

	    # Look the table up once; it is needed for both the header and the body.
	    table = soup.find_all('table')[1]

	    # header
	    headers = [th.text for th in table.find('thead').find('tr').find_all('th')]

	    # data
	    data = []
	    for tr in table.find('tbody').find_all('tr'):
	        line = {}
	        for td in tr.find_all('td'):
	            index = td.get('data-title')
	            if index is None:
	                # A cell without a data-title cannot be mapped to a column.
	                continue
	            if index.lower() == 'icon':
	                img = td.find('img')
	                value = img.get('src') if img is not None else ''
	            else:
	                value = td.text
	            line[index] = value
	        data.append(line)

	    # The csv module quotes values containing commas, quotes or newlines,
	    # which hand-built ','.join / str.format rows silently corrupted.
	    # restval='' fills columns a row lacks; extrasaction='ignore' drops
	    # cells whose data-title matches no header (as str.format(**line) did).
	    with open(output, 'w', encoding='utf-8', newline='') as f:
	        writer = csv.DictWriter(
	            f,
	            fieldnames=headers,
	            restval='',
	            extrasaction='ignore',
	            lineterminator='\n',
	        )
	        writer.writeheader()
	        writer.writerows(data)
	    print("Done")

	# Driver: fetch every section one after another in list order. The loop
	# body is indented so both statements run once per section.
	for path in paths:
	    # Announce the section before the network request starts.
	    print(f'Downloading {path}')
	    download(path)