Skip to content

Instantly share code, notes, and snippets.

@jacobh
Created June 3, 2012 15:55
Show Gist options
  • Save jacobh/2863973 to your computer and use it in GitHub Desktop.
Save jacobh/2863973 to your computer and use it in GitHub Desktop.
LoL Champion info scraper
from bs4 import BeautifulSoup
import requests
import re
import json
base_url = 'http://na.leagueoflegends.com/'


def page_url(path):
    """Return the absolute URL for *path* on the site rooted at ``base_url``.

    Exactly one slash is placed between ``base_url`` and *path*, so
    '/champions/' and 'champions/' produce the same URL instead of the
    double slash that plain concatenation would give. A *path* that is
    already an absolute http(s) URL is returned unchanged.
    """
    if path.startswith(('http://', 'https://')):
        return path
    return base_url.rstrip('/') + '/' + path.lstrip('/')


def get_page(x, timeout=30):
    """Download the page at site-relative path *x* and return it parsed.

    x       -- path (or absolute URL) of the page to fetch.
    timeout -- seconds passed to ``requests.get`` so a stalled connection
               cannot hang the script.

    Returns a ``BeautifulSoup`` document. Raises the ``requests`` HTTP error
    when the server answers with an error status, rather than parsing the
    error page as if it were real data.
    """
    response = requests.get(page_url(x), timeout=timeout)
    response.raise_for_status()
    # Name the parser explicitly so the result does not depend on which
    # optional parser libraries happen to be installed.
    return BeautifulSoup(response.text, 'html.parser')
# Step one: collect the link target of every champion entry on the
# champions index page.
print('getting list of urls')
soup = get_page('/champions/')
# Built as a real list (not a lazy map object) so it can be measured with
# len() and iterated more than once on both Python 2 and Python 3.
champion_urls = [
    link['href']
    for link in soup.select('#champions_grid_view a.lol_champion')
]
print('urls acquired')
# Step two: fetch each champion page, turn it into a dict, and collect the
# dicts in `champions`; the whole list is printed as JSON at the end.

# Signed number with an optional fractional part ("+3", "+3.5", "-0.55").
# The previous pattern stopped at the decimal point and silently truncated
# values such as "+3.5" to 3.0. Compiled once; it is reused for every row.
per_level_pattern = re.compile(r'[+-]\d*\.?\d+')

# Materialise the URLs so len() below works for any iterable.
champion_urls = list(champion_urls)
champions = []
for i, champ_url in enumerate(champion_urls, 1):
    soup = get_page(champ_url)
    champion = {
        # The first run of digits in the URL is used as the champion id.
        'id': int(re.search(r'(\d+)', champ_url).group(0)),
        'name': soup.select(".champion_name")[0].text,
        'title': soup.select(".champion_title")[0].text,
        'description': soup.select(".champion_description")[0].text,
        'stats': {},
    }
    for stat in soup.select('.stats_table tr'):
        name = stat.select('.stats_name')[0].text.lower()
        stat_entry = {
            'base': float(stat.select('.stats_value')[0].text),
        }
        try:
            per_level_text = stat.select('.ability_per_level_stat')[0].text
            stat_entry['per_level'] = float(
                per_level_pattern.search(per_level_text).group(0)
            )
        except (IndexError, AttributeError):
            # IndexError: the row has no per-level cell.
            # AttributeError: the cell holds no signed number (search -> None).
            # Either way the stat is treated as not scaling with level.
            stat_entry['per_level'] = 0
        champion['stats'][name] = stat_entry
    champions.append(champion)
    print("champion %d/%d -- %s is scraped" % (
        i,
        len(champion_urls),
        champion['name'],
    ))

print(json.dumps(champions))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment