jacobh · June 3, 2012 15:55
diff --git a/scraper.py b/scraper.py
 from bs4 import BeautifulSoup
 import requests
 import re
 import json

 # Scrape champion data from the League of Legends site and print it as JSON.
 #
 # NOTE: every print below passes one parenthesized expression, so the
 # statements parse both as Python 2 print statements and as Python 3 calls.

 base_url = 'http://na.leagueoflegends.com/'

 # Signed number with an optional fractional part, e.g. "+3" or "+0.55".
 # The fractional part must be captured, otherwise "+0.55" is read as 0.
 _per_level_re = re.compile(r'[+-]\d+(?:\.\d+)?')


 def get_page(x):
    """Fetch the page at path ``x`` under ``base_url`` and return it parsed.

    ``base_url`` ends with a slash and the paths used below start with one,
    so the join is normalized to exactly one slash between host and path.
    """
    url = base_url.rstrip('/') + '/' + x.lstrip('/')
    return BeautifulSoup(requests.get(url).text)


 # step one, get a list of all champion urls
 print('getting list of urls')
 soup = get_page('/champions/')

 # A real list (not a lazy map object) because len() is taken of it below.
 champion_urls = [
    link['href']
    for link in soup.select('#champions_grid_view a.lol_champion')
 ]

 print('urls acquired')
 # now, grab the info for each champ and make a list of dicts
 champions = []

 for i, champ_url in enumerate(champion_urls, 1):
    soup = get_page(champ_url)

    champion = {
        # first run of digits in the url is used as the champion id
        'id': int(re.search(r'(\d+)', champ_url).group(0)),
        'name': soup.select(".champion_name")[0].text,
        'title': soup.select(".champion_title")[0].text,
        'description': soup.select(".champion_description")[0].text,
        'stats': {},
    }
    for stat in soup.select('.stats_table tr'):
        name = stat.select('.stats_name')[0].text.lower()
        champion['stats'][name] = {}

        champion['stats'][name]['base'] = float(
            stat.select('.stats_value')[0].text
        )

        try:
            per_level = float(_per_level_re.search(
                stat.select('.ability_per_level_stat')[0].text
            ).group(0))
        except (IndexError, AttributeError):
            # IndexError: the row has no per-level element.
            # AttributeError: its text holds no signed number (search -> None).
            # Anything else (e.g. Ctrl-C) is allowed to propagate.
            per_level = 0
        champion['stats'][name]['per_level'] = per_level

    champions.append(champion)
    print("champion %d/%d -- %s is scraped" % (
        i,
        len(champion_urls),
        champion['name']
    ))


 print(json.dumps(champions))
	from bs4 import BeautifulSoup
	import requests
	import re
	import json

	# Scrape champion data from the League of Legends site and print it as JSON.
	#
	# NOTE: every print below passes one parenthesized expression, so the
	# statements parse both as Python 2 print statements and as Python 3 calls.

	base_url = 'http://na.leagueoflegends.com/'

	# Signed number with an optional fractional part, e.g. "+3" or "+0.55".
	# The fractional part must be captured, otherwise "+0.55" is read as 0.
	_per_level_re = re.compile(r'[+-]\d+(?:\.\d+)?')


	def get_page(x):
	    """Fetch the page at path ``x`` under ``base_url`` and return it parsed.

	    ``base_url`` ends with a slash and the paths used below start with one,
	    so the join is normalized to exactly one slash between host and path.
	    """
	    url = base_url.rstrip('/') + '/' + x.lstrip('/')
	    return BeautifulSoup(requests.get(url).text)


	# step one, get a list of all champion urls
	print('getting list of urls')
	soup = get_page('/champions/')

	# A real list (not a lazy map object) because len() is taken of it below.
	champion_urls = [
	    link['href']
	    for link in soup.select('#champions_grid_view a.lol_champion')
	]

	print('urls acquired')
	# now, grab the info for each champ and make a list of dicts
	champions = []

	for i, champ_url in enumerate(champion_urls, 1):
	    soup = get_page(champ_url)

	    champion = {
	        # first run of digits in the url is used as the champion id
	        'id': int(re.search(r'(\d+)', champ_url).group(0)),
	        'name': soup.select(".champion_name")[0].text,
	        'title': soup.select(".champion_title")[0].text,
	        'description': soup.select(".champion_description")[0].text,
	        'stats': {},
	    }
	    for stat in soup.select('.stats_table tr'):
	        name = stat.select('.stats_name')[0].text.lower()
	        champion['stats'][name] = {}

	        champion['stats'][name]['base'] = float(
	            stat.select('.stats_value')[0].text
	        )

	        try:
	            per_level = float(_per_level_re.search(
	                stat.select('.ability_per_level_stat')[0].text
	            ).group(0))
	        except (IndexError, AttributeError):
	            # IndexError: the row has no per-level element.
	            # AttributeError: its text holds no signed number (search -> None).
	            # Anything else (e.g. Ctrl-C) is allowed to propagate.
	            per_level = 0
	        champion['stats'][name]['per_level'] = per_level

	    champions.append(champion)
	    print("champion %d/%d -- %s is scraped" % (
	        i,
	        len(champion_urls),
	        champion['name']
	    ))


	print(json.dumps(champions))
No results found