Skip to content

Instantly share code, notes, and snippets.

@Inndy
Last active August 29, 2015 14:22
Show Gist options
  • Select an option

  • Save Inndy/9fdfd4505778af61d678 to your computer and use it in GitHub Desktop.

Select an option

Save Inndy/9fdfd4505778af61d678 to your computer and use it in GitHub Desktop.
import bs4
import requests
import json
# Fetch the level-1 page, read its table, and print the rows as a JSON list
# of {"name": ..., "grades": {column title: integer score}} records.
response = requests.get('http://axe-level-1.herokuapp.com/', timeout=30)
# Fail loudly on an HTTP error instead of scraping an error page.
response.raise_for_status()
# Name the parser explicitly so the result does not depend on which optional
# parsers (lxml, html5lib) happen to be installed on the machine.
doc = bs4.BeautifulSoup(response.content, 'html.parser')

# One list of cell texts per <tr>; the first row supplies the column titles.
d = [[td.text for td in row.select('td')] for row in doc.select('tr')]
header, data = d[0], d[1:]
data = [
    {
        "name": r[0],
        # Cell 0 is the name, so pair the remaining cells with the remaining
        # column titles and convert each score to an int.
        "grades": {
            subject: int(score, 10)
            for subject, score in zip(header[1:], r[1:])
        },
    }
    for r in data
]
print(json.dumps(data))
import requests
import bs4
import json
def download(url='', timeout=30):
    """Fetch a level-2 page and return it as a parsed BeautifulSoup document.

    Args:
        url: Text appended verbatim to the level-2 base URL (for example a
            query string taken from a link's ``href``). Empty by default,
            which fetches the base level-2 page itself.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the script forever.

    Returns:
        The response body parsed with the standard-library ``html.parser``.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not answer within ``timeout``.
    """
    response = requests.get(
        'http://axe-level-1.herokuapp.com/lv2' + url,
        timeout=timeout,
    )
    # Surface HTTP failures here rather than parsing an error page as data.
    response.raise_for_status()
    # Name the parser explicitly so results do not vary with which optional
    # parsers are installed.
    return bs4.BeautifulSoup(response.content, 'html.parser')
def parse(document, header=('town', 'village', 'name')):
    """Turn the table rows of a parsed page into a list of dicts.

    Args:
        document: Object exposing ``select('tr')``; each returned row must
            expose ``select('td')`` and each cell a ``text`` attribute
            (a BeautifulSoup document satisfies this).
        header: Keys assigned to the cells of each row, in column order.

    Returns:
        One dict per row after the first, mapping each key in ``header`` to
        the text of the cell in the same position. Rows with fewer cells
        than ``header`` yield only the keys they have cells for; cells
        beyond ``len(header)`` are ignored.
    """
    rows = [
        [td.text for td in tr.select('td')]
        for tr in document.select('tr')
    ]
    # The first row is skipped (it is treated as the table's title row).
    # zip pairs keys with cells positionally and stops at the shorter of
    # the two, so an over-long row cannot raise IndexError.
    return [dict(zip(header, row)) for row in rows[1:]]
# Fetch the base level-2 page, follow every link on it, and collect the
# parsed rows of each linked page into one flat list printed as JSON.
doc = download()
data = []
for link in doc.select('a'):
    linked_page = download(link['href'])
    data.extend(parse(linked_page))
print(json.dumps(data))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment