Skip to content

Instantly share code, notes, and snippets.

Created December 26, 2014 23:43
Show Gist options
  • Save Glench/229b5388bf2802091e96 to your computer and use it in GitHub Desktop.
Save Glench/229b5388bf2802091e96 to your computer and use it in GitHub Desktop.
import sys
import itertools
from pyquery import PyQuery as pq
def download_and_parse(wikipedia_url):
    """Fetch a page and return its infobox rows as a ``{label: value}`` dict.

    The URL is stripped of surrounding whitespace and loaded through PyQuery.
    Every ``tr`` matched by the ``.infobox tr`` selector is inspected: when
    its ``th,td`` selection contains exactly two cells, the stripped text of
    the first cell becomes the key and the stripped text of the second cell
    becomes the value. Rows with any other cell count are ignored, and a
    later row with the same key text overwrites the earlier one.
    """
    document = pq(url=wikipedia_url.strip())
    infobox = {}
    for row in document('.infobox tr'):
        pair = pq(row)('th,td')
        # Only plain "label / value" rows are of interest.
        if len(pair) != 2:
            continue
        value = pair.eq(1).text().strip()
        label = pair.eq(0).text().strip()
        infobox[label] = value
    return infobox
# Lazily download and parse one page per stdin line. A generator expression
# keeps the lazy, one-at-a-time behaviour of itertools.imap but also runs on
# Python 3, where imap no longer exists. Blank lines are skipped so that an
# empty URL is never requested.
results = (download_and_parse(line) for line in sys.stdin if line.strip())
from pprint import pprint
for result in results:
    # .items() is available on both Python 2 and Python 3 (iteritems is not).
    for key, value in result.items():
        # do something
        pass  # placeholder: a loop body must contain at least one statement
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment