Created
February 14, 2011 06:54
-
-
Save vedantk/825572 to your computer and use it in GitHub Desktop.
Traces the bloodlines of academic royalty by sifting through Wikipedia.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
# Traces the bloodlines of academic royalty by sifting through Wikipedia. | |
import re | |
import urllib2 | |
from BeautifulSoup import BeautifulSoup | |
# Base URL that procLink prepends to each href scraped from an infobox.
prefix = "http://en.wikipedia.org"

# Case-insensitive pattern procLink uses to find the advisor entry.
advisor = re.compile("advisor", re.I)

# Shared opener for every page fetch; it carries a browser-like User-agent
# header instead of urllib2's default one.
opener = urllib2.build_opener()
opener.addheaders = [('User-agent', 'Mozilla/5.0')]
def procLink(link, level=0):
    """Recursively trace the academic advisors listed on a Wikipedia page.

    Fetches ``link``, locates the page's ``infobox vcard`` table and searches
    it for text matching the module-level ``advisor`` pattern.  Every anchor
    found in the table cell that follows the match is treated as an advisor
    page and is crawled in turn, one level deeper.

    Args:
        link: Absolute URL of the page to inspect.
        level: Recursion depth.  Only used to indent the progress line that
            is printed for each person whose advisors are followed.

    Returns:
        ``[]`` when the page cannot be fetched or parsed, or when it has no
        ``firstHeading`` element or no infobox; the bare page title when the
        infobox has no advisor entry; otherwise the two-element list
        ``[title, [result_for_each_advisor_link, ...]]``.
    """
    try:
        page = opener.open(link).read()
        soup = BeautifulSoup(page)
    except Exception:
        # Best effort: any fetch or parse failure turns this branch into a
        # dead end.  Unlike a bare ``except:``, this still lets
        # KeyboardInterrupt and SystemExit stop a long crawl.
        return []

    heading = soup.find(id="firstHeading")
    if heading is None:
        # Not a regular article page; nothing to name, nothing to follow.
        return []
    name = heading.string

    vcard = soup.find("table", "infobox vcard")
    if not vcard:
        return []

    row = vcard.find("tr", text=advisor)
    if not row:
        # End of a lineage: the person has an infobox but no advisor entry.
        return name

    # The advisor names are expected in the first <td> after the match; a
    # missing cell simply means there are no links to follow.
    cell = row.findNext("td")
    mentors = cell.findChildren("a") if cell is not None else []

    # Anchors without an href cannot be followed, so they are skipped.
    # NOTE(review): every remaining href is joined to ``prefix`` verbatim,
    # so non-article links inside the cell are crawled as well - confirm
    # whether those should be filtered out.
    refs = [prefix + elt.get('href') for elt in mentors if elt.get('href')]

    # Single-argument form so the line parses both as a print statement and
    # as a print call; the text is the indent, one space, then the title.
    print("%s %s" % (level * ">> ", name))

    # NOTE(review): there is no guard against advisor links that lead back
    # to an ancestor page; such a cycle would keep recursing.
    children = [procLink(ref, level + 1) for ref in refs]
    return [name, children]
if __name__ == '__main__':
    # Demo run: crawl two starting pages and dump the nested result of each.
    for start in ("http://en.wikipedia.org/wiki/Feynman",
                  "http://en.wikipedia.org/wiki/John_McCarthy_(computer_scientist)"):
        print(procLink(start))
sample = """ | |
Richard Feynman | |
>> John Archibald Wheeler | |
>> >> Karl Herzfeld | |
>> >> >> Friedrich Hasenöhrl | |
>> >> >> >> Franz S. Exner | |
>> >> >> >> >> August Kundt | |
>> >> >> >> >> >> Heinrich Gustav Magnus | |
>> >> >> >> >> >> >> Eilhard Mitscherlich | |
>> >> >> >> >> >> >> >> Friedrich Stromeyer | |
>> >> >> >> >> >> >> >> >> Johann Friedrich Gmelin | |
>> >> >> >> >> >> >> >> >> >> Philipp Friedrich Gmelin | |
>> >> >> >> >> >> >> >> >> >> >> Burchard Mauchart | |
>> >> >> >> >> >> >> >> >> >> >> >> Elias Rudolph Camerarius Jr. | |
>> >> >> >> >> >> >> >> >> >> >> >> >> Elias Rudolph Camerarius, Sr. | |
>> >> >> >> >> >> >> >> >> >> >> >> >> >> Georg Balthasar Metzger | |
>> >> >> >> >> >> >> >> >> >> >> >> >> >> >> Johann Georg Macasius | |
>> >> >> >> >> >> >> >> >> >> >> >> >> >> >> >> Johannes Musaeus | |
>> >> >> >> >> >> >> >> >> >> >> >> >> >> >> Emmanuel Stupanus | |
>> >> >> >> >> >> >> >> >> Louis Nicolas Vauquelin | |
>> >> >> >> >> >> >> >> >> >> Antoine François, comte de Fourcroy | |
[u'Richard Feynman', [[u'John Archibald Wheeler', [[u'Karl Herzfeld', [[u'Friedrich Hasen\xf6hrl', [[u'Franz S. Exner', [[u'August Kundt', [[u'Heinrich Gustav Magnus', [[u'Eilhard Mitscherlich', [[u'Friedrich Stromeyer', [[u'Johann Friedrich Gmelin', [[u'Philipp Friedrich Gmelin', [[u'Burchard Mauchart', [[u'Elias Rudolph Camerarius Jr.', [[u'Elias Rudolph Camerarius, Sr.', [[u'Georg Balthasar Metzger', [[u'Johann Georg Macasius', [[u'Johannes Musaeus', [[]]], []]], [u'Emmanuel Stupanus', [[]]]]]]]]]]]]], []]], [u'Louis Nicolas Vauquelin', [[u'Antoine Fran\xe7ois, comte de Fourcroy', [[]]]]]]]]]]]]]]]]]]]]]]] | |
John McCarthy (computer scientist) | |
>> Solomon Lefschetz | |
>> >> William Edward Story | |
>> >> >> Felix Klein | |
>> >> >> >> Julius Plücker | |
>> >> >> >> >> Christian Ludwig Gerling | |
>> >> >> >> >> >> Carl Friedrich Gauss | |
>> >> >> >> >> >> >> Johann Friedrich Pfaff | |
>> >> >> >> >> >> >> >> Abraham Gotthelf Kästner | |
>> >> >> >> >> >> >> >> >> Christian August Hausen | |
>> >> >> >> >> >> >> >> >> >> Johann Christoph Wichmannshausen | |
>> >> >> >> >> >> >> >> >> >> >> Otto Mencke | |
>> >> >> >> >> >> >> >> >> >> >> >> Jakob Thomasius | |
>> >> >> >> Rudolf Lipschitz | |
>> >> >> >> >> Johann Peter Gustav Lejeune Dirichlet | |
>> >> >> >> >> >> Siméon Denis Poisson | |
>> >> >> >> >> >> >> Joseph Louis Lagrange | |
>> >> >> >> >> >> >> >> Leonhard Euler | |
>> >> >> >> >> >> >> >> >> Johann Bernoulli | |
>> >> >> >> >> >> >> Pierre-Simon Laplace | |
>> >> >> >> >> >> Joseph Fourier | |
>> >> >> >> >> >> >> Joseph Louis Lagrange | |
>> >> >> >> >> >> >> >> Leonhard Euler | |
>> >> >> >> >> >> >> >> >> Johann Bernoulli | |
>> >> >> Carl Neumann | |
[u'John McCarthy (computer scientist)', [[u'Solomon Lefschetz', [[u'William Edward Story', [[u'Felix Klein', [[u'Julius Pl\xfccker', [[u'Christian Ludwig Gerling', [[u'Carl Friedrich Gauss', [[u'Johann Friedrich Pfaff', [[u'Abraham Gotthelf K\xe4stner', [[u'Christian August Hausen', [[u'Johann Christoph Wichmannshausen', [[u'Otto Mencke', [[u'Jakob Thomasius', [[]]]]]]]]]]]]]]]]]]], [u'Rudolf Lipschitz', [[u'Johann Peter Gustav Lejeune Dirichlet', [[u'Sim\xe9on Denis Poisson', [[u'Joseph Louis Lagrange', [[u'Leonhard Euler', [[u'Johann Bernoulli', [u'Jacob Bernoulli']]]]]], [u'Pierre-Simon Laplace', [u"Jean le Rond d'Alembert", []]]]], [u'Joseph Fourier', [[u'Joseph Louis Lagrange', [[u'Leonhard Euler', [[u'Johann Bernoulli', [u'Jacob Bernoulli']]]]]]]]]], []]]]], [u'Carl Neumann', [[], []]]]]]]]] | |
""" |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment