Last active
August 29, 2015 14:25
-
-
Save walkerdb/04f5bccf0b20328cfea9 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from contextlib import closing

try:
    from urllib.request import urlopen  # Python 3
except ImportError:
    from urllib2 import urlopen  # Python 2

from bs4 import BeautifulSoup
def get_lc_term_name(lc_auth_number):
    """Return the heading shown on a Library of Congress name authority page.

    The authority id is inserted into the id.loc.gov URL template, the page
    is downloaded, and the text of its first ``<h1>`` element is returned.

    Args:
        lc_auth_number: Authority identifier to look up; it is substituted
            verbatim into the URL.

    Returns:
        The text of the first ``<h1>`` tag, encoded as UTF-8.

    Raises:
        AttributeError: If the downloaded page contains no ``<h1>`` tag.

    Errors raised by ``urlopen`` while fetching the page are not caught
    here and propagate to the caller.
    """
    # Create the LoC address by inserting the auth id into a template.
    lc_template = "http://id.loc.gov/authorities/names/{0}.html"
    lc_address = lc_template.format(lc_auth_number)

    # Fetch the HTML; closing() guarantees the connection is released even
    # if the read fails, and works with both urllib2 and urllib.request.
    with closing(urlopen(lc_address)) as response:
        html = response.read()

    # Create a BeautifulSoup object from the response. Naming the parser
    # keeps the result independent of which optional parsers are installed.
    soup = BeautifulSoup(html, "html.parser")

    # The info we want is in the first <h1> tag.
    return soup.h1.text.encode("utf-8")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment