Created
January 4, 2012 23:00
-
-
Save csessig86/1562705 to your computer and use it in GitHub Desktop.
This Python code scrapes Iowa GOP caucus locations (the data was actually pulled from the Iowa GOP's website and reposted on my personal site after I cleaned it up a bit) and exports them to a TSV file. Based on a tutorial from BuzzData.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Scraper based on this tutorial from BuzzData:
# http://blog.buzzdata.com/post/11871523667/how-to-scrape-toronto-data-a-basic-tutorial
#
# Downloads the caucus-locations page, pulls the text out of every
# <div class="locationname|precinct|address|address2"> element and writes
# one tab-separated row per location to "caucus_locations02.tsv".
#
# NOTE: this is a Python 2 script (urllib2 and BeautifulSoup 3).
import sys
import urllib2

from BeautifulSoup import BeautifulSoup

# Location of the page we will scrape
url = 'http://chrisessig.com/caucus_locations02.html'

# One TSV column per <div class="..."> on the page, in output order.
# The same names are used for the header row.
columns = ['locationname', 'precinct', 'address', 'address2']

# Total number of caucus precinct locations in Iowa. Used only as a sanity
# check so a changed page produces a warning instead of an IndexError.
expected_rows = 1749

# Fetch and parse the page first, so a failed download does not truncate an
# existing output file. Always release the HTTP connection.
page = urllib2.urlopen(url)
try:
    soup = BeautifulSoup(page)
finally:
    page.close()

# Search the document once per column instead of once per column per row.
cells = [soup.findAll('div', attrs={'class': name}) for name in columns]

# Rows are built by position, so only as many rows as the shortest column
# can be written without running off the end of a list.
row_count = min(len(found) for found in cells)
if row_count != expected_rows or any(len(found) != row_count for found in cells):
    sys.stderr.write(
        "warning: expected %d locations, found per-column counts %r; "
        "writing %d rows\n"
        % (expected_rows, [len(found) for found in cells], row_count)
    )

# Create a file called "caucus_locations02.tsv" where we'll save our data.
# The with-block closes the file even if writing a row fails.
with open('caucus_locations02.tsv', 'w') as f:
    # Header row: locationname (tab) precinct (tab) address (tab) address2
    f.write("\t".join(columns) + "\n")
    for row in zip(*cells):
        # .text is unicode; encode explicitly because str() would raise
        # UnicodeEncodeError on any non-ASCII character.
        line = u"\t".join(div.text for div in row)
        f.write(line.encode('utf-8') + "\n")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment