Skip to content

Instantly share code, notes, and snippets.

@gotoplanb
Created December 8, 2010 19:20
Show Gist options
  • Save gotoplanb/733755 to your computer and use it in GitHub Desktop.
Save gotoplanb/733755 to your computer and use it in GitHub Desktop.
table scraper
#!/usr/bin/env python
from urllib import unquote
import csv
from mechanize import Browser
from BeautifulSoup import BeautifulSoup
# Download the agent listing page and locate the first <table> in it.
mech = Browser()
url = "http://g8rweb.com/scrapers/lakecityagents.html"
page = mech.open(url)
html = page.read()
soup = BeautifulSoup(html)
table = soup.find("table")

# Write one tab-separated (email, full name) record per table row that
# carries a link in its first cell.
#
# The file is opened in binary mode because Python 2's csv module does its
# own line-ending handling; text mode can produce "\r\r\n" on Windows.
# The ``with`` block guarantees the file is flushed and closed even if a
# row raises part-way through.
#
# NOTE(review): Python 2's csv writer works on byte strings; names with
# non-ASCII characters may raise UnicodeEncodeError -- confirm whether the
# values need to be encoded (e.g. to UTF-8) before writing.
with open('myfile.csv', 'wb') as outfile:
    myPrettyWriter = csv.writer(outfile, delimiter="\t")
    for row in table.findAll('tr'):
        # Look the cells up once per row instead of once per field.
        cells = row.findAll('td')
        if not cells:
            # No <td> cells at all (e.g. a header row): nothing to extract.
            continue

        link = cells[0].find('a')
        href = link.get('href') if link is not None else None
        if href is None:
            # First cell has no link, or the link has no href: skip the
            # row, as the original bare ``except`` did.
            continue

        # Keep only the text after the last ':' (presumably stripping a
        # "mailto:" prefix -- verify against the page), then URL-decode it.
        email = unquote(href.split(':')[-1]).strip()

        # The name is read from a <font> tag in the second cell.  A row
        # that lacks that cell or tag still gets its e-mail written, with
        # an empty name (csv writes None as an empty field), instead of
        # aborting the whole run.
        font = cells[1].find('font') if len(cells) > 1 else None
        fullname = font.string if font is not None else None

        myPrettyWriter.writerow([email, fullname])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment