Skip to content

Instantly share code, notes, and snippets.

@cjwinchester
Created March 4, 2015 06:40
Show Gist options
  • Save cjwinchester/dfcaec0e0b13647e8ac5 to your computer and use it in GitHub Desktop.
Save cjwinchester/dfcaec0e0b13647e8ac5 to your computer and use it in GitHub Desktop.
Douglas County restaurant inspection scraper.
'''
Requires the third-party packages mechanize, bs4 (Beautiful Soup 4) and geopy.
'''
from mechanize import Browser
from bs4 import *
import datetime
from time import *
from geopy.geocoders import GoogleV3
def padZero(x):
    """Return ``x`` as a string, prefixed with "0" if it is one character long.

    ``x`` is converted with ``str()`` first, so both numbers and strings are
    accepted. Only a value whose string form is exactly one character long is
    padded; empty or longer values are returned unchanged.
    """
    text = str(x)
    return "0" + text if len(text) == 1 else text
# Scrape the Douglas County food facility ratings page, geocode every listed
# address and write one pipe-delimited line per facility:
#     name|address|latitude|longitude|rating|YYYY-MM-DD
# The output file name carries the date of the run.
today = datetime.date.today().strftime("%Y-%m-%d")

mech = Browser()
mech.addheaders = [('User-agent', 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1')]
mech.set_handle_robots(False)
baseurl = "http://www.douglascountyhealth.com/food-a-drink/food-facility-ratings?rname=&submit=Search"
page = mech.open(baseurl)
html = page.read()
soup = BeautifulSoup(html)

# The second <table> on the page is used as the results table.
table = soup.find_all('table')[1]

# One geocoder instance is enough for the whole run.
# NOTE(review): newer geopy releases appear to require an api_key for
# GoogleV3 -- confirm against the installed version.
geolocator = GoogleV3()

# The with-block guarantees the file is closed even if a row raises.
with open('douglas-restaurants-' + today + '.txt', 'wb') as f:
    # The first row is skipped (presumably the header row).
    for row in table.find_all('tr')[1:]:
        col = row.find_all('td')
        if len(col) < 4:
            # Not a full name/address/rating/date row; nothing to record.
            continue

        name = col[0].text
        address = col[1].text
        rating = col[2].text

        # Separate the street address from the city so the geocoder does not
        # receive them glued together ("... StOmaha, NE").
        location = geolocator.geocode(address.strip() + ", Omaha, NE")
        if location is None:
            # geocode() found no match; keep the row, leave coordinates blank.
            lat = ""
            lng = ""
        else:
            lat = str(location.latitude)
            lng = str(location.longitude)

        # The page shows dates as month/day/year; rewrite them as YYYY-MM-DD.
        rawdate = col[3].text
        fulldate = rawdate.split("/")
        if len(fulldate) == 3:
            month, day, year = fulldate
            newdate = year + "-" + padZero(month) + "-" + padZero(day)
        else:
            # Unexpected date format: keep the text as found rather than abort.
            newdate = rawdate

        # Build the line as text and encode once, at the point of writing.
        line = u"|".join((name, address, lat, lng, rating, newdate)) + u"\n"
        f.write(line.encode('utf-8'))
        print(name)
        # Flush per row so finished rows survive a later failure.
        f.flush()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment