Created
          March 4, 2015 06:40 
        
      - 
      
 - 
        
Save cjwinchester/dfcaec0e0b13647e8ac5 to your computer and use it in GitHub Desktop.  
    Douglas County restaurant inspection scraper.
  
        
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | ''' | |
| need mechanize, bs4 and geopy | |
| ''' | |
| from mechanize import Browser | |
| from bs4 import * | |
| import datetime | |
| from time import * | |
| from geopy.geocoders import GoogleV3 | |
def padZero(x):
    """Return x as a string, left-padded with one "0" if it is a single character.

    Used to turn the month/day parts of an M/D/YYYY date into two-digit
    fields. Surrounding whitespace is removed first so that a value such
    as " 3" (common in text pulled out of HTML cells) is still padded
    to "03" instead of being passed through as a two-character string.

    x may be anything str() accepts (int or string). Values that are
    already two or more characters long after stripping are returned
    unchanged apart from the stripping.
    """
    text = str(x).strip()
    if len(text) == 1:
        return "0" + text
    return text
# Scrape the Douglas County food facility ratings table, geocode each
# address, and write one pipe-delimited line per facility:
#   name|address|lat|lng|rating|YYYY-MM-DD

# The output file name is stamped with the date of the run.
today = datetime.date.today().strftime("%Y-%m-%d")

mech = Browser()
# Present a browser-like User-agent and skip robots.txt handling.
mech.addheaders = [('User-agent', 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1')]
mech.set_handle_robots(False)

# An empty rname= is submitted as the search term.
# NOTE(review): presumably this returns every facility -- confirm against the site.
baseurl = "http://www.douglascountyhealth.com/food-a-drink/food-facility-ratings?rname=&submit=Search"
page = mech.open(baseurl)
html = page.read()
# NOTE(review): no parser is named, so bs4 picks whichever is installed;
# left as-is because a different parser could change the table indexing below.
soup = BeautifulSoup(html)

# The ratings are read from the second <table> on the page.
table = soup.find_all('table')[1]

# One geocoder instance is enough for the whole run.
# NOTE(review): current Google geocoding generally needs an API key -- confirm.
geolocator = GoogleV3()

# "with" guarantees the file is closed even if a request fails mid-run.
with open('douglas-restaurants-' + today + '.txt', 'wb') as f:
    # [1:] skips the first row (treated as the header).
    for row in table.find_all('tr')[1:]:
        col = row.find_all('td')
        if len(col) < 4:
            # Not a data row (spacer, colspan notice, ...): nothing to record.
            continue

        name = col[0].text
        address = col[1].text
        rating = col[2].text

        # Separate street and city so the query is not "123 Main StOmaha, NE".
        location = geolocator.geocode(address + ", Omaha, NE")
        if location is None:
            # No geocoding match: keep the row, leave coordinates empty.
            lat = ""
            lng = ""
        else:
            lat = location.latitude
            lng = location.longitude

        # Source date is M/D/YYYY; rewrite as zero-padded YYYY-MM-DD.
        fulldate = col[3].text.strip().split("/")
        month = fulldate[0]
        day = fulldate[1]
        year = fulldate[2]
        newdate = year + "-" + padZero(month) + "-" + padZero(day)

        # Build the record as text and encode once at the I/O boundary.
        line = u"|".join((name, address, str(lat), str(lng), rating, newdate)) + u"\n"
        f.write(line.encode('utf-8'))
        print(name)
        # Flush per row so progress survives an interruption during the
        # slow, network-bound geocoding loop.
        f.flush()
  
    Sign up for free
    to join this conversation on GitHub.
    Already have an account?
    Sign in to comment