Skip to content

Instantly share code, notes, and snippets.

@bhubbard
Forked from mansooralam/zillowMiner.py
Created October 31, 2016 21:35
Show Gist options
  • Save bhubbard/ae8870c40ecf9b369dacff15df3cca70 to your computer and use it in GitHub Desktop.
Quick helper script for mining zillow data / commute times
"""
quick script put together to scrape zillow listing data
at the very least it functions so you don't need to go back and forth to calculate commute times
very messy... maybe i'll revisit this someday
"""
import requests
import re
import csv
from pyzillow import pyzillow
from bs4 import BeautifulSoup
from gmaps import Geocoding,Directions
import json
# AUTH FOR ZILLOW
# Zillow Web Services ID used to authenticate pyzillow API calls.
# NOTE(review): "zillow_id" looks like a placeholder -- replace with a real
# ZWSID before running; every get_deep_search_results call depends on it.
zwsid = "zillow_id"
# Shared wrapper instance; used below by getZillowData().
zillow_data = pyzillow.ZillowWrapper(zwsid)
# Initialize a directions class for Google Maps
# (used below to compute the two commute routes per listing).
api2 = Directions()
"""
or construct URL based on different parameters...
beds =
bath =
town =
state =
building =
min_rent =
max_rent =
...
"""
# Commute destinations as "lat, lng" strings for the Google Maps Directions
# calls in getZillowData().
dest_1 = "42.463441, -71.266918"  # location 1 (driving commute)
dest_2 = "42.74625, -71.181849"  # location 2 (transit commute)
# Sample Zillow search-results URL (Cambridge, MA rentals).
# BUG FIX: the original line ended with a stray ")" after the closing quote,
# which made the whole file a SyntaxError.
url = "http://www.zillow.com/homes/for_rent/Cambridge-MA/house,condo,apartment_duplex,townhouse_type/3934_rid/2-_beds/1-_baths/0-757142_price/0-2800_mp/days_sort/42.396016,-71.035795,42.360764,-71.188574_rect/12_zm/"
def extractID(to_parse):
    """Extract an (address, zipcode) pair from a Zillow /homedetails/ link.

    Parameters
    ----------
    to_parse : str
        Text containing a "/homedetails/<slug>/<id>..." link, where <slug>
        is a hyphen-separated street address whose last 5 digits are the ZIP.

    Returns
    -------
    tuple[str, str]
        (address with hyphens replaced by spaces, minus the 5-char ZIP;
         the 5-digit ZIP code). Note: the address keeps a trailing space,
        exactly as the original produced.

    Raises
    ------
    IndexError
        If no /homedetails/ link is found (same as the original behavior).
    """
    # BUG FIX (idiom): the original ran the identical findall() twice;
    # run it once and reuse the match. The pattern is now a raw string.
    slug = re.findall(r"/homedetails/(.*)/[1-9]", to_parse)[0]
    address = slug.replace("-", " ")[:-5]  # drop the trailing 5-digit ZIP
    zipcode = slug.replace("-", "")[-5:]   # last 5 chars are the ZIP
    return (str(address), str(zipcode))
def downURL(url):
    """Download a Zillow search-results page and list its listings.

    Parameters
    ----------
    url : str
        A Zillow search-results URL.

    Returns
    -------
    list[tuple[str, str]]
        One (address, zipcode) tuple per listing anchor found on the page.

    BUG FIX: in the original, the only line inside the loop that populated
    id_list was commented out, so the function always returned an empty
    list (and, with indentation restored, a for-loop whose body is only a
    comment is a SyntaxError).
    """
    r = requests.get(url)
    soup = BeautifulSoup(r.text)
    # Each listing link on the results page carries this class pair.
    results = soup.findAll("a", {"class": "hdp-link routable"})
    return [extractID(str(tag)) for tag in results]
def getTravelDuration(route):
    """Total commute duration, in seconds, for a Google Directions route.

    Parameters
    ----------
    route : list[dict]
        Elements of a parsed Directions API response; each element has
        ['legs'][0]['steps'], and each step carries ['duration']['value']
        (seconds, per the Directions API).

    Returns
    -------
    int
        Sum of the step durations of the first leg of every route element.

    Bug fixes vs. the original:
    - `duration` was read before it was ever assigned (UnboundLocalError
      on the very first step).
    - the json.dumps/json.loads round trip was a no-op on an
      already-parsed dict and is removed.
    - the trailing `* 60` double-scaled the result: duration.value is
      already in seconds (the original's own comment said so).
    """
    duration = 0
    for element in route:
        # Only the first leg is inspected, matching the original logic.
        for step in element['legs'][0]['steps']:
            duration += step['duration']['value']
    return duration
def getZillowData(add):
    """Build a dictionary of property data for one listing.

    Parameters
    ----------
    add : tuple[str, str]
        (address, zipcode) as produced by extractID().

    Returns
    -------
    dict
        Keys: zillow_id, lat, lon, size, bath, bed, details, price,
        dist_1 (driving commute, seconds), dist_2 (transit commute, seconds).

    Bug fixes vs. the original:
    - 'size', 'bath' and 'bed' were all copy-pasted from result.longitude;
      they now use pyzillow's home_size / bathrooms / bedrooms fields.
    - the price regex was applied to an undefined name `price`
      (NameError); it now scans the summary <div> that was parsed for it.
    """
    property_data = {}
    property_details = zillow_data.get_deep_search_results(add[0], add[1])
    result = pyzillow.GetDeepSearchResults(property_details)
    property_data['zillow_id'] = result.zillow_id
    property_data['lat'] = result.latitude
    property_data['lon'] = result.longitude
    # FIXED: these three previously all stored the longitude.
    property_data['size'] = result.home_size
    property_data['bath'] = result.bathrooms
    property_data['bed'] = result.bedrooms
    property_data['details'] = result.home_detail_link
    # Extract the asking price from the listing page's summary row.
    price_getter = requests.get(result.home_detail_link)
    soup = BeautifulSoup(price_getter.text)
    parse_me = soup.findAll("div", {"class": "main-row home-summary-row"})
    # FIXED: original referenced undefined `price`; scan the parsed markup.
    property_data['price'] = re.findall(r"\$[^a-z],\d\d\d", str(parse_me))[0]
    # Set route parameters: driving to dest_1, transit to dest_2.
    route1 = api2.directions(add, dest_1, mode="driving")
    route2 = api2.directions(add, dest_2, mode="transit")
    # Get commute times (seconds).
    property_data['dist_1'] = getTravelDuration(route1)
    property_data['dist_2'] = getTravelDuration(route2)
    return property_data
def toCSV(url):
    """Scrape every listing behind a search-results URL into filename.csv.

    Parameters
    ----------
    url : str
        A Zillow search-results URL (passed through to downURL).

    Side effects
    ------------
    Writes 'filename.csv' in the current directory, one row per listing,
    with a header taken from the first property dict. Writes nothing when
    no listings are found.

    Bug fixes vs. the original:
    - open(..., 'wb') is invalid for csv.DictWriter on Python 3; the csv
      docs require text mode with newline=''.
    - an empty result set no longer raises IndexError on properties[0].
    """
    prop_list = downURL(url)
    # Fetch the detail dict for every listing found on the results page.
    properties = [getZillowData(listing) for listing in prop_list]
    if not properties:
        return  # nothing matched the search; don't emit a broken CSV
    keys = properties[0].keys()
    # Write to CSV file
    with open('filename.csv', 'w', newline='') as outfile:
        dict_writer = csv.DictWriter(outfile, keys)
        dict_writer.writeheader()
        dict_writer.writerows(properties)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment