Skip to content

Instantly share code, notes, and snippets.

@ijharulislam
Created September 8, 2016 08:29
Show Gist options
  • Select an option

  • Save ijharulislam/b7403455ec4fd2a55f8a43b0def38dfa to your computer and use it in GitHub Desktop.

Select an option

Save ijharulislam/b7403455ec4fd2a55f8a43b0def38dfa to your computer and use it in GitHub Desktop.
# import urllib
# import urllib2
from datetime import datetime
import xlrd
from lxml import html
from bs4 import BeautifulSoup
import xlsxwriter
import re
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import time
import csv
# Input workbook; only its first sheet is used.  The scraping loop reads
# columns 0-2 of each row as SNO, parcel number and Zillow URL.
prop_book = xlrd.open_workbook("Zillow_CA_Sacramento.xls")
prop_sheet = prop_book.sheet_by_index(0)
# One output dict per processed row is appended here and written to CSV
# when the run ends.
data = []
import socks
import socket
# Register a local SOCKS4 proxy at 127.0.0.1:9150 as the default for socks
# sockets (9150 is presumably a local Tor SOCKS port, and the trailing True
# presumably enables remote DNS resolution -- TODO confirm both).
socks.setdefaultproxy(socks.PROXY_TYPE_SOCKS4, '127.0.0.1', 9150, True)
# Swap the standard socket class for the proxying one.  This is done before
# urllib2 and requests are imported below, so keep this order.
socket.socket = socks.socksocket
import urllib2
import requests
import random
##### Range 1-2865 #######
# from selenium import webdriver
# profile=webdriver.FirefoxProfile()
# profile.set_preference('network.proxy.type', 1)
# profile.set_preference('network.proxy.socks', '127.0.0.1')
# profile.set_preference('network.proxy.socks_port',9150)
# from fake_useragent import UserAgent
# ua = UserAgent()
# profile = webdriver.FirefoxProfile()
# profile.set_preference("general.useragent.override", ua.chrome)
# driver=webdriver.Firefox(profile)
##### Second Loop 424
# driver = webdriver.Firefox()
# Candidate proxies, each written as "host:port".
proxy_list = [
    "119.81.13.24:8080",
    "109.236.91.82:8080",
    "85.194.95.85:8080",
    "162.219.176.205:8080",
]
# Spreadsheet rows scraped by this run: first index inclusive, last exclusive.
ROW_START = 6653
ROW_STOP = 6736
# The output file is named after the row range (here "6653,6736.csv"), so
# changing the range can no longer leave a stale file name behind.
OUTPUT_CSV = "%d,%d.csv" % (ROW_START, ROW_STOP)
# Proxy handed to requests for https:// URLs.
HTTPS_PROXY = "109.236.91.82:8080"
# Seconds to wait on one listing request.  Without a timeout requests.get()
# can block forever on a dead proxy and the results gathered so far would
# never be written.
REQUEST_TIMEOUT = 60
# Column order of the output CSV.
csv_columns = ["SNO", "Parcel Number", 'Zillow URL', 'Zestimate', 'Rent Zestimate']


def WriteDictToCSV(csv_columns, dict_data, path=OUTPUT_CSV):
    """Write ``dict_data`` to ``path`` as CSV with a header row.

    csv_columns -- field names, in output order.
    dict_data   -- iterable of dicts keyed by those field names; a key that
                   is missing from a row is written as an empty cell.
    path        -- destination file; defaults to the file named after the
                   current row range.
    """
    with open(path, 'w') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=csv_columns)
        writer.writeheader()
        for row in dict_data:
            writer.writerow(row)


def _summary_row_text(sup, row_index):
    """Return the text of the second <span> of one "home-summary-row" div.

    The rows are looked up inside the first <div class="estimates"> of
    ``sup``.  Raises AttributeError or IndexError when that markup is absent.
    """
    summary_rows = sup.find("div", class_="estimates").find_all(
        "div", class_="home-summary-row")
    return summary_rows[row_index].find_all("span")[1].text


def _extract_estimates(sup):
    """Return the estimate fields found in a parsed listing page.

    The result may hold "Rent Zestimate" and/or "Zestimate"; a value that
    cannot be located is simply left out.  Lookup failures are reported on
    stdout and never raised, so one odd page cannot abort the run.
    """
    estimates = {}

    # Rent Zestimate: second "zest" box on the page, when there is one.
    try:
        zest_boxes = sup.find_all("div", class_="zest zsg-lg-1-3 zsg-md-1-1")
        rent_zest = zest_boxes[1].find("div", class_="zest-value").text
    except Exception:
        # Deliberately broad (best effort), but unlike a bare ``except`` it
        # lets KeyboardInterrupt/SystemExit through.
        print("Rent Zestimate not found")
    else:
        # Store the value before anything else can fail.
        estimates["Rent Zestimate"] = rent_zest

    # Zestimate: summary row 1 when it holds a dollar amount ...
    try:
        zestimate = _summary_row_text(sup, 1)
    except Exception as e:
        print(e)
        print("Inside Sold")
        zestimate = None

    # ... otherwise fall back to summary row 2, whatever its text is.
    if zestimate is None or "$" not in zestimate:
        try:
            zestimate = _summary_row_text(sup, 2)
        except Exception:
            print("Zestimate not found")
            return estimates

    estimates["Zestimate"] = zestimate
    return estimates


try:
    for i in range(ROW_START, ROW_STOP):
        o = prop_sheet.row_values(i)
        output = {
            "SNO": o[0],
            "Parcel Number": o[1],
            "Zillow URL": o[2],
        }
        r = requests.get(o[2], proxies={'https': HTTPS_PROXY},
                         timeout=REQUEST_TIMEOUT)
        sup = BeautifulSoup(r.content, "lxml")
        output.update(_extract_estimates(sup))
        # The row is recorded even when no estimate could be extracted.
        data.append(output)
        print(output)
except Exception as e:
    # A spreadsheet or network failure ends the run early; whatever has been
    # collected so far is still written out below.
    print(e)
finally:
    WriteDictToCSV(csv_columns, data)
# There was some error from 3485-3495
# Error from 3597
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment