Created
July 11, 2017 19:54
-
-
Save vb100/c92a543b8030f252cb8201df10d19a51 to your computer and use it in GitHub Desktop.
This Python application reads all the data for real estate listings directly from a web page, then writes the data into a pandas DataFrame and prints it to the terminal.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas
import requests
from bs4 import BeautifulSoup as bc
# Scrape every results page of the Rock Springs real-estate listing, collect
# one record per property, print the records as a DataFrame and save them to
# a CSV file.

# Root of the listing; the paginated result pages live underneath it.
BASE_URL = "http://www.pythonhow.com/real-estate/rock-springs-wy/LCWYROCKSPRINGS/"
# The "s=" offset in the page URL advances in steps of 10 per results page.
RESULTS_PER_PAGE = 10
# Seconds to wait for the server before giving up instead of hanging forever.
REQUEST_TIMEOUT = 30
OUTPUT_CSV = "Output.csv"


def build_page_url(offset):
    """Return the URL of the results page that starts at listing *offset*."""
    return f"{BASE_URL}t=0&s={offset}.html"


def clean_price(text):
    """Return *text* with every newline and space character removed."""
    return text.replace("\n", "").replace(" ", "")


def count_pages(soup):
    """Return the number of result pages advertised by the pagination links.

    The text of the last ``<a class="Page">`` element is taken as the page
    count. When the page has no such links, a single page is assumed.

    Raises:
        ValueError: if the last pagination link's text is not an integer.
    """
    page_links = soup.find_all("a", {"class": "Page"})
    if not page_links:
        return 1
    return int(page_links[-1].text)


def bold_text(item, css_class):
    """Return the text of the ``<b>`` inside ``<span class=css_class>``.

    Returns None when either the span or the ``<b>`` tag is absent.
    """
    span = item.find("span", {"class": css_class})
    bold = span.find("b") if span is not None else None
    return bold.text if bold is not None else None


def find_lot_size(item):
    """Return the feature value paired with the "Lot Size" label, or None.

    Labels (``featureGroup``) and values (``featureName``) are paired
    positionally inside each ``columnGroup``; if several labels mention
    "Lot Size", the last one wins.
    """
    lot_size = None
    for column_group in item.find_all("div", {"class": "columnGroup"}):
        labels = column_group.find_all("span", {"class": "featureGroup"})
        values = column_group.find_all("span", {"class": "featureName"})
        for label, value in zip(labels, values):
            if "Lot Size" in label.text:
                lot_size = value.text
    return lot_size


def parse_listing(item):
    """Build the record dict for one ``propertyRow`` element.

    Every key is always present; a field that cannot be found in the markup
    is stored as None so that all records share the same columns.
    """
    # The first "propAddressCollapse" span is the street address, the
    # second (when present) the locality.
    address_spans = item.find_all("span", {"class": "propAddressCollapse"})
    price_tag = item.find("h4", {"class": "propPrice"})
    return {
        "Address": address_spans[0].text if address_spans else None,
        "Locality": address_spans[1].text if len(address_spans) > 1 else None,
        "Price": clean_price(price_tag.text) if price_tag is not None else None,
        "Beds": bold_text(item, "infoBed"),
        "Area": bold_text(item, "infoSqFt"),
        "Full Baths": bold_text(item, "infoValueFullBath"),
        "Half Baths": bold_text(item, "infoValueHalfBath"),
        "Lot Size": find_lot_size(item),
    }


def main():
    """Download all result pages, print the listings and write the CSV.

    Raises:
        requests.RequestException: if the first page cannot be fetched or
            any request times out.
    """
    response = requests.get(BASE_URL, timeout=REQUEST_TIMEOUT)
    # Without the first page the page count is unknown, so fail loudly here.
    response.raise_for_status()
    page_count = count_pages(bc(response.content, "html.parser"))
    print(page_count)

    listings = []
    for offset in range(0, page_count * RESULTS_PER_PAGE, RESULTS_PER_PAGE):
        url = build_page_url(offset)
        response = requests.get(url, timeout=REQUEST_TIMEOUT)
        print(url)
        print(response)
        if not response.ok:
            # Best effort: a failed page contributes no listings, but the
            # remaining pages are still scraped.
            print("Skipping page, HTTP status", response.status_code)
            continue
        soup = bc(response.content, "html.parser")
        rows = soup.find_all("div", {"class": "propertyRow"})
        listings.extend(parse_listing(row) for row in rows)

    df = pandas.DataFrame(listings)
    print(df)
    df.to_csv(OUTPUT_CSV)


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment