Skip to content

Instantly share code, notes, and snippets.

@getmehire
Last active September 15, 2020 21:21
Show Gist options
  • Save getmehire/feb31afd4de5ea9649f5352e97083aeb to your computer and use it in GitHub Desktop.
Save getmehire/feb31afd4de5ea9649f5352e97083aeb to your computer and use it in GitHub Desktop.
Real Estate Scraper with Python 3, Beautiful Soup and Pandas
from bs4 import BeautifulSoup
import requests
import random
import pandas
from random import randint
from time import sleep
# Landing page of the Edinburgh "for sale" search.
targetURL = "https://www.zoopla.co.uk/for-sale/property/edinburgh/"
# Language preference sent with every HTTP request.
headers = {"Accept-Language": "en-GB, en;q=0.5"}
# Paginated search URL; the page number is appended after "pn=".
url = targetURL + "?identifier=edinburgh&q=Edinburgh&radius=0&pn="

# Result pages to scrape (1 to 4 inclusive).
PAGES = range(1, 5)
# Seconds to wait for a response before giving up on a page.
REQUEST_TIMEOUT = 30
# Column order of the resulting DataFrame / CSV file.
COLUMNS = [
    'Price',
    'Address',
    'Beds',
    'Baths',
    'Receptions',
    'Size (sq. ft)',
    'Agent Name',
    'Agent Phone',
]


def page_url(page):
    """Return the search-results URL for result page number *page*."""
    return url + str(page)


def clean_price(raw):
    """Normalise the text of a price link.

    Removes newlines, the "Offers over" prefix and all spaces.
    Returns None when *raw* is None (the listing had no price element).
    """
    if raw is None:
        return None
    return raw.replace("\n", "").replace("Offers over", "").replace(" ", "")


def clean_phone(raw):
    """Normalise the text of an agent phone element.

    Removes the trailing " **" marker and newlines.
    Returns None when *raw* is None (the listing had no phone element).
    """
    if raw is None:
        return None
    return raw.replace(" **", "").replace("\n", "")


def _text_of(tag):
    """Return ``tag.text``, or None when the element was not found."""
    return None if tag is None else tag.text


def _title_of(listing, css_class):
    """Return the ``title`` attribute of the span with *css_class*.

    Returns None when the span or its ``title`` attribute is missing.
    """
    span = listing.find("span", class_=css_class)
    return None if span is None else span.get("title")


def _parse_listing(listing):
    """Extract one row (a dict keyed by COLUMNS) from a listing element.

    Any element that is missing from the listing yields None for its
    column instead of aborting the whole scrape.
    """
    marketed = listing.find("p", class_="listing-results-marketed")
    agent_span = None if marketed is None else marketed.find("span")
    return {
        'Price': clean_price(
            _text_of(listing.find("a", class_="text-price"))),
        'Address': _text_of(
            listing.find("a", class_="listing-results-address")),
        'Beds': _title_of(listing, "num-beds"),
        'Baths': _title_of(listing, "num-baths"),
        'Receptions': _title_of(listing, "num-reception"),
        # NOTE: the size is NOT scraped; it is a random placeholder value,
        # kept so the output keeps the same columns as before.
        'Size (sq. ft)': random.randint(300, 2200),
        'Agent Name': _text_of(agent_span),
        'Agent Phone': clean_phone(
            _text_of(listing.find("span", class_="agent_phone"))),
    }


def _fetch_listings(page):
    """Download result page *page* and return its listing elements.

    A page that cannot be downloaded is reported and yields an empty
    list, so a single failed request does not discard the other pages.
    """
    pageURL = page_url(page)
    try:
        response = requests.get(
            pageURL, headers=headers, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
    except requests.RequestException as err:
        print("Skipping", pageURL, "-", err)
        return []
    soup = BeautifulSoup(response.text, 'html.parser')
    return soup.find_all("div", class_="listing-results-wrapper")


def main():
    """Scrape every page in PAGES and write the rows to properties.csv.

    Prints each page URL as it is processed, then the number of rows
    collected and the per-column count of missing values.
    """
    # Show full frames when printing, without truncation.
    pandas.options.display.max_columns = None
    pandas.options.display.max_rows = None

    rows = []
    for page in PAGES:
        # Parse the listings of THIS page; previously the listings of a
        # single request made before the loop were re-parsed on every pass.
        listings = _fetch_listings(page)
        # Random pause between requests.
        sleep(randint(2, 10))
        print(page_url(page))
        rows.extend(_parse_listing(listing) for listing in listings)

    properties = pandas.DataFrame(rows, columns=COLUMNS)
    print(len(properties))
    properties.to_csv("properties.csv", index=False)
    print(properties.isnull().sum())


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment