getmehire · September 15, 2020 21:21
diff --git a/realEstateScrapper.py b/realEstateScrapper.py
 from bs4 import BeautifulSoup
 import requests
 import random
 import pandas
 from random import randint
 from time import sleep

 # Scrape Zoopla's Edinburgh for-sale search results (pn=1..4) into properties.csv.
 #
 # For every listing card found on each result page the script records price,
 # address, bed/bath/reception counts and the marketing agent's name and phone.
 # Any element missing from a card is stored as None, so the per-column null
 # counts printed at the end show how much data could not be extracted.
 targetURL = "https://www.zoopla.co.uk/for-sale/property/edinburgh/"
 headers = {"Accept-Language": "en-GB, en;q=0.5"}
 # Seconds to wait on a page request before requests raises a timeout error;
 # without it a stalled connection would block the script indefinitely.
 requestTimeout = 30
 pandas.options.display.max_columns = None
 pandas.options.display.max_rows = None

 # One list per output column; every listing appends exactly one value to each
 # list so they stay the same length for the DataFrame below.
 price = []
 address = []
 beds = []
 baths = []
 reception = []
 soldby = []
 size = []
 phone = []


 def _cleanText(node, *fragments):
 	"""Return node.text with each fragment removed in order, or None if node is None."""
 	if node is None:
 		return None
 	text = node.text
 	for fragment in fragments:
 		text = text.replace(fragment, "")
 	return text


 def _titleOf(node):
 	"""Return the node's 'title' attribute, or None if the node or attribute is missing."""
 	if node is None:
 		return None
 	return node.get('title')


 url = targetURL + "?identifier=edinburgh&q=Edinburgh&radius=0&pn="
 for page in range(1, 5):
 	pageURL = url + str(page)
 	# Send the Accept-Language header with every page request.
 	response = requests.get(pageURL, headers=headers, timeout=requestTimeout)
 	soup = BeautifulSoup(response.text, 'html.parser')
 	wrapperDiv = soup.find_all("div", class_="listing-results-wrapper")
 	# Random 2-10 second pause after each request (presumably to avoid
 	# hitting the site too quickly).
 	sleep(randint(2, 10))
 	print(pageURL)
 	# Iterate the listings parsed from this page's response so that each
 	# page contributes its own rows.
 	for listing in wrapperDiv:
 		# Strip newlines, the "Offers over" label and spaces from the price text.
 		price.append(_cleanText(listing.find("a", class_="text-price"), "\n", "Offers over", " "))
 		address.append(_cleanText(listing.find("a", class_="listing-results-address")))
 		beds.append(_titleOf(listing.find("span", class_="num-beds")))
 		baths.append(_titleOf(listing.find("span", class_="num-baths")))
 		reception.append(_titleOf(listing.find("span", class_="num-reception")))
 		# The agent name is the first <span> inside the "marketed by" paragraph.
 		marketedBy = listing.find("p", class_="listing-results-marketed")
 		agentSpan = marketedBy.find("span") if marketedBy is not None else None
 		soldby.append(_cleanText(agentSpan))
 		# NOTE(review): the size is NOT scraped; it is a random placeholder
 		# between 300 and 2200. Replace with real data or drop the column.
 		size.append(random.randint(300, 2200))
 		phone.append(_cleanText(listing.find("span", class_="agent_phone"), " **", "\n"))

 properties = pandas.DataFrame({
 	'Price': price,
 	'Address': address,
 	'Beds': beds,
 	'Baths': baths,
 	'Receptions': reception,
 	'Size (sq. ft)': size,
 	'Agent Name': soldby,
 	'Agent Phone': phone,
 })

 # Report the number of rows scraped, write them out, then show null counts per column.
 print(len(properties))
 properties.to_csv("properties.csv", index=False)
 print(properties.isnull().sum())
	from bs4 import BeautifulSoup
	import requests
	import random
	import pandas
	from random import randint
	from time import sleep

	# Scrape Zoopla's Edinburgh for-sale search results (pn=1..4) into properties.csv.
	#
	# For every listing card found on each result page the script records price,
	# address, bed/bath/reception counts and the marketing agent's name and phone.
	# Any element missing from a card is stored as None, so the per-column null
	# counts printed at the end show how much data could not be extracted.
	targetURL = "https://www.zoopla.co.uk/for-sale/property/edinburgh/"
	headers = {"Accept-Language": "en-GB, en;q=0.5"}
	# Seconds to wait on a page request before requests raises a timeout error;
	# without it a stalled connection would block the script indefinitely.
	requestTimeout = 30
	pandas.options.display.max_columns = None
	pandas.options.display.max_rows = None

	# One list per output column; every listing appends exactly one value to each
	# list so they stay the same length for the DataFrame below.
	price = []
	address = []
	beds = []
	baths = []
	reception = []
	soldby = []
	size = []
	phone = []


	def _cleanText(node, *fragments):
		"""Return node.text with each fragment removed in order, or None if node is None."""
		if node is None:
			return None
		text = node.text
		for fragment in fragments:
			text = text.replace(fragment, "")
		return text


	def _titleOf(node):
		"""Return the node's 'title' attribute, or None if the node or attribute is missing."""
		if node is None:
			return None
		return node.get('title')


	url = targetURL + "?identifier=edinburgh&q=Edinburgh&radius=0&pn="
	for page in range(1, 5):
		pageURL = url + str(page)
		# Send the Accept-Language header with every page request.
		response = requests.get(pageURL, headers=headers, timeout=requestTimeout)
		soup = BeautifulSoup(response.text, 'html.parser')
		wrapperDiv = soup.find_all("div", class_="listing-results-wrapper")
		# Random 2-10 second pause after each request (presumably to avoid
		# hitting the site too quickly).
		sleep(randint(2, 10))
		print(pageURL)
		# Iterate the listings parsed from this page's response so that each
		# page contributes its own rows.
		for listing in wrapperDiv:
			# Strip newlines, the "Offers over" label and spaces from the price text.
			price.append(_cleanText(listing.find("a", class_="text-price"), "\n", "Offers over", " "))
			address.append(_cleanText(listing.find("a", class_="listing-results-address")))
			beds.append(_titleOf(listing.find("span", class_="num-beds")))
			baths.append(_titleOf(listing.find("span", class_="num-baths")))
			reception.append(_titleOf(listing.find("span", class_="num-reception")))
			# The agent name is the first <span> inside the "marketed by" paragraph.
			marketedBy = listing.find("p", class_="listing-results-marketed")
			agentSpan = marketedBy.find("span") if marketedBy is not None else None
			soldby.append(_cleanText(agentSpan))
			# NOTE(review): the size is NOT scraped; it is a random placeholder
			# between 300 and 2200. Replace with real data or drop the column.
			size.append(random.randint(300, 2200))
			phone.append(_cleanText(listing.find("span", class_="agent_phone"), " **", "\n"))

	properties = pandas.DataFrame({
		'Price': price,
		'Address': address,
		'Beds': beds,
		'Baths': baths,
		'Receptions': reception,
		'Size (sq. ft)': size,
		'Agent Name': soldby,
		'Agent Phone': phone,
	})

	# Report the number of rows scraped, write them out, then show null counts per column.
	print(len(properties))
	properties.to_csv("properties.csv", index=False)
	print(properties.isnull().sum())