Skip to content

Instantly share code, notes, and snippets.

@ymkim92
Created December 10, 2019 23:07
Show Gist options
  • Save ymkim92/ac60e7765013195c8ec1c739aaa1d717 to your computer and use it in GitHub Desktop.
Save ymkim92/ac60e7765013195c8ec1c739aaa1d717 to your computer and use it in GitHub Desktop.
Collect sold house data from domain.com.au
import requests
from bs4 import BeautifulSoup
import json
import pandas as pd
import datetime
import time
import sys
import argparse
# Cutoff date compared against each listing's sold date in build_data_list();
# it is assigned from the command-line argument in the __main__ block.
stop_date = None
def _parse_yyyymmdd(text):
    """Convert a ``YYYYMMDD`` string to a ``datetime.datetime`` for argparse."""
    try:
        return datetime.datetime.strptime(text, '%Y%m%d')
    except ValueError:
        # A named converter raising ArgumentTypeError gives the user a clear
        # message instead of argparse's default "invalid <lambda> value".
        raise argparse.ArgumentTypeError(
            f'invalid date {text!r}: expected YYYYMMDD') from None


def get_arguments():
    """Parse the command line and return the resulting namespace.

    A single positional argument, ``date``, is required.  It must be given
    as ``YYYYMMDD`` and is stored on the namespace as a
    ``datetime.datetime``.

    Returns:
        argparse.Namespace with a ``date`` attribute.

    Raises:
        SystemExit: raised by argparse when the argument is missing or is
            not a valid ``YYYYMMDD`` date.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('date', type=_parse_yyyymmdd,
                        help='date until when to collect data')
    return parser.parse_args(sys.argv[1:])
def extract_json(data):
    """Return the part of *data* from its first '{' through its last '}'.

    No validation is performed: when a brace is absent, the -1 returned by
    ``str.find`` / ``str.rfind`` is used in the slice as-is.
    """
    start = data.find('{')
    stop = data.rfind('}') + 1  # +1 so the closing brace is included
    return data[start:stop]
def build_data_list(parsed):
    """Flatten a ``listingsMap`` mapping into a list of row lists.

    Each value of *parsed* is expected to carry a ``listingModel`` dict.
    For every listing a row is produced in this column order: lat, lng,
    street, suburb, auction, property_type, land_size, land_unit, baths,
    beds, parking, price, sold, date.

    * ``price`` is the price string with ``$`` and ``,`` removed, as an int.
    * ``sold`` is the raw tag text; ``date`` is parsed from its last 11
      characters (``'%d %b %Y'``) and is ``None`` when that fails.
    * Listings with a missing or malformed field are skipped.

    Args:
        parsed: mapping whose values are listing entries.

    Returns:
        A list of rows, or ``None`` as soon as a listing whose date is on or
        before the module-level ``stop_date`` is found.  Rows gathered from
        *parsed* before that point are discarded, because callers treat
        ``None`` as the "stop paging" signal.
    """
    data_list = []
    for entry in parsed.values():
        try:
            listing = entry['listingModel']
            address = listing['address']
            features = listing['features']
            row = [
                address['lat'],
                address['lng'],
                address['street'],
                address['suburb'],
                listing['auction'],
                features['propertyType'],
                features['landSize'],
                features['landUnit'],
                features['baths'],
                features['beds'],
                features['parking'],
                int(listing['price'].replace('$', '').replace(',', '')),
            ]
            sold = listing['tags']['tagText']
        except (KeyError, TypeError, ValueError, AttributeError):
            # Missing key, a None/non-dict section, or a price that is not a
            # plain "$1,234" string: skip this listing rather than abort.
            continue

        try:
            date = datetime.datetime.strptime(sold[-11:], '%d %b %Y')
        except (ValueError, TypeError):
            # Tag text without a trailing date (e.g. "Sold by private treaty").
            date = None

        # stop_date stays None unless the script entry point set it; in that
        # case no cutoff is applied.
        if date is not None and stop_date is not None and date <= stop_date:
            return None

        data_list.append(row + [sold, date])
    return data_list
def process_one_page(url):
    """Download one listings page and return its listings as a DataFrame.

    The page is fetched, the fourth ``<script>`` element is taken, the JSON
    object embedded in its text is extracted with ``extract_json`` and the
    ``listingsMap`` entry of that object is passed to ``build_data_list``.

    Args:
        url: address of the page to fetch.

    Returns:
        A ``pandas.DataFrame`` with one row per listing, or ``None`` when
        ``build_data_list`` returns ``None``.

    Raises:
        requests.RequestException: on network failure, timeout, or an HTTP
            error status.
    """
    # Without a timeout a stalled connection would hang the run indefinitely.
    response = requests.get(url, timeout=30)
    # Fail on 4xx/5xx here instead of on a confusing parse error further down.
    response.raise_for_status()

    soup = BeautifulSoup(response.text, 'html.parser')
    script = soup.find_all('script')[3]
    payload = extract_json(script.text)
    parsed = json.loads(payload)["listingsMap"]

    data_list = build_data_list(parsed)
    if data_list is None:
        return None
    return pd.DataFrame(
        data_list,
        columns=[
            'lat', 'lng', 'street', 'suburb', 'auction',
            'property_type', 'land_size', 'land_unit',
            'baths', 'beds', 'parking', 'price',
            'sold', 'date',
        ],
    )
def main(max_pages=50):
    """Collect result pages and write the combined listings to a CSV file.

    Pages 1..max_pages are requested in order, pausing one second after each
    page.  Collection stops early when ``process_one_page`` returns ``None``.
    The combined table is written to ``sold_listings_<YYYYMMDD_HHMM>.txt``
    in the current working directory.

    Args:
        max_pages: upper bound on the number of pages to request.
    """
    # Collect per-page frames and concatenate once at the end: this avoids
    # re-copying the accumulated table for every page and avoids concatenating
    # onto an empty DataFrame.
    frames = []
    for i in range(max_pages):
        print(i)
        page = i + 1
        url = f'https://www.domain.com.au/sold-listings/brisbane-region-qld/?excludepricewithheld=1&page={page}'
        df = process_one_page(url)
        if df is None:
            break
        frames.append(df)
        time.sleep(1)

    # pd.concat raises on an empty list, so fall back to an empty frame.
    output_df = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()
    now = datetime.datetime.now().strftime("%Y%m%d_%H%M")
    output_df.to_csv('sold_listings_{}.txt'.format(now))
if __name__ == '__main__':
    # build_data_list() reads the module-level stop_date, so it has to be
    # assigned from the command line before the scrape starts.
    stop_date = get_arguments().date
    main()
,lat,lng,street,suburb,auction,property_type,land_size,land_unit,baths,beds,parking,price,sold,date
0,-27.2223167,153.027618,,MANGO HILL,,House,809.0,m²,1,3,4,350000,Sold by private treaty,
1,-27.183754,153.021286,6 Logan Terrace,DECEPTION BAY,,House,622.0,m²,1,3,1,320000,Sold by private treaty,
2,-27.7408962,153.091751,"45-59 Rossmore Road,",CHAMBERS FLAT,,House,0.0,m²,3,6,4,1260000,Sold by private treaty 06 Dec 2019,2019-12-06
3,-27.04538,153.144241,103 Sunderland Drive,BANKSIA BEACH,,House,546.0,m²,1,3,3,370000,Sold by private treaty 05 Dec 2019,2019-12-05
4,-27.42381,153.059616,8 Bennison Street,ASCOT,,House,962.0,m²,2,5,2,2250000,Sold by private treaty 05 Dec 2019,2019-12-05
5,-27.1455879,152.975281,7 Ogilvy Road,BURPENGARY,,House,1133.0,m²,2,3,2,545000,Sold by private treaty 06 Dec 2019,2019-12-06
6,-27.02739,153.150085,16 Honeymyrtle Street,BANKSIA BEACH,,House,0.0,m²,2,3,2,507500,Sold by private treaty 05 Dec 2019,2019-12-05
7,-27.033287,153.125656,78 White Patch Esplanade,WHITE PATCH,,House,2023.0,m²,2,4,5,905000,Sold by private treaty 06 Dec 2019,2019-12-06
8,-27.5230579,152.984512,86 Strong Avenue,GRACEVILLE,2019-10-19T10:00:00,House,409.0,m²,3,5,2,1180000,Sold at auction 06 Dec 2019,2019-12-06
9,-27.55971,153.080124,2852/1-5 Cremin Street,UPPER MOUNT GRAVATT,,ApartmentUnitFlat,0.0,m²,2,2,1,500000,Sold by private treaty 06 Dec 2019,2019-12-06
10,-27.5366058,152.980759,4/97 Primrose Street,SHERWOOD,,Townhouse,192.0,m²,2,4,2,560000,Sold by private treaty 06 Dec 2019,2019-12-06
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment