Created
December 10, 2019 23:07
-
-
Save ymkim92/ac60e7765013195c8ec1c739aaa1d717 to your computer and use it in GitHub Desktop.
Collect sold house data from domain.com.au
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import requests | |
| from bs4 import BeautifulSoup | |
| import json | |
| import pandas as pd | |
| import datetime | |
| import time | |
| import sys | |
| import argparse | |
# Cut-off date for the collection run. It stays None until the __main__ block
# assigns the parsed command-line date; build_data_list() compares each
# listing's sale date against it.
stop_date = None
def _parse_yyyymmdd(text):
    """Convert a ``YYYYMMDD`` string to a ``datetime.datetime`` for argparse."""
    try:
        return datetime.datetime.strptime(text, '%Y%m%d')
    except ValueError:
        # ArgumentTypeError lets argparse print this message verbatim instead
        # of a generic "invalid <lambda> value" error.
        raise argparse.ArgumentTypeError(
            f'invalid date {text!r}: expected format YYYYMMDD') from None


def get_arguments(argv=None):
    """Parse the command-line arguments of the collector.

    Args:
        argv: Optional list of argument strings, without the program name.
            When omitted, ``sys.argv[1:]`` is parsed.

    Returns:
        An ``argparse.Namespace`` whose ``date`` attribute is the
        ``datetime.datetime`` parsed from the positional ``YYYYMMDD`` argument.

    Raises:
        SystemExit: Raised by argparse when the arguments are missing or the
            date cannot be parsed.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        'date',
        type=_parse_yyyymmdd,
        help='date until when to collect data')
    return parser.parse_args(sys.argv[1:] if argv is None else argv)
def extract_json(data):
    """Return the text between the first ``{`` and the last ``}`` of *data*.

    The span includes both braces. The text is not validated as JSON; it is
    only cut out of the surrounding string.

    Args:
        data: String that may embed a JSON object, such as the text of a
            ``<script>`` tag.

    Returns:
        The brace-delimited substring, or ``''`` when *data* has no opening
        brace or no closing brace after it.
    """
    start = data.find('{')
    end = data.rfind('}')
    # find()/rfind() return -1 when nothing matches. Slicing with -1 would
    # silently produce a bogus fragment (e.g. a lone '}'), so bail out first.
    if start == -1 or end < start:
        return ''
    return data[start:end + 1]
def build_data_list(parsed, cutoff=None):
    """Flatten a mapping of listing entries into rows of sold-property data.

    Each value of *parsed* is expected to hold a ``'listingModel'`` dict with
    ``address``, ``auction``, ``features``, ``price`` and ``tags`` entries.
    Entries that lack any required key, or whose price is not a plain
    ``$1,234`` style amount, are skipped.

    Args:
        parsed: Mapping whose values are listing entries.
        cutoff: Optional ``datetime.datetime``. When omitted, the module-level
            ``stop_date`` is used. If that is ``None`` as well, no cut-off is
            applied.

    Returns:
        A list of rows ``[lat, lng, street, suburb, auction, property_type,
        land_size, land_unit, baths, beds, parking, price, sold, date]``, or
        ``None`` as soon as a listing sold on or before the cut-off is met.
        ``date`` is ``None`` when the sold tag carries no parseable date.

    NOTE(review): on reaching the cut-off, rows already gathered from this
    mapping are discarded, because the caller treats ``None`` as "stop
    paging". Changing that needs a coordinated change in the callers.
    """
    if cutoff is None:
        cutoff = stop_date

    data_list = []
    for entry in parsed.values():
        try:
            listing = entry['listingModel']
            address = listing['address']
            features = listing['features']
            row = [
                address['lat'],
                address['lng'],
                address['street'],
                address['suburb'],
                listing['auction'],
                features['propertyType'],
                features['landSize'],
                features['landUnit'],
                features['baths'],
                features['beds'],
                features['parking'],
                int(listing['price'].replace('$', '').replace(',', '')),
            ]
            sold = listing['tags']['tagText']
        except (KeyError, ValueError):
            # KeyError: the entry does not have the expected listing shape.
            # ValueError: the price text is not a plain integer amount.
            continue

        # The tag text is expected to end with a date such as "06 Dec 2019";
        # tags without one (e.g. "Sold by private treaty") yield no date.
        try:
            date = datetime.datetime.strptime(sold[-11:], '%d %b %Y')
        except ValueError:
            date = None

        if date is not None and cutoff is not None and date <= cutoff:
            return None

        row.extend([sold, date])
        data_list.append(row)
    return data_list
def process_one_page(url, timeout=30):
    """Download one sold-listings page and return its listings as a DataFrame.

    Args:
        url: Address of the result page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests`` can block indefinitely on a stalled
            connection.

    Returns:
        A ``pandas.DataFrame`` with one row per usable listing, or ``None``
        when ``build_data_list`` reports that the cut-off date was reached.

    Raises:
        requests.HTTPError: The server answered with an error status.
        requests.Timeout: The server did not answer within *timeout* seconds.
    """
    response = requests.get(url, timeout=timeout)
    # Fail with a clear HTTP error instead of a confusing IndexError or JSON
    # error further down when the server returns an error page.
    response.raise_for_status()

    soup = BeautifulSoup(response.text, 'html.parser')
    # NOTE(review): the listing payload is taken from the fourth <script> tag.
    # This position is tied to the site's page layout; confirm it still holds.
    script = soup.find_all('script')[3]
    parsed = json.loads(extract_json(script.text))["listingsMap"]

    data_list = build_data_list(parsed)
    if data_list is None:
        return None
    return pd.DataFrame(
        data_list,
        columns=[
            'lat', 'lng', 'street', 'suburb', 'auction',
            'property_type', 'land_size', 'land_unit',
            'baths', 'beds', 'parking', 'price',
            'sold', 'date',
        ])
def main(max_pages=50):
    """Collect sold listings page by page and write them to a CSV file.

    Pages are requested in order until ``process_one_page`` returns ``None``
    or *max_pages* pages have been read. The combined table is written to
    ``sold_listings_<YYYYmmdd_HHMM>.txt`` in the current directory.

    Args:
        max_pages: Upper bound on the number of result pages to request.
    """
    frames = []
    for i in range(max_pages):
        print(i)
        url = f'https://www.domain.com.au/sold-listings/brisbane-region-qld/?excludepricewithheld=1&page={i + 1}'
        df = process_one_page(url)
        if df is None:
            break
        frames.append(df)
        # Pause between requests so the site is not hit in a tight loop.
        time.sleep(1)

    # Concatenate once at the end: this avoids re-copying the accumulated
    # table on every page, and avoids seeding concat with an empty DataFrame.
    # pd.concat rejects an empty list, hence the fallback.
    output_df = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

    now = datetime.datetime.now().strftime("%Y%m%d_%H%M")
    output_df.to_csv(f'sold_listings_{now}.txt')
if __name__ == '__main__':
    # This runs at module scope, so the assignment below rebinds the
    # module-level stop_date that build_data_list() compares sale dates
    # against. It must happen before main() starts fetching pages.
    args = get_arguments()
    stop_date = args.date
    main()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| ,lat,lng,street,suburb,auction,property_type,land_size,land_unit,baths,beds,parking,price,sold,date | |
| 0,-27.2223167,153.027618,,MANGO HILL,,House,809.0,m²,1,3,4,350000,Sold by private treaty, | |
| 1,-27.183754,153.021286,6 Logan Terrace,DECEPTION BAY,,House,622.0,m²,1,3,1,320000,Sold by private treaty, | |
| 2,-27.7408962,153.091751,"45-59 Rossmore Road,",CHAMBERS FLAT,,House,0.0,m²,3,6,4,1260000,Sold by private treaty 06 Dec 2019,2019-12-06 | |
| 3,-27.04538,153.144241,103 Sunderland Drive,BANKSIA BEACH,,House,546.0,m²,1,3,3,370000,Sold by private treaty 05 Dec 2019,2019-12-05 | |
| 4,-27.42381,153.059616,8 Bennison Street,ASCOT,,House,962.0,m²,2,5,2,2250000,Sold by private treaty 05 Dec 2019,2019-12-05 | |
| 5,-27.1455879,152.975281,7 Ogilvy Road,BURPENGARY,,House,1133.0,m²,2,3,2,545000,Sold by private treaty 06 Dec 2019,2019-12-06 | |
| 6,-27.02739,153.150085,16 Honeymyrtle Street,BANKSIA BEACH,,House,0.0,m²,2,3,2,507500,Sold by private treaty 05 Dec 2019,2019-12-05 | |
| 7,-27.033287,153.125656,78 White Patch Esplanade,WHITE PATCH,,House,2023.0,m²,2,4,5,905000,Sold by private treaty 06 Dec 2019,2019-12-06 | |
| 8,-27.5230579,152.984512,86 Strong Avenue,GRACEVILLE,2019-10-19T10:00:00,House,409.0,m²,3,5,2,1180000,Sold at auction 06 Dec 2019,2019-12-06 | |
| 9,-27.55971,153.080124,2852/1-5 Cremin Street,UPPER MOUNT GRAVATT,,ApartmentUnitFlat,0.0,m²,2,2,1,500000,Sold by private treaty 06 Dec 2019,2019-12-06 | |
| 10,-27.5366058,152.980759,4/97 Primrose Street,SHERWOOD,,Townhouse,192.0,m²,2,4,2,560000,Sold by private treaty 06 Dec 2019,2019-12-06 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment