Skip to content

Instantly share code, notes, and snippets.

@Omar-Salem
Last active April 20, 2019 02:51
Show Gist options
  • Save Omar-Salem/e936754e19d8060771770433f63582d2 to your computer and use it in GitHub Desktop.
Save Omar-Salem/e936754e19d8060771770433f63582d2 to your computer and use it in GitHub Desktop.
flatmates crawler/filter
import json
from haversine import haversine
import urllib2
from bs4 import BeautifulSoup
def read_data():
    """Return the raw contents of the local data.json file as a string."""
    with open('data.json', 'r') as fh:
        contents = fh.read()
    return contents
def filter_data(data, center=(-37.806914398397005, 144.9416857585693), max_km=7):
    """Return the listing links that lie within max_km km of center.

    Parameters
    ----------
    data : str
        JSON document with a top-level "matches" array; each match must
        carry "latitude", "longitude" and "listing_link" keys.
    center : tuple of float, optional
        (latitude, longitude) reference point. The default is the
        hard-coded point used by the original script (inner Melbourne).
    max_km : float, optional
        Inclusive great-circle distance cutoff in kilometres (default 7,
        matching the original behaviour).

    Returns
    -------
    list of str
        The "listing_link" values of matches inside the radius.
    """
    filtered = []
    for match in json.loads(data)["matches"]:
        place = (match["latitude"], match["longitude"])
        # haversine() yields the great-circle distance in kilometres.
        if haversine(center, place) <= max_km:
            filtered.append(match["listing_link"])
    return filtered
def crawl_listings(listings):
    """Fetch each listing page and keep the ones worth contacting.

    A listing is kept only when its page contains the text
    'Free to message' and the flatmate count shown on the page is at
    most 2. Kept URLs are printed as they are found.

    Parameters
    ----------
    listings : iterable of str
        Listing paths (e.g. '/rooms/123') relative to flatmates.com.au.

    Returns
    -------
    list of str
        Full URLs of the listings that passed both filters.
    """
    kept = []
    for path in listings:
        url = "https://flatmates.com.au" + path
        page = urllib2.urlopen(url)
        try:
            soup = BeautifulSoup(page, 'html.parser')
        finally:
            # The original never closed the response, leaking one socket
            # per listing; close it as soon as the HTML is parsed.
            page.close()
        # Skip listings that require a paid account to contact.
        if 'Free to message' not in str(soup):
            continue
        features = soup.findAll(
            "div", {"class": "styles__value___V-EvS styles__iconPresent___2jaGH"})
        # The third feature block appears to hold the flatmate count --
        # this is site-layout dependent; verify if the markup changes.
        roommates_count = features[2].getText()
        if int(roommates_count) > 2:
            continue
        print(url)
        print("----------")
        kept.append(url)
    return kept
def write_listings(listings):
    """Write the given listing URLs to output.txt, one per line."""
    lines = [item + '\n' for item in listings]
    with open('output.txt', 'w+') as sink:
        sink.writelines(lines)
    return
def main():
    """Run the pipeline: read cached data, filter by distance, crawl, persist."""
    data = read_data()
    nearby = filter_data(data)
    listings = crawl_listings(nearby)
    write_listings(listings)


# Guard the entry point so importing this module no longer triggers the
# crawl as a side effect; running the file as a script behaves as before.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment