Skip to content

Instantly share code, notes, and snippets.

@JoosepAlviste
Last active December 7, 2018 20:19
Show Gist options
  • Save JoosepAlviste/1d8451710c4eeff001eb545752f15ee3 to your computer and use it in GitHub Desktop.
Save JoosepAlviste/1d8451710c4eeff001eb545752f15ee3 to your computer and use it in GitHub Desktop.
KV Scraping

KV scraper script to notify of new apartments

The script will keep running and will make a check every minute. If there have been any changes, the changes will be printed to the console and a macOS notification will be sent.

  • pip install -r requirements.txt
  • python scrape.py
beautifulsoup4==4.6.3
import difflib
import os
import sched
import subprocess
import time
from urllib.request import urlopen

from bs4 import BeautifulSoup
# Search-results page whose listing links are fetched and diffed on every poll.
# NOTE(review): the query contains `page=1age_size=50`, which looks like a
# garbled `page=1&page_size=50` -- confirm against the site before changing it.
url = 'https://www.kv.ee/?act=search.simple&last_deal_type=2&page=1age_size=50&deal_type=2&dt_select=2&county=1&search_type=new&parish=1061&city%5B0%5D=1001&city%5B1%5D=1004&city%5B2%5D=1007&city%5B3%5D=1008&rooms_min=2&rooms_max=2&area_min=30&orderby=cdwl#res'
def _applescript_quote(value):
    """Return *value* as a double-quoted AppleScript string literal."""
    # Backslashes must be escaped first so the quote escapes are not doubled.
    escaped = str(value).replace('\\', '\\\\').replace('"', '\\"')
    return '"{}"'.format(escaped)


def notify(title, text):
    """Show a desktop notification through ``osascript``.

    The notification is best effort: if ``osascript`` is not available (for
    example on a non-macOS machine), cannot be started, or does not finish
    in time, the failure is ignored so the caller keeps running.

    Args:
        title: Title line of the notification.
        text: Body text of the notification.
    """
    script = 'display notification {} with title {} sound name "Glass"'.format(
        _applescript_quote(text),
        _applescript_quote(title),
    )
    try:
        # An argument list (no shell) keeps quotes or shell metacharacters in
        # the title/text from breaking or hijacking the command line.
        subprocess.run(['osascript', '-e', script], check=False, timeout=10)
    except (OSError, subprocess.SubprocessError):
        # Deliberately swallowed: a missing or hung notifier must not stop
        # the caller.
        pass
def parse_objects(url):
    """Fetch the page at *url* and return the listing links found on it.

    Links are taken from the first ``<a>`` inside every
    ``<h2 class="object-title">`` within the
    ``<table class="object-list-table">`` element.

    Args:
        url: Address of the search-results page to download.

    Returns:
        The ``href`` values joined with newlines, in page order. An empty
        string is returned when the page has no results table or no
        title links.
    """
    # BeautifulSoup consumes the response while it is constructed, so the
    # connection can be closed as soon as parsing is done.
    with urlopen(url) as page:
        soup = BeautifulSoup(page, 'html.parser')

    objects_table = soup.find('table', attrs={'class': 'object-list-table'})
    if objects_table is None:
        # No results table on the page: report "no listings" instead of
        # failing with AttributeError on None.
        return ''

    titles = objects_table.find_all('h2', attrs={'class': 'object-title'})
    anchors = (title.find('a') for title in titles)
    links = [
        anchor.attrs['href']
        for anchor in anchors
        # Skip headings that carry no usable link.
        if anchor is not None and 'href' in anchor.attrs
    ]
    return '\n'.join(links)
def write_to_new_file(contents):
    """Overwrite the ``/tmp/kv-new`` snapshot file with *contents*.

    Args:
        contents: Text to store; any previous file content is discarded.
    """
    with open('/tmp/kv-new', mode='w+') as snapshot:
        snapshot.write(contents)
def read_old_file():
    """Return the stored ``/tmp/kv-old`` snapshot with outer whitespace removed.

    Returns:
        The stripped file content, or an empty string when the file does
        not exist.
    """
    try:
        previous = open('/tmp/kv-old', 'r+')
    except FileNotFoundError:
        # No snapshot has been saved yet.
        return ''
    with previous:
        return previous.read().strip()
def diff_objects():
    """Fetch the current listings, report changes, and save the new snapshot.

    The links returned by ``parse_objects(url)`` are written to
    ``/tmp/kv-new`` and compared line by line with the previous snapshot in
    ``/tmp/kv-old``. When the two differ, the context diff is printed, a
    notification is sent, and ``/tmp/kv-old`` is replaced with the new links.

    A missing ``/tmp/kv-old`` (for example on the very first run) is treated
    as an empty snapshot, so every current listing is reported as new.
    """
    objects = str(parse_objects(url)).strip()
    write_to_new_file(objects)

    # read_old_file() returns '' when no snapshot exists yet, which avoids
    # the FileNotFoundError that opening the file directly would raise.
    old_lines = read_old_file().splitlines()
    new_lines = objects.splitlines()

    # Lines carry no trailing newline here, so lineterm='' keeps the diff
    # header lines consistent and print() emits exactly one newline per line.
    diffs = list(difflib.context_diff(
        old_lines,
        new_lines,
        fromfile='/tmp/kv-old',
        tofile='/tmp/kv-new',
        n=2,
        lineterm='',
    ))

    for diff in diffs:
        print(diff)

    if diffs:
        notify('KV: New offer available!', 'Check it out')
        # Persist the new state so the next poll only reports later changes.
        with open('/tmp/kv-old', 'w') as old_file:
            old_file.write(objects)
def schedule(interval=60):
    """Run ``diff_objects`` immediately and then repeatedly, forever.

    A failing check (for example a network error while downloading the
    page) is reported on the console and the next check is still scheduled,
    so one bad poll does not end the watcher.

    Args:
        interval: Seconds to wait between the end of one check and the start
            of the next. Defaults to 60.
    """
    print('Started diffing process')
    s = sched.scheduler(time.time, time.sleep)

    def diff_and_schedule(sc):
        """Perform one check and queue the following one on *sc*."""
        print('>>> Diffing...')
        try:
            diff_objects()
        except Exception as exc:
            # Top-level boundary of the polling loop: report and keep going.
            print('!!! Diffing failed: {!r}'.format(exc))
        print('<<< Finished diffing...\n')
        sc.enter(interval, 1, diff_and_schedule, (sc,))

    s.enter(0, 1, diff_and_schedule, (s,))
    s.run()
def main():
    """Script entry point: hand control to the polling scheduler."""
    return schedule()
# Start polling only when the file is executed as a script, not on import.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment