Last active
June 13, 2023 14:21
-
-
Save algomaster99/c9344a4dbaa78731afd33162688e5c6a to your computer and use it in GitHub Desktop.
SSSB scraper
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Run `pip install selenium` before starting script | |
import datetime | |
import json | |
import os | |
import time | |
from selenium import webdriver | |
from selenium.webdriver.support.ui import WebDriverWait | |
from selenium.webdriver.support import expected_conditions as EC | |
from selenium.webdriver.common.by import By | |
from selenium.common.exceptions import TimeoutException, NoSuchElementException | |
COLLECT_RUNTIME = [] | |
PASSWORD = '[REDACTED]' | |
EMAIL = '[REDACTED]' | |
DATE = datetime.date(2023, 3, 16) | |
RERUN_AFTER_SECONDS = 60 * 60 * 0.5 | |
while True: | |
areas = [ | |
'Apeln', | |
'Domus', | |
'Embla', | |
'FORUM', | |
'Hugin', | |
'Idun', | |
'Jerum', | |
'Kungsh', | |
'Lappis', | |
'Nypone', | |
'Pax', | |
'Roslag', | |
'STRIX', | |
] | |
def load_apartments_from_file(): | |
if os.stat('apartments.json').st_size != 0: | |
with open('apartments.json', 'r') as f: | |
return json.load(f) | |
return [] | |
URL = 'https://sssb.se/en/looking-for-housing/apply-for-apartment/available-apartments/' | |
browser = webdriver.Firefox() | |
browser.get('https://sssb.se/en/looking-for-housing/apply-for-apartment/available-apartments/') | |
DELAY_SECONDS = 10 | |
hide_cookies = ''' | |
document.querySelector('.cc_container').style.display = 'none'; | |
''' | |
def update_apartment_set(): | |
main_class_name = 'ObjektListItem ' | |
apartments = browser.find_elements(By.CLASS_NAME, main_class_name) | |
for apartment in apartments: | |
address_container = apartment.find_element(By.CLASS_NAME, 'ObjektAdress') | |
address_link = address_container.find_element(By.TAG_NAME, 'a') | |
agreement_start = apartment.find_element(By.CSS_SELECTOR, 'dd.ObjektInflytt') | |
type_container = apartment.find_element(By.CLASS_NAME, 'ObjektTyp') | |
type_link = type_container.find_element(By.TAG_NAME, 'a') | |
credits = apartment.find_element(By.CSS_SELECTOR, 'dd.ObjektAntalIntresse') | |
d = { | |
'address': address_link.text, | |
'link': address_link.get_attribute('href'), | |
'apartmentType': type_link.text, | |
'date': agreement_start.text, | |
'credits': credits.text | |
} | |
yyyy, mm, dd = d['date'].split('-') | |
agreement_date = datetime.date(int(yyyy), int(mm), int(dd)) | |
apartments = load_apartments_from_file() | |
for apartment in apartments: | |
if apartment['address'] == d['address'] and apartment['date'] == d['date']: | |
break | |
else: | |
if agreement_date > DATE and d['apartmentType'] != 'Corridor room': | |
COLLECT_RUNTIME.append(d) | |
while True: | |
try: | |
next_button_container = WebDriverWait(browser, DELAY_SECONDS).until( | |
EC.presence_of_element_located((By.CLASS_NAME, 'PaginationNextExists')) | |
) | |
address_link = next_button_container.find_element(By.TAG_NAME, 'a') | |
browser.execute_script(hide_cookies) | |
address_link.click() | |
WebDriverWait(browser, DELAY_SECONDS).until( | |
EC.presence_of_element_located((By.CLASS_NAME, 'ObjektListItem ')) | |
) | |
update_apartment_set() | |
except (NoSuchElementException, TimeoutException): | |
break | |
browser.close() | |
EMAIL_BODY = u'''Subject: [SSSB] {date} {address} | |
Hi, | |
New accommodation available at SSSB. | |
Address: {address} | |
Link: {link} | |
Type: {apartment_type} | |
Date: {date} | |
Credits: {credits} | |
Regards, | |
SSSB Bot | |
''' | |
def send_email(body): | |
import smtplib, ssl | |
smtp_server = "smtp.gmail.com" | |
port = 587 # For starttls | |
sender_email = EMAIL | |
password = PASSWORD | |
try: | |
server = smtplib.SMTP(smtp_server,port) | |
server.starttls(context=ssl.create_default_context()) | |
server.login(sender_email, password) | |
server.sendmail(sender_email, sender_email, body) | |
print(f'Email sent for {address}') | |
except Exception as e: | |
print(body) | |
print(e) | |
print(f'Email could not be sent for {address}') | |
finally: | |
print('-----------------------------------------') | |
already_sent = load_apartments_from_file() | |
print(COLLECT_RUNTIME) | |
for candidate in COLLECT_RUNTIME: | |
for apartment in already_sent: | |
if apartment['address'] == candidate['address'] and apartment['date'] == candidate['date']: | |
print('-------------Duplicate apartment-------------') | |
print(f'Address:{apartment["address"]}') | |
print(f'Apartment Type: {apartment["apartmentType"]}') | |
print(f'Date: {apartment["date"]}') | |
print('-----------------------------------------') | |
continue | |
else: | |
address = candidate['address'] | |
link = candidate['link'] | |
apartment_type = candidate['apartmentType'] | |
date = candidate['date'] | |
credits = candidate['credits'] | |
print('-------------Found apartment-------------') | |
print(f'Address:{address}') | |
print(f'Apartment Type: {apartment_type}') | |
print(f'Date: {date}') | |
send_email(EMAIL_BODY.format(address=address.encode('utf-8'), link=link, date=date, apartment_type=apartment_type, credits=credits)) | |
with open('apartments.json', 'w') as f: | |
already_sent.extend(COLLECT_RUNTIME) | |
json.dump(already_sent, f, indent=2) | |
COLLECT_RUNTIME.clear() | |
time.sleep(RERUN_AFTER_SECONDS) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment