Skip to content

Instantly share code, notes, and snippets.

@algomaster99
Last active June 13, 2023 14:21
Show Gist options
  • Save algomaster99/c9344a4dbaa78731afd33162688e5c6a to your computer and use it in GitHub Desktop.
Save algomaster99/c9344a4dbaa78731afd33162688e5c6a to your computer and use it in GitHub Desktop.
SSSB scraper
# Run `pip install selenium` before starting script
import datetime
import json
import os
import time
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.common.exceptions import TimeoutException, NoSuchElementException
# Listings gathered during the current polling pass; the script clears this
# list once the pass has been processed.
COLLECT_RUNTIME = []
# Gmail credentials. EMAIL is used as login name, sender and recipient of the
# notification mail. Set SSSB_EMAIL / SSSB_PASSWORD in the environment so real
# secrets never have to be written into this file; the literals are only a
# fallback for backward compatibility.
PASSWORD = os.environ.get('SSSB_PASSWORD', '[REDACTED]')
EMAIL = os.environ.get('SSSB_EMAIL', '[REDACTED]')
# Only listings whose agreement start date is strictly after this date are
# reported.
DATE = datetime.date(2023, 3, 16)
# Pause between two polling passes, in seconds (30 minutes).
RERUN_AFTER_SECONDS = 60 * 60 * 0.5
# Outer polling loop. The statements that follow form one scrape-and-notify
# pass; the script's last statement, time.sleep(RERUN_AFTER_SECONDS), is the
# pause between passes.
# NOTE(review): indentation was lost in this copy of the script, so the exact
# extent of the loop body is inferred - confirm against the original gist.
while True:
# Presumably the names of SSSB housing areas.
# NOTE(review): `areas` is not read anywhere else in the visible script -
# confirm whether it is a leftover or a filter that was never wired in.
areas = [
'Apeln',
'Domus',
'Embla',
'FORUM',
'Hugin',
'Idun',
'Jerum',
'Kungsh',
'Lappis',
'Nypone',
'Pax',
'Roslag',
'STRIX',
]
def load_apartments_from_file():
    """Return the listings already recorded in ``apartments.json``.

    The file is looked up in the current working directory and is expected to
    hold the JSON list written by this script.

    Returns:
        The decoded JSON content, or an empty list when the file does not
        exist yet (first run) or contains nothing but whitespace.

    Raises:
        json.JSONDecodeError: if the file has content that is not valid JSON.
    """
    try:
        with open('apartments.json', 'r', encoding='utf-8') as f:
            content = f.read()
    except FileNotFoundError:
        # Nothing has been recorded yet; treat it like an empty history.
        return []
    if not content.strip():
        return []
    return json.loads(content)
# Listing page that a polling pass starts from.
URL = 'https://sssb.se/en/looking-for-housing/apply-for-apartment/available-apartments/'
# Launch Firefox through Selenium and open the listing page. The URL constant
# is used here so the address is maintained in one place only.
browser = webdriver.Firefox()
browser.get(URL)
# Upper bound, in seconds, for every explicit WebDriverWait in this script.
DELAY_SECONDS = 10
# JavaScript run right before the pagination link is clicked; it hides the
# '.cc_container' element (presumably the cookie-consent overlay, so that it
# cannot cover the link). The null check keeps the snippet from raising a
# JavaScript error when the element is not on the page.
hide_cookies = '''
var banner = document.querySelector('.cc_container');
if (banner) { banner.style.display = 'none'; }
'''
def update_apartment_set():
    """Collect the qualifying listings shown on the current results page.

    Reads every ``ObjektListItem`` element from the module-level ``browser``
    and appends a dict (``address``, ``link``, ``apartmentType``, ``date``,
    ``credits``) to ``COLLECT_RUNTIME`` for each listing that

    * is not already recorded in ``apartments.json`` (same address and date),
    * has not already been collected during this pass,
    * has an agreement start date strictly after ``DATE``, and
    * is not of type ``'Corridor room'``.

    A listing whose date text is not of the form ``YYYY-MM-DD`` is reported on
    stdout and skipped instead of aborting the whole pass.
    """
    main_class_name = 'ObjektListItem '
    listings = browser.find_elements(By.CLASS_NAME, main_class_name)
    if not listings:
        # Nothing to compare, so the history file is not touched at all.
        return

    # The history file does not change while a page is scraped, so it is read
    # once and turned into a set of (address, date) keys for O(1) lookups.
    known = {(seen['address'], seen['date']) for seen in load_apartments_from_file()}
    # Also treat listings collected earlier in this pass as known, so the
    # same listing is never queued twice.
    known.update((queued['address'], queued['date']) for queued in COLLECT_RUNTIME)

    for listing in listings:
        address_container = listing.find_element(By.CLASS_NAME, 'ObjektAdress')
        address_link = address_container.find_element(By.TAG_NAME, 'a')
        agreement_start = listing.find_element(By.CSS_SELECTOR, 'dd.ObjektInflytt')
        type_container = listing.find_element(By.CLASS_NAME, 'ObjektTyp')
        type_link = type_container.find_element(By.TAG_NAME, 'a')
        credits_element = listing.find_element(By.CSS_SELECTOR, 'dd.ObjektAntalIntresse')
        d = {
            'address': address_link.text,
            'link': address_link.get_attribute('href'),
            'apartmentType': type_link.text,
            'date': agreement_start.text,
            'credits': credits_element.text,
        }

        key = (d['address'], d['date'])
        if key in known:
            continue

        try:
            yyyy, mm, dd = d['date'].split('-')
            agreement_date = datetime.date(int(yyyy), int(mm), int(dd))
        except ValueError:
            # Wrong number of parts, non-numeric parts or an impossible date.
            print(f'Skipping {d["address"]}: cannot parse date {d["date"]!r}')
            continue

        if agreement_date > DATE and d['apartmentType'] != 'Corridor room':
            COLLECT_RUNTIME.append(d)
            known.add(key)
# Walk through the result pages. The page that is currently displayed is
# scraped first and only then is the "next" link followed, so the initially
# loaded page is processed even when no further page exists.
# NOTE(review): as before, the wait only checks that some listing element is
# present after the click; it does not prove the new page has replaced the old
# one - confirm against the site's behaviour.
try:
    while True:
        try:
            WebDriverWait(browser, DELAY_SECONDS).until(
                EC.presence_of_element_located((By.CLASS_NAME, 'ObjektListItem '))
            )
        except TimeoutException:
            # No listing appeared within DELAY_SECONDS: nothing to scrape.
            break
        update_apartment_set()
        try:
            next_button_container = WebDriverWait(browser, DELAY_SECONDS).until(
                EC.presence_of_element_located((By.CLASS_NAME, 'PaginationNextExists'))
            )
            next_link = next_button_container.find_element(By.TAG_NAME, 'a')
            browser.execute_script(hide_cookies)
            next_link.click()
        except (NoSuchElementException, TimeoutException):
            # No "next" link: the last page has been reached.
            break
finally:
    # quit() ends the whole driver session, and the `finally` guarantees the
    # browser is released even when scraping fails unexpectedly.
    browser.quit()

# A listing may have been collected more than once while paging; keep only
# its first occurrence so it is reported a single time.
seen_keys = set()
unique_listings = []
for listing in COLLECT_RUNTIME:
    listing_key = (listing['address'], listing['date'])
    if listing_key not in seen_keys:
        seen_keys.add(listing_key)
        unique_listings.append(listing)
COLLECT_RUNTIME[:] = unique_listings
# Template of the notification mail, filled in with str.format() using the
# keys address, link, apartment_type, date and credits.
# The first line is the Subject header. The blank line after it is required:
# in an e-mail message it is what separates the headers from the body text.
EMAIL_BODY = u'''Subject: [SSSB] {date} {address}

Hi,
New accommodation available at SSSB.
Address: {address}
Link: {link}
Type: {apartment_type}
Date: {date}
Credits: {credits}
Regards,
SSSB Bot
'''
def _build_message(body, sender):
    """Wrap the plain-text *body* in an ``EmailMessage`` from/to *sender*.

    If the first line of *body* starts with ``Subject:`` it becomes the
    message subject and the remaining text becomes the content; otherwise the
    whole text is the content. ``EmailMessage`` takes care of encoding
    non-ASCII characters in both the subject and the content.
    """
    from email.message import EmailMessage

    message = EmailMessage()
    message['From'] = sender
    message['To'] = sender
    first_line, _, remainder = body.partition('\n')
    if first_line.lower().startswith('subject:'):
        message['Subject'] = first_line[len('subject:'):].strip()
        body = remainder.lstrip('\r\n')
    message.set_content(body)
    return message


def send_email(body, address=None):
    """Mail *body* from ``EMAIL`` to ``EMAIL`` through Gmail's SMTP server.

    Sending is best-effort: any failure is printed (together with the body)
    and swallowed so that one undeliverable mail does not stop the script.

    Args:
        body: Message text; a leading ``Subject: ...`` line is used as the
            mail subject.
        address: Listing address used only in the progress messages. When
            omitted, the module-level name ``address`` is used if it exists,
            which is how this function obtained it before the parameter was
            added.
    """
    import smtplib
    import ssl

    if address is None:
        address = globals().get('address', '(unknown address)')

    smtp_server = "smtp.gmail.com"
    port = 587  # For starttls
    sender_email = EMAIL
    password = PASSWORD
    try:
        message = _build_message(body, sender_email)
        # The context manager closes the SMTP connection on every path.
        with smtplib.SMTP(smtp_server, port) as server:
            server.starttls(context=ssl.create_default_context())
            server.login(sender_email, password)
            # send_message() serialises the message with proper encoding;
            # sendmail() with a str would fail on non-ASCII characters.
            server.send_message(message)
        print(f'Email sent for {address}')
    except Exception as e:  # deliberate catch-all: notification is best-effort
        print(body)
        print(e)
        print(f'Email could not be sent for {address}')
    finally:
        print('-----------------------------------------')


# Notify about every collected listing that is not in the history file yet,
# then persist the updated history and wait for the next polling pass.
already_sent = load_apartments_from_file()
print(COLLECT_RUNTIME)
for candidate in COLLECT_RUNTIME:
    for apartment in already_sent:
        if apartment['address'] == candidate['address'] and apartment['date'] == candidate['date']:
            print('-------------Duplicate apartment-------------')
            print(f'Address:{apartment["address"]}')
            print(f'Apartment Type: {apartment["apartmentType"]}')
            print(f'Date: {apartment["date"]}')
            print('-----------------------------------------')
            # `break` is what suppresses the for/else branch below; with
            # `continue` the duplicate would still be mailed.
            break
    else:
        address = candidate['address']
        link = candidate['link']
        apartment_type = candidate['apartmentType']
        date = candidate['date']
        credits = candidate['credits']
        print('-------------Found apartment-------------')
        print(f'Address:{address}')
        print(f'Apartment Type: {apartment_type}')
        print(f'Date: {date}')
        # The address is passed as text; encoding it here would put the
        # bytes repr (b'...') into the mail.
        send_email(
            EMAIL_BODY.format(
                address=address,
                link=link,
                date=date,
                apartment_type=apartment_type,
                credits=credits,
            ),
            address=address,
        )
        # Record it right away so a repeated candidate later in this pass is
        # recognised as a duplicate and the history holds each listing once.
        already_sent.append(candidate)
with open('apartments.json', 'w') as f:
    json.dump(already_sent, f, indent=2)
COLLECT_RUNTIME.clear()
time.sleep(RERUN_AFTER_SECONDS)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment