Created
August 20, 2019 16:01
-
-
Save linuskohl/e5bfba277499e6938f870fe02e772adf to your computer and use it in GitHub Desktop.
Tiny script that sends notification emails about new job openings posted on H-Soz-Kult
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# coding: utf-8 | |
import logging
import os
import sqlite3
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from smtplib import SMTP
from sqlite3 import IntegrityError
from string import Template

import feedparser
import lxml
import requests
from lxml import html
__author__ = "Linus Kohl" | |
__email__ = "[email protected]" | |
__license__ = "GPLv3" | |
# --- Outgoing mail -----------------------------------------------------------
# Unconnected SMTP client; the connection is opened further down in the script.
smtp = SMTP()
smtp_server = 'XXX'
smtp_port = 587
smtp_username = 'XXX'
smtp_password = 'XXX'
# Address the notifications are delivered to.
recipient = 'XXX'
# Address used in the From header.
sender = 'XXX'
subject = 'New opening'
# Path of the string.Template file used for the message body.
template = './message.txt'

# --- Storage and feed --------------------------------------------------------
db_file_name = './stellen.db'
hsozkult_feed = 'https://www.hsozkult.de/job/rss?page=2'

# --- XPath expressions evaluated against a posting page ----------------------
# Common prefix of the rows inside the "hfn-item-metafull" container.
_XPATH_META = "//div[contains(@class, 'hfn-item-metafull')]"

XPATH_TITLE = '//h2/text()'
XPATH_LOCATION = _XPATH_META + '/div[1]/div[2]/text()'
XPATH_INSTITUTION = _XPATH_META + '/div[2]/div[2]/text()'
XPATH_DEADLINE = _XPATH_META + '/div[3]/div[2]/text()'
XPATH_LINK = _XPATH_META + '/div[4]/div[2]/a/text()'
XPATH_TYPE = '//*[@id="hfn-item-sidebar-metainfo"]/div[6]/div[2]/a/text()'
XPATH_CONTENT = "//div[contains(@class, 'hfn-item-fulltext')]/descendant::*/text()"
def init_db(connection):
    """Create the ``processed`` table that records handled posting links.

    The statement uses ``IF NOT EXISTS`` so that calling this on a database
    which already contains the table is a no-op rather than an
    ``sqlite3.OperationalError``.

    Args:
        connection: Open ``sqlite3`` connection to initialise.
    """
    cursor = connection.cursor()
    cursor.execute(
        "CREATE TABLE IF NOT EXISTS processed (link VARCHAR(500) PRIMARY KEY);"
    )
    # Make sure the schema is persisted regardless of the connection's
    # transaction settings.
    connection.commit()
def connect_db():
    """Open the SQLite database at ``db_file_name``.

    When the file is not present before connecting, the schema is created
    through ``init_db`` on the fresh connection.

    Returns:
        The open ``sqlite3`` connection.
    """
    # Check before connecting: sqlite3.connect() creates the file itself.
    needs_schema = not os.path.isfile(db_file_name)
    connection = sqlite3.connect(db_file_name)
    if needs_schema:
        init_db(connection)
    return connection
def already_processed(connection, link):
    """Report whether ``link`` is already stored in the ``processed`` table.

    The link is passed as a bound parameter, so quotes or other SQL syntax
    inside it are compared literally and cannot alter the query.

    Args:
        connection: Open ``sqlite3`` connection containing ``processed``.
        link: URL of the posting to look up.

    Returns:
        ``True`` if a row with this link exists, ``False`` otherwise.
    """
    cursor = connection.cursor()
    cursor.execute("SELECT 1 FROM processed WHERE link = ? LIMIT 1;", (link,))
    return cursor.fetchone() is not None
def set_processed(connection, link):
    """Record ``link`` in the ``processed`` table and commit.

    The link is passed as a bound parameter, so it is stored verbatim even
    when it contains quotes or other SQL syntax.

    Args:
        connection: Open ``sqlite3`` connection containing ``processed``.
        link: URL of the posting to record.
    """
    try:
        cursor = connection.cursor()
        cursor.execute("INSERT INTO processed (link) VALUES (?);", (link,))
        connection.commit()
    except IntegrityError:
        # The link is the primary key; a duplicate means it was already
        # recorded, which is exactly the state we want.
        pass
def read_template(filename):
    """Load a UTF-8 text file and wrap its contents in a ``string.Template``.

    Args:
        filename: Path of the template file.

    Returns:
        A ``Template`` built from the complete file contents.
    """
    with open(filename, mode='r', encoding='utf-8') as handle:
        return Template(handle.read())
def extract_information(dom, path):
    """Return the cleaned text of the first XPath match, or ``None``.

    Args:
        dom: Object exposing an ``xpath(path)`` method (here an lxml tree).
        path: XPath expression to evaluate.

    Returns:
        The first match with surrounding whitespace stripped and tab
        characters removed, or ``None`` when there is no match or the first
        match is not text.
    """
    try:
        return dom.xpath(path)[0].strip().replace('\t', '')
    except (IndexError, AttributeError, TypeError):
        # IndexError: nothing matched. AttributeError/TypeError: the result
        # is not a sequence of strings. A missing field is expected on some
        # pages, so it is reported as None rather than as an error.
        return None
def parse_feed(url):
    """Parse the feed at ``url`` with feedparser and return its entries.

    Args:
        url: Location of the feed.

    Returns:
        The ``entries`` attribute of the parsed feed.
    """
    return feedparser.parse(url).entries
def process_position(url):
    """Download a posting page and extract its metadata fields.

    Args:
        url: Address of the posting page.

    Returns:
        A dict with the keys ``title``, ``location``, ``institution``,
        ``deadline``, ``link`` and ``type``; each value is whatever
        ``extract_information`` returns for the corresponding XPath.

    Raises:
        requests.exceptions.RequestException: If the page cannot be fetched,
            including when the timeout expires.
    """
    # Without a timeout a single unresponsive page would block the run
    # indefinitely.
    response = requests.get(url, timeout=30)
    dom = html.fromstring(response.text)
    field_paths = {
        'title': XPATH_TITLE,
        'location': XPATH_LOCATION,
        'institution': XPATH_INSTITUTION,
        'deadline': XPATH_DEADLINE,
        'link': XPATH_LINK,
        'type': XPATH_TYPE,
    }
    return {
        field: extract_information(dom, path)
        for field, path in field_paths.items()
    }
def send_email(data, recipient):
    """Render the notification template for one posting and send it.

    The message body comes from the module-level ``mail_template``; the
    From and Subject headers come from the module-level ``sender`` and
    ``subject``; delivery uses the module-level ``smtp`` session.

    Failures are not swallowed: they propagate so the caller can tell a
    failed send from a successful one.

    Args:
        data: Mapping with the keys ``title``, ``type``, ``location``,
            ``institution``, ``deadline`` and ``link``.
        recipient: Address placed in the To header.

    Raises:
        KeyError: If ``data`` lacks one of the keys above or the template
            references a placeholder that is not supplied.
        ValueError: If the template contains an invalid placeholder.
        smtplib.SMTPException: If the SMTP session rejects or cannot deliver
            the message.
    """
    body = mail_template.substitute(
        TITLE=data['title'],
        POSITION=data['type'],
        LOCATION=data['location'],
        INSTITUTION=data['institution'],
        DEADLINE=data['deadline'],
        LINK=data['link'],
    )
    msg = MIMEMultipart()
    msg['From'] = sender
    msg['To'] = recipient
    msg['Subject'] = subject
    msg.attach(MIMEText(body, 'plain'))
    smtp.send_message(msg)
def process_feed(connection, entries, recipient):
    """Send a notification for every feed entry not yet recorded.

    For each entry whose link is not in the database, the posting page is
    scraped, an email is sent, and the link is recorded. The link is
    recorded only if all preceding steps returned without raising.

    An error on one entry is logged with its traceback and the loop moves on
    to the next entry, so a single bad posting does not abort the run.

    Args:
        connection: Open ``sqlite3`` connection holding the ``processed``
            table.
        entries: Iterable of feed entries exposing a ``link`` attribute.
        recipient: Address the notifications are sent to.
    """
    log = logging.getLogger(__name__)
    for entry in entries:
        try:
            if already_processed(connection, entry.link):
                continue
            data = process_position(entry.link)
            send_email(data, recipient)
            set_processed(connection, entry.link)
        except Exception:
            # Best-effort per entry, but never silent.
            log.exception(
                "Failed to process feed entry %r",
                getattr(entry, 'link', None),
            )
# Script body: open an authenticated STARTTLS session, then notify about every
# feed entry that has not been recorded yet. The try/finally blocks make sure
# the SMTP session and the database connection are released even on failure.
smtp.connect(smtp_server, port=smtp_port)
try:
    smtp.ehlo()
    smtp.starttls()
    # The server must be greeted again after the switch to TLS.
    smtp.ehlo()
    smtp.login(smtp_username, smtp_password)
    mail_template = read_template(template)
    connection = connect_db()
    try:
        entries = parse_feed(hsozkult_feed)
        process_feed(connection, entries, recipient)
    finally:
        connection.close()
finally:
    try:
        smtp.quit()
    except OSError:
        # smtplib's exceptions derive from OSError; if the session is already
        # gone, just drop the socket so the original error is not masked.
        smtp.close()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment