Last active
December 15, 2015 20:59
-
-
Save mbiette/5322301 to your computer and use it in GitHub Desktop.
A really simple script written in Python that emails job offers published on an RSS feed. (I left in the RSS feed that works well with the field names I used.) It runs every 5 minutes. It downloads an RSS feed with feedparser,
puts the content in a simple object named offer,
tracks new offers and saves them in a pickle file,
sends an email with…
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from offer import offer | |
import psycopg2 | |
class dbOffer:
    """PostgreSQL persistence for the offers of one RSS source.

    Rows live in the ``offers`` table; every statement is parameterised and
    scoped to the ``sourcename`` given at construction time.
    """

    # Column order expected by convertToOffer. Selecting the columns by name
    # keeps the row layout stable even if the table's physical order changes.
    _SELECT_VALID = (
        "SELECT source,title,link,description,date,"
        "sent_message_id,reply_message_id,deleted "
        "FROM offers WHERE source=%s AND deleted IS NOT TRUE"
    )
    _INSERT = (
        "INSERT INTO offers (link,description,date,sent_message_id,"
        "reply_message_id,deleted,source,title) "
        "VALUES (%s,%s,%s,%s,%s,%s,%s,%s)"
    )
    _UPDATE = (
        "UPDATE offers SET link=%s, description=%s, date=%s, "
        "sent_message_id=%s, reply_message_id=%s, deleted=%s "
        "WHERE source=%s AND title=%s"
    )

    def __init__(self, sourcename, host="localhost", port="5432", database="rsstracker", user="rsstracker", password="rsstracker"):
        """Open the database connection.

        sourcename -- value stored in / filtered on the ``source`` column.
        host, port, database, user, password -- forwarded to psycopg2.connect.
        """
        # Ask psycopg2 to hand text columns back as unicode objects.
        psycopg2.extensions.register_type(psycopg2.extensions.UNICODE)
        psycopg2.extensions.register_type(psycopg2.extensions.UNICODEARRAY)
        self.conn = psycopg2.connect(host=host, port=port, database=database, user=user, password=password)
        self.sourcename = sourcename

    def prepareOffer(self, offer):
        """Return the parameter tuple for one offer.

        The order matches the placeholders of both the INSERT and the UPDATE
        statement: the six data columns first, then source and title.
        """
        return (
            offer.link,
            offer.description,
            offer.date,
            offer.sent_message_id,
            offer.reply_message_id,
            offer.deleted,
            self.sourcename,
            offer.title,
        )

    def convertToOffer(self, sqlArray):
        """Build an offer from a row laid out as
        (source, title, link, description, date, sent_message_id,
        reply_message_id, deleted). Index 0 (source) is not copied.
        """
        o = offer(sqlArray[1], sqlArray[2], sqlArray[3], sqlArray[4])
        o.sent_message_id = sqlArray[5]
        o.reply_message_id = sqlArray[6]
        o.deleted = sqlArray[7]
        return o

    def _writeMany(self, sql, offerList):
        """Run ``sql`` once per offer inside a single transaction.

        Commits on success. On any error the transaction is rolled back (so
        the connection stays usable) and the exception is re-raised. The
        cursor is always closed. An empty list is a no-op.
        """
        rows = [self.prepareOffer(item) for item in offerList]
        if not rows:
            return
        cur = self.conn.cursor()
        try:
            cur.executemany(sql, rows)
            self.conn.commit()
        except Exception:
            self.conn.rollback()
            raise
        finally:
            cur.close()

    def insertOfferList(self, offerList):
        """Insert every offer of ``offerList`` as a new row.

        NOTE(review): rows flagged deleted are kept in the table, so inserting
        an offer whose (source, title) was seen before presumably violates the
        primary key -- confirm how re-published offers should be handled.
        """
        self._writeMany(self._INSERT, offerList)

    def updateOfferList(self, offerList):
        """Overwrite the stored columns of every offer, matched by source and title."""
        self._writeMany(self._UPDATE, offerList)

    def getValidOfferList(self):
        """Return the offers of this source whose ``deleted`` flag is not true."""
        cur = self.conn.cursor()
        try:
            cur.execute(self._SELECT_VALID, (self.sourcename,))
            rows = cur.fetchall()
            # End the transaction psycopg2 opened implicitly for the SELECT.
            self.conn.commit()
        except Exception:
            self.conn.rollback()
            raise
        finally:
            cur.close()
        return [self.convertToOffer(row) for row in rows]
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Database holding the tracked offers.
CREATE DATABASE rsstracker
    WITH ENCODING = 'UTF8'
         CONNECTION LIMIT = -1;

-- Login role; the password is supplied as a pre-computed md5 hash.
CREATE ROLE rsstracker
    LOGIN
    ENCRYPTED PASSWORD 'md530f6cd783e03a6ab3e9e3a601400cbfa'
    VALID UNTIL 'infinity';

-- Group role that receives the default privileges below.
-- NOTE(review): this script never makes rsstracker a member of
-- rsstracker_group -- confirm the membership is granted elsewhere.
CREATE ROLE rsstracker_group
    VALID UNTIL 'infinity';

-- Default privileges on objects created later by the role running this script.
ALTER DEFAULT PRIVILEGES
    GRANT INSERT, SELECT, UPDATE, DELETE, TRUNCATE, REFERENCES, TRIGGER
    ON TABLES
    TO rsstracker_group;

ALTER DEFAULT PRIVILEGES
    GRANT SELECT, UPDATE, USAGE
    ON SEQUENCES
    TO rsstracker_group;

ALTER DEFAULT PRIVILEGES
    GRANT EXECUTE
    ON FUNCTIONS
    TO rsstracker_group;

-- One row per offer; an offer is identified by its feed source and title.
-- NOTE(review): presumably meant to be run while connected to the
-- rsstracker database -- confirm before executing the script in one go.
CREATE TABLE offers
(
    source           VARCHAR(10),
    title            VARCHAR(100),
    link             VARCHAR(200),
    description      TEXT,
    date             VARCHAR(50),
    sent_message_id  VARCHAR(50),
    reply_message_id VARCHAR(50),
    deleted          BOOLEAN,
    PRIMARY KEY (source, title)
);
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import feedparser | |
import time,datetime | |
from offer import offer | |
from db import dbOffer | |
from listOffers import listOffers | |
from mailOffer import mailOffer | |
#import pprint | |
if __name__ == '__main__':
    # RSS feed that is polled for job offers.
    rss_url = "https://careers.societegenerale.com/groupe/fr/rss-offre.html?0&zone=83&lang=fr&JOBTYPE_VIE=34"
    mail_pause = 2    # seconds between two emails, so neither the SMTP server nor the recipient is flooded
    poll_pause = 300  # seconds between two polls of the feed (5 minutes)

    # Collaborators: persistence, change tracking and mail delivery.
    dbo = dbOffer('socgen')
    lo = listOffers(dbo)
    mo = mailOffer(
        email_from='',
        email_to='',
        server_smtp='',
        login='',
        password='',
        subject_keyword='SG',
    )

    while True:
        # Fetch the current state of the feed.
        print(datetime.datetime.now())
        feed = feedparser.parse(rss_url)

        # An empty feed is ignored altogether: otherwise every known offer
        # would be flagged deleted now and re-sent once the feed is back.
        if feed['items']:
            # Register every published offer with the tracker.
            for entry in feed['items']:
                lo.trackOffer(offer(entry['title'], entry['link'], entry['description'], entry['updated']))

            # Short summary on stdout.
            print('list %d' % len(lo.listOffers))
            print('new %d' % len(lo.newOffers))
            print('del %d' % len(lo.delOffers))

            # Announce the offers that just appeared.
            for added in lo.newOffers:
                mo.sendOffer(added)
                time.sleep(mail_pause)

            # Announce the offers that disappeared.
            for removed in lo.delOffers:
                mo.delOffer(removed)
                time.sleep(mail_pause)

            # Persist the changes, then start a fresh tracking round.
            lo.saveList()
            lo.resetTracking()

        # Wait until the next poll.
        time.sleep(poll_pause)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import cPickle as pickle | |
from db import dbOffer | |
class listOffers:
    """Track which offers appeared in, or vanished from, the feed between two polls.

    The known offers are loaded from, and saved through, the ``db`` object
    given to the constructor.

    Attributes:
      listOffers -- offers known before the current poll.
      newOffers  -- offers tracked during this poll that were not known.
      delOffers  -- known offers not (yet) seen during this poll; once the
                    whole feed has been tracked these are the vanished ones.
    """

    def __init__(self, db):
        """Load the known offers from ``db`` and start a tracking round."""
        self.db = db
        self.listOffers = None
        self.newOffers = []
        self.delOffers = []
        self.loadList()
        self.resetTracking()

    def loadList(self):
        """Replace the known offers with the valid ones stored in the database."""
        self.listOffers = self.db.getValidOfferList()

    def saveList(self):
        """Insert the new offers and store the vanished ones flagged as deleted."""
        self.db.insertOfferList(self.newOffers)
        for gone in self.delOffers:
            gone.deleted = True
        self.db.updateOfferList(self.delOffers)

    def resetTracking(self):
        """Fold the last round into the known list and start a new round."""
        self.listOffers.extend(self.newOffers)
        for gone in self.delOffers:
            self.listOffers.remove(gone)
        self.newOffers = []
        # Every known offer is a deletion candidate until it is tracked again.
        self.delOffers = list(self.listOffers)

    def trackOffer(self, offer):
        """Register one offer found in the feed during the current round.

        A known offer is removed from the deletion candidates. An unknown
        offer is queued as new. An offer that was already tracked in this
        round (the feed lists it twice) is ignored, so it is neither mailed
        nor inserted a second time.
        """
        if offer in self.delOffers:
            self.delOffers.remove(offer)
        elif offer not in self.listOffers and offer not in self.newOffers:
            self.newOffers.append(offer)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import smtplib | |
import pprint | |
from bs4 import BeautifulSoup | |
from email.utils import make_msgid | |
from email.mime.text import MIMEText | |
from email.mime.multipart import MIMEMultipart | |
from email import charset | |
from offer import offer | |
class mailOffer:
    """Send offer notifications by email through an authenticated SMTP server.

    ``sendOffer`` announces an offer; ``delOffer`` sends a "DELETED" reply that
    references the announcement so mail clients can thread the two messages.
    """

    def __init__(self, email_from, email_to, server_smtp, login, password, subject_keyword):
        """Store the delivery settings.

        email_from, email_to -- sender and recipient addresses.
        server_smtp          -- host passed to smtplib.SMTP.
        login, password      -- credentials used after STARTTLS.
        subject_keyword      -- tag inserted in the subject of every message.
        """
        self.email_from = email_from
        self.email_to = email_to
        self.server_smtp = server_smtp
        self.login = login
        self.password = password
        self.subject_keyword = subject_keyword
        # Registers quoted-printable as the body encoding for utf-8; this
        # changes the email package's global charset registry.
        charset.add_charset('utf-8', charset.SHORTEST, charset.QP)

    def _newMessage(self, subject, message_id, in_reply_to=None):
        """Return a multipart/alternative container with the common headers set."""
        msg = MIMEMultipart('alternative')
        msg['Subject'] = subject
        msg['From'] = self.email_from
        msg['To'] = self.email_to
        if in_reply_to is not None:
            msg['References'] = in_reply_to
            msg['In-Reply-To'] = in_reply_to
        msg['Message-ID'] = message_id
        return msg

    def _attachBodies(self, msg, text, html):
        """Attach the plain-text part, then the HTML part."""
        # According to RFC 2046 the last part of a multipart/alternative
        # message -- here the HTML one -- is the preferred rendering.
        msg.attach(MIMEText(text.encode('utf-8'), 'plain', _charset='utf-8'))
        msg.attach(MIMEText(html.encode('utf-8'), 'html', _charset='utf-8'))

    def _deliver(self, msg):
        """Send ``msg`` over a fresh STARTTLS session; the socket is always released."""
        s = smtplib.SMTP(self.server_smtp)
        try:
            s.starttls()
            s.login(self.login, self.password)
            # sendmail takes the sender, the recipient and the message
            # serialised as one string.
            s.sendmail(self.email_from, self.email_to, msg.as_string())
            s.quit()
        finally:
            # After a successful quit() this is a no-op; after a failure it
            # closes the connection that would otherwise leak.
            s.close()

    def sendOffer(self, o):
        """Email offer ``o``.

        Assigns ``o.sent_message_id`` when it is still None so that a later
        deletion notice can reference this message. Errors raised by smtplib
        propagate to the caller.
        """
        if o.sent_message_id is None:
            o.sent_message_id = make_msgid()
        msg = self._newMessage(
            "[VIE " + self.subject_keyword + " " + o.date + "] " + o.title,
            o.sent_message_id,
        )
        print(msg['Message-ID'])

        # Plain-text body: the link plus the description stripped of markup.
        text = o.link + "\n\n" + BeautifulSoup(o.description).get_text()
        # HTML body: the link plus the description as delivered by the feed.
        html = (
            "<html>\n"
            "<head></head>\n"
            "<body>\n"
            "<p>" + o.link + "</p><br>\n"
            + o.description + "\n"
            "</body>\n"
            "</html>\n"
        )
        self._attachBodies(msg, text, html)
        self._deliver(msg)
        print('Sent email %s' % o.title)

    def delOffer(self, o):
        """Email a "DELETED" notice for offer ``o``.

        Assigns ``o.reply_message_id`` when it is still None. When the offer
        has a ``sent_message_id`` the notice carries References/In-Reply-To
        headers pointing at it. Errors raised by smtplib propagate.
        """
        if o.reply_message_id is None:
            o.reply_message_id = make_msgid()
        msg = self._newMessage(
            "RE: [VIE " + self.subject_keyword + " " + o.date + "] " + o.title,
            o.reply_message_id,
            in_reply_to=o.sent_message_id,
        )

        text = "DELETED"
        html = (
            "<html>\n"
            "<head></head>\n"
            "<body>\n"
            "<h1>DELETED</h1>\n"
            "</body>\n"
            "</html>\n"
        )
        self._attachBodies(msg, text, html)
        self._deliver(msg)
        print('Sent email DEL %s' % o.title)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class offer:
    """Container for an offer's information.

    Two offers are considered equal when their titles are equal; the other
    fields do not take part in comparisons or hashing.
    """

    def __init__(self, title, link, description, date):
        """Store the feed fields; the mail bookkeeping fields start unset."""
        self.title = title
        self.link = link
        self.description = description
        self.date = date
        self.sent_message_id = None   # Message-ID of the announcement email
        self.reply_message_id = None  # Message-ID of the deletion notice
        self.deleted = False

    def __eq__(self, other):
        """Compare by title; objects without a ``title`` are simply not equal."""
        try:
            return self.title == other.title
        except AttributeError:
            return NotImplemented

    def __ne__(self, other):
        """Inverse of __eq__ (Python 2 does not derive it automatically)."""
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result

    def __hash__(self):
        """Hash by title so that equal offers hash alike."""
        return hash(self.title)

    def __str__(self):
        """Return the title as a native string."""
        text = self.title
        if isinstance(text, str):
            return text
        try:
            # Python 2 unicode title: __str__ must return a byte string.
            return text.encode('utf-8')
        except AttributeError:
            return str(text)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment