-
-
Save show0k/ff386eb6a67b4c86a2c6 to your computer and use it in GitHub Desktop.
Parseur de leboncoin : permet de s'envoyer des emails lors de nouvelles annonces sur une recherche précise
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
import urllib | |
import requests | |
import re | |
import sqlite3 | |
import os | |
import argparse | |
import gettext | |
import logging | |
import logging.handlers | |
from datetime import datetime | |
from bs4 import BeautifulSoup as bs | |
# Logging setup: every record is sent to a console stream handler and to a
# rotating log file, both using the same line format.
logger = logging.getLogger("debug")
logger.setLevel(logging.INFO)
formatter = logging.Formatter("%(asctime)s | %(levelname)8s | %(message)s")

# Console handler (StreamHandler() without arguments uses its default stream).
stStdout = logging.StreamHandler()
stStdout.setFormatter(formatter)
logger.addHandler(stStdout)

# Per-user working directory; it holds the log files and the SQLite database.
# expanduser() is used instead of os.environ['HOME'] so that an unset HOME
# does not abort the script with a KeyError.
lbcaDir = os.path.join(os.path.expanduser("~"), ".lbca")
logsDir = os.path.join(lbcaDir, "logs")
try:
    # Creates lbcaDir as well when it is missing.
    os.makedirs(logsDir)
except OSError:
    # An already existing directory is fine (possibly created by a concurrent
    # run); anything else is a real error.
    if not os.path.isdir(logsDir):
        raise

# File handler: rotate at 256 KiB and keep 10 backups.
stLogfile = logging.handlers.RotatingFileHandler(
    os.path.join(logsDir, "log"), maxBytes=256 * 1024, backupCount=10)
stLogfile.setFormatter(formatter)
#stLogfile.doRollover()
logger.addHandler(stLogfile)
# Command-line interface. Each entry pairs the option flag(s) with the keyword
# arguments handed to add_argument(); entries are registered in order, so the
# --help output lists them in this order.
parser = argparse.ArgumentParser(description="Le bon coin alert generator")
_cliOptions = [
    (("-r",), dict(metavar="region", dest="region", help="Region",
                   default="ile_de_france")),
    (("-s",), dict(metavar="search", dest="searches", nargs="+",
                   help="Searches to perform",
                   default=["Atlantic F18", "Sauter Lucki"])),
    (("-e",), dict(metavar="email-to", dest="email_to",
                   help="Email to send it to",
                   default="[email protected]")),
    (("-f",), dict(metavar="email-from", dest="email_from",
                   help="Email to send it from",
                   default="[email protected]")),
    (("-u",), dict(metavar="email-subject", dest="email_subject",
                   help="Email's subject",
                   default="An article matched your search")),
    (("--smtp-server",), dict(metavar="smtp-server", dest="server",
                              help="SMTP server to use",
                              default="localhost")),
    (("--url",), dict(metavar="url", dest="url", nargs='+',
                      help="Listes d'Url")),
    (("--gui",), dict(dest="gui", action="store_true",
                      help="Use WxWidget GUI")),
]
for _flags, _settings in _cliOptions:
    parser.add_argument(*_flags, **_settings)

# Parsed once at import time; the rest of the script reads this namespace.
args = parser.parse_args()
# leboncoin-specific constants.
# Search URL for the selected region; the url-encoded query string is appended
# to it by searchToLinks().
baseUrl = "http://www.leboncoin.fr/annonces/offres/{region}/?".format(region=args.region)
# Prefix an href must have to be kept as an ad link: a category made of
# lowercase letters/digits followed by an 8 to 12 digit id and ".htm".
linkRegex = re.compile(r"http://www.leboncoin.fr/[a-z0-9]+/[0-9]{8,12}\.htm")
# This is for requests handling | |
def searchToLinks(search):
    """Return the ad links found on the result page of a keyword search.

    The search term is url-encoded as the ``q`` parameter and appended to
    ``baseUrl``; the page is downloaded and every ``<a>`` whose ``href``
    starts with a match of ``linkRegex`` is collected.

    :param search: search terms, as typed by the user.
    :returns: list of matching href strings, in document order (duplicates
        are not removed).
    :raises requests.exceptions.RequestException: on network failure or when
        the server does not answer within the timeout.
    """
    # urlencode lives in urllib.parse on Python 3 and in urllib on Python 2.
    try:
        from urllib.parse import urlencode
    except ImportError:
        from urllib import urlencode

    logger.info("Searching for {search}".format(search=search))
    url = baseUrl + urlencode({"q": search})
    # Without a timeout requests waits forever on a stalled server, which
    # would leave an unattended run hanging.
    response = requests.get(url, timeout=30)
    # Name the parser explicitly so the result does not depend on which
    # optional parsers happen to be installed.
    pageSoup = bs(response.text, "html.parser")
    hrefs = (aTag.get('href') for aTag in pageSoup.findAll('a'))
    # Keep only the anchors that have an href pointing at an ad page.
    return [href for href in hrefs if href and linkRegex.match(href)]
def searchesToLinks(searches):
    """Run searchToLinks() on every search term and concatenate the results.

    :param searches: iterable of search strings.
    :returns: one flat list with the links of each search, in input order.
    """
    collected = []
    for query in searches:
        collected.extend(searchToLinks(query))
    return collected
def searchToLinksFromUrl(url):
    """Return the ad links found on the page at ``url``.

    The page is downloaded and every ``<a>`` whose ``href`` starts with a
    match of ``linkRegex`` is collected.

    :param url: complete URL of a result page.
    :returns: list of matching href strings, in document order (duplicates
        are not removed).
    :raises requests.exceptions.RequestException: on network failure or when
        the server does not answer within the timeout.
    """
    logger.info("Searching for {url}".format(url=url))
    # Without a timeout requests waits forever on a stalled server, which
    # would leave an unattended run hanging.
    response = requests.get(url, timeout=30)
    # Name the parser explicitly so the result does not depend on which
    # optional parsers happen to be installed.
    pageSoup = bs(response.text, "html.parser")
    hrefs = (aTag.get('href') for aTag in pageSoup.findAll('a'))
    # Keep only the anchors that have an href pointing at an ad page.
    return [href for href in hrefs if href and linkRegex.match(href)]
def searchesToLinksFromUrl(urls):
    """Run searchToLinksFromUrl() on every URL and concatenate the results.

    :param urls: iterable of result-page URLs.
    :returns: one flat list with the links of each page, in input order.
    """
    collected = []
    for pageUrl in urls:
        collected.extend(searchToLinksFromUrl(pageUrl))
    return collected
# Database preparation: open (or create) the SQLite file in the working
# directory and make sure the single "links" table exists.
db = sqlite3.connect("{0}/db".format(lbcaDir))
db.execute("""
CREATE TABLE IF NOT EXISTS links (
url TEXT UNIQUE,
date DATETIME,
seen BOOL DEFAULT 0,
nb_views INTEGER,
emailed BOOL DEFAULT 0
);
""")

# Installs _() as a builtin for the "lbca" translation domain.
gettext.install("lbca")

logger.info("Start !")
if args.gui:
    # GUI mode: We should use a timer and a notification to make it really useful
    import subprocess

    import wx

    class LBCMainFrame(wx.Frame):
        """Main window: a report list of stored links and a "Refresh" menu."""

        def __init__(self, *args, **kwds):
            """Build the list control and the menu bar and wire the events.

            All arguments are forwarded unchanged to ``wx.Frame.__init__``.
            """
            # begin wxGlade: MyFrame1.__init__
            wx.Frame.__init__(self, *args, **kwds)
            self.itemsList = wx.ListCtrl(self, wx.ID_ANY, style=wx.LC_REPORT | wx.SUNKEN_BORDER)
            self.itemsList.InsertColumn(0, "ID")
            self.itemsList.InsertColumn(1, "URL")
            self.itemsList.InsertColumn(2, "Date")
            self.itemsList.InsertColumn(3, "Seen")
            # Activating a row opens the link (see selectItem).
            self.Bind(wx.EVT_LIST_ITEM_ACTIVATED, self.selectItem, self.itemsList)

            # Menu Bar
            self.generalMenuBar = wx.MenuBar()
            self.General = wx.Menu()
            self.Update = wx.MenuItem(self.General, 2, _("Refresh"), _("Refresh"), wx.ITEM_NORMAL)
            self.General.AppendItem(self.Update)
            self.generalMenuBar.Append(self.General, _("General"))
            self.SetMenuBar(self.generalMenuBar)
            # Menu Bar end

            self.__set_properties()
            self.__do_layout()

            self.Bind(wx.EVT_MENU, self.refreshItems, self.Update)
            # end wxGlade

        def __set_properties(self):
            """Set the window title and the minimum size of the list."""
            # begin wxGlade: MyFrame1.__set_properties
            self.SetTitle(_("LBC Alert"))
            self.itemsList.SetMinSize((600, 400))
            #self.SetSize((400, 300))
            # end wxGlade

        def __do_layout(self):
            """Put the list in a vertical sizer that fills the frame."""
            # begin wxGlade: MyFrame1.__do_layout
            self.itemsList.SetSize(wx.Size(400, 300))
            itemsListSize = wx.BoxSizer(wx.VERTICAL)
            itemsListSize.Add(self.itemsList, 1, wx.EXPAND, 0)
            self.SetSizer(itemsListSize)
            itemsListSize.Fit(self)
            self.Layout()
            # end wxGlade

        def loadItems(self, event):
            """Reload the list control from the database (at most 20 rows).

            ``event`` is not used; it is accepted so the method can also be
            called from event handlers.
            """
            self.itemsList.DeleteAllItems()
            query = "select rowid, url, date, seen from links order by seen desc, rowid desc limit 20;"
            for rowid, url, date, seen in db.execute(query):
                # Each row is inserted at index 0, so the control ends up
                # listing the rows in the reverse of the query order.
                position = self.itemsList.InsertStringItem(0, str(rowid))
                self.itemsList.SetStringItem(position, 1, url)
                self.itemsList.SetStringItem(position, 2, date)
                self.itemsList.SetStringItem(position, 3, "Yes" if seen else "No")

        def selectItem(self, event):
            """Open the activated link in a browser and record the visit."""
            # The text of the activated item is the rowid shown in column 0.
            rowid = event.GetText()
            row = db.execute("select url from links where rowid=?;", (rowid,)).fetchone()
            if row is None:
                # Nothing stored under that id: nothing to open or update.
                event.Skip()
                return
            # The URL comes from scraped pages and linkRegex only checks its
            # prefix, so it must never go through a shell: pass it as a
            # single argument instead of building a command line.
            try:
                subprocess.call(["sensible-browser", row[0]])
            except OSError as error:
                # Best effort, like the previous os.system() call: a missing
                # browser launcher must not prevent marking the row as seen.
                logger.error("Could not run sensible-browser: {error}".format(error=error))
            # nb_views has no default and starts as NULL; NULL+1 stays NULL,
            # hence the coalesce().
            db.execute("update links set seen=1, nb_views=coalesce(nb_views, 0)+1 where rowid=?;", (rowid,))
            db.commit()
            event.Skip()
            self.loadItems(event)

        def refreshItems(self, event):
            """Run the configured searches, store new links, reload the list."""
            # We save all links
            for link in searchesToLinks(args.searches):
                # isoformat(" ") is the text the implicit sqlite3 datetime
                # adapter produced; passing it explicitly avoids relying on
                # that adapter, which is deprecated since Python 3.12.
                db.execute("insert or ignore into links ('url','date') values (?,?);",
                           (link, datetime.now().isoformat(" ")))
            db.commit()
            self.loadItems(event)

    if __name__ == "__main__":
        app = wx.PySimpleApp(0)
        wx.InitAllImageHandlers()
        mainFrame = LBCMainFrame(None, wx.ID_ANY, "")
        app.SetTopWindow(mainFrame)
        mainFrame.Show()
        mainFrame.loadItems(None)
        app.MainLoop()
else:
    # Email mode
    import smtplib
    from email.mime.text import MIMEText
    from xml.sax.saxutils import escape

    # We save all links
    if args.url:
        # Only log the URL once we know one was given: args.url is None when
        # --url is absent.
        logger.info("Url = " + args.url[0])
        links = searchesToLinksFromUrl(args.url)
    else:
        links = searchesToLinks(args.searches)
    for link in links:
        # isoformat(" ") is the text the implicit sqlite3 datetime adapter
        # produced; passing it explicitly avoids relying on that adapter,
        # which is deprecated since Python 3.12.
        db.execute("insert or ignore into links ('url','date') values (?,?);",
                   (link, datetime.now().isoformat(" ")))
    db.commit()

    # Fetch everything first so the table is not updated while a SELECT on it
    # is still being iterated.
    pending = db.execute("select rowid, url from links where emailed=0;").fetchall()
    nb = len(pending)
    items = []
    for rowid, url in pending:
        logger.info("We have new link : {link}.".format(link=url))
        # Scraped URLs are untrusted: escape them before embedding in HTML.
        safeUrl = escape(url, {'"': "&quot;"})
        items.append('<li><a href="{url}">{url} (#{id})</a></li>\n'.format(id=rowid, url=safeUrl))

    if nb > 0:
        # Translate the template first, then fill it in; formatting before
        # the lookup would make the catalogue lookup miss.
        text = (_("We found {nb} articles matching your searches:<br />").format(nb=nb)
                + '\n' + '<ul>\n' + ''.join(items) + '</ul>\n')
        msg = MIMEText(text, 'html')
        msg['Subject'] = args.email_subject
        msg['From'] = args.email_from
        msg['To'] = args.email_to
        smtp = smtplib.SMTP(args.server)
        try:
            smtp.sendmail(msg['From'], [msg['To']], msg.as_string())
            logger.info("We are sending the following email:\n {email}".format(email=msg.as_string()))
            # Flag the links only after the mail went out, so a failed send
            # is retried on the next run.
            db.executemany("update links set emailed=1 where rowid=?",
                           [(row[0],) for row in pending])
            db.commit()
        finally:
            # Always release the SMTP connection, even when sending failed.
            smtp.quit()
    else:
        logger.info("We don't have anything to send !")

db.close()
logger.info("End !")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment