Skip to content

Instantly share code, notes, and snippets.

@arthurdarcet
Last active August 29, 2015 14:27
Show Gist options
  • Save arthurdarcet/e9e7d8c80afe1c985467 to your computer and use it in GitHub Desktop.
Save arthurdarcet/e9e7d8c80afe1c985467 to your computer and use it in GitHub Desktop.
Le Bon Coin watcher
#!/usr/bin/env python3
import argparse
import email.mime.text
import html
import logging
import logging.config
import re
import smtplib
import time
import urllib.parse

import bs4
import requests
# Delay between two polling rounds of the watcher loop, in seconds.
SLEEP = 30

# Host of the SMTP server used to send alert emails.
SMTP_SERVER = 'localhost'

# Subject line of alert emails; `{q}` is replaced by the search text.
EMAIL_SUBJECT = 'lbc-watcher: new match for search {q}'

# Sender address of alert emails.
# NOTE(review): this value looks like a redaction placeholder rather than a
# real address -- confirm and set a valid sender before relying on delivery.
EMAIL_FROM = '[email protected]'

# HTML body of an alert email. `{q}` is the search text and `{items}` the
# concatenation of one rendered EMAIL_ITEM_TEMPLATE per matching ad.
EMAIL_TEMPLATE = """
<div style="background-color: #F8F8F8; padding-top: 40px; padding-bottom: 60px;">
<div style="max-width: 550px; margin: auto; background-color: #FFF; padding: 20px 50px;">
<p>The following items matched your search for <code>{q}</code>:</p>
<ul style="list-style: none; margin-top: 30px;">{items}</ul>
</div>
</div>
"""

# HTML fragment for a single ad. Placeholders: `{image}`, `{url}`, `{title}`
# and `{price}`.
# The link's max-width is a CSS declaration inside the style attribute; it was
# previously written as `max-width="280px"`, which terminated the attribute
# early and produced invalid markup.
EMAIL_ITEM_TEMPLATE = """
<li style="border-bottom: 1px solid #ddd; margin: 20px 0; clear: both; height: 120px">
<img src="{image}" style="max-height: 100px; max-width: 180px; margin-right: 40px; float: left;">
<a href="{url}" style="float: left; padding-top: 40px; display: inline-block; max-width: 280px;">{title}</a>
<span style="margin-left: 30px; color: #999; float: right;">{price}</span>
</li>
"""

# All messages go through the root logger, which the script configures at
# startup via logging.config.dictConfig.
logger = logging.root
class BC:
    """Scraper for the leboncoin.fr search result listing of one region.

    The region is inserted verbatim into the request path
    (``annonces/offres/<region>/``).
    """

    _url = 'http://www.leboncoin.fr'
    # Ad links look like <_url>/<category>/<8-12 digit id>.htm; only the id is
    # captured. The base URL is escaped so its dots match literally.
    _link = re.compile(r'{}/[a-z0-9]+/([0-9]{{8,12}})\.htm'.format(re.escape(_url)))
    # Seconds to wait for the site before giving up; without a timeout a
    # stalled connection would block the caller forever.
    _timeout = 30

    def __init__(self, region):
        """Remember the region whose listings will be searched."""
        self.region = region

    def search(self, q, until=None):
        """Yield the ads listed for the search text `q`, in page order.

        Iteration stops as soon as an ad whose id equals `until` is met (that
        ad is not yielded). With `until=None` every matching link on the page
        is yielded.

        Each yielded item is a dict with the keys ``id``, ``url``, ``title``,
        ``price`` and ``image``; the last three are None when the
        corresponding element is absent from the listing entry.
        """
        query = urllib.parse.urlencode({'q': q})
        soup = self._req('annonces/offres/{}/?{}'.format(self.region, query))
        for a in soup.select('div.list-lbc a'):
            m = self._link.match(a.get('href', ''))
            if m is None:
                # Not a link to an ad.
                continue
            ad_id = m.group(1)
            if until is not None and until == ad_id:
                return
            logger.debug('Found id %r for search %r', ad_id, q)
            # The category part of the link is deliberately not kept: per the
            # original author it is unneeded, even to get the details page.
            yield {
                'id': ad_id,
                'url': a.get('href', None),
                'title': self._first(a, '.title', 'get_text', strip=True),
                'price': self._first(a, '.price', 'get_text', strip=True),
                'image': self._first(a, '.image img', 'get', 'src', None),
            }

    @staticmethod
    def _first(node, selector, method, *args, **kwargs):
        """Call `method` on the first element of `node` matching `selector`.

        Returns None when nothing matches the selector.
        """
        found = node.select(selector)
        if not found:
            return None
        return getattr(found[0], method)(*args, **kwargs)

    def _req(self, p):
        """Fetch the site path `p` and return it parsed as BeautifulSoup."""
        url = '{}/{}'.format(self._url, p)
        logger.debug('LeBonCoin request: %r', url)
        resp = requests.get(url, timeout=self._timeout)
        if not resp.ok:
            # The body is still parsed (it will normally contain no ads), but
            # an error page should not pass silently for an empty result.
            logger.warning('LeBonCoin request %r returned HTTP %s', url, resp.status_code)
        return bs4.BeautifulSoup(resp.content, 'html.parser')
class RollingBC(BC):
    """BC variant that only reports ads not reported by a previous search."""

    def __init__(self, *args, **kwargs):
        # Search text -> id of the newest ad seen at the last non-empty poll.
        self.seen = {}
        # Search text -> set of every ad id already returned for that search.
        # It grows for the lifetime of the instance.
        self._reported = {}
        super().__init__(*args, **kwargs)

    def search(self, q):
        """Return the list of ads for `q` not returned before, or None.

        The listing is read from the top down to the newest ad seen at the
        previous poll. If that ad is no longer listed, the marker never
        matches and the whole page comes back; the set of already reported
        ids then prevents those ads from being announced a second time.
        """
        last = self.seen.get(q, None)
        listed = list(super().search(q, until=last))
        if not listed:
            return None
        # Always move the marker to the current top of the listing, even when
        # everything listed turns out to have been reported already.
        self.seen[q] = listed[0]['id']
        reported = self._reported.setdefault(q, set())
        new = [it for it in listed if it['id'] not in reported]
        if not new:
            return None
        reported.update(it['id'] for it in new)
        return new
def send_alert(to, q, items):
    """Email an HTML summary of `items` matching the search `q` to `to`.

    `items` is a sequence of dicts providing the placeholders used by
    EMAIL_ITEM_TEMPLATE (``image``, ``url``, ``title``, ``price``); extra keys
    are ignored by the template. The message is sent through SMTP_SERVER.

    Errors raised by smtplib while connecting or sending propagate to the
    caller.
    """
    logger.debug('Notifying %s with %s items for search %r', to, len(items), q)

    def esc(value):
        # Item fields are scraped from a web page, so they are escaped before
        # being placed in HTML text or attribute values. Missing fields (None)
        # render as empty instead of the literal text "None".
        return '' if value is None else html.escape(str(value), quote=True)

    rendered_items = ''.join(
        EMAIL_ITEM_TEMPLATE.format(**{key: esc(value) for key, value in it.items()})
        for it in items
    )
    content = EMAIL_TEMPLATE.format(q=esc(q), items=rendered_items)

    msg = email.mime.text.MIMEText(content, 'html')
    # The subject is a plain-text header, so the query is not HTML-escaped.
    msg['Subject'] = EMAIL_SUBJECT.format(q=q)
    msg['From'] = EMAIL_FROM
    msg['To'] = to

    # The context manager closes the connection even if sending fails.
    with smtplib.SMTP(SMTP_SERVER) as smtp:
        smtp.send_message(msg)
# --- Command line ----------------------------------------------------------
parser = argparse.ArgumentParser()
parser.add_argument('-d', '--debug', action='store_true', help='Log debug messages')
parser.add_argument('-r', '--region', nargs='?', help='Region', default='ile_de_france')
parser.add_argument('-e', '--email', nargs='?', help='Email to send the alert to', default=None)
parser.add_argument('searches', nargs='+', help='Searches to perform')
args = parser.parse_args()

# --- Logging: one console handler on the root logger -------------------------
logging.config.dictConfig({
    'version': 1,
    'handlers': {
        'console': {
            'class': 'logging.StreamHandler',
            'formatter': 'clean',
        },
    },
    'formatters': {
        'clean': {
            'format': '{asctime} | {levelname:^8} | {message}',
            'datefmt': '%Y-%m-%d %H:%M:%S',
            'style': '{',
        },
    },
    'root': {
        'handlers': ['console'],
        'level': 'DEBUG' if args.debug else 'INFO',
    },
})

bc = RollingBC(args.region)

# Priming pass: record what is currently listed so that only ads appearing
# afterwards are reported. Results are discarded. Errors are deliberately not
# caught here: failing at startup is clearer than starting from an unknown
# state.
for q in args.searches:
    bc.search(q)

# Polling loop: runs until the process is interrupted.
while True:
    time.sleep(SLEEP)
    logger.debug('Wakeup')
    for q in args.searches:
        try:
            new = bc.search(q)
        except requests.RequestException:
            # A transient network failure must not stop the watcher; this
            # search is simply retried at the next wakeup.
            logger.exception('Search %r failed', q)
            continue
        if not new:
            continue
        for it in new:
            logger.info('Found new result for search %r: %s', q, it)
        if args.email:
            try:
                send_alert(args.email, q, new)
            except (smtplib.SMTPException, OSError):
                # The results were already logged above; keep watching even
                # if the mail could not be delivered.
                logger.exception('Could not send alert for search %r to %s', q, args.email)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment