|
#!/usr/bin/env python |
|
# -*- coding: utf-8 -*-
|
|
|
from __future__ import unicode_literals |
|
from selenium import webdriver |
|
from selenium.webdriver.support.ui import WebDriverWait |
|
from selenium.webdriver.support import expected_conditions as EC |
|
from selenium.webdriver.common.by import By |
|
|
|
from selenium.common.exceptions import ( |
|
NoSuchElementException, |
|
TimeoutException, |
|
) |
|
|
|
import urlparse |
|
import re |
|
import os |
|
import datetime |
|
import pprint |
|
import json |
|
import traceback |
|
import requests |
|
import time |
|
import sys |
|
import codecs |
|
# Replace stdout with a stream writer that encodes everything printed as UTF-8.
# NOTE(review): presumably needed so that non-ASCII ad texts can be printed
# under Python 2 when stdout has no usable encoding (e.g. when piped) -- confirm.
sys.stdout = codecs.getwriter('utf8')(sys.stdout)
|
|
|
class LOGIN:
    """Credentials typed into the site's login form by ``check_ads``.

    Fill in both values before running the script.
    """

    # E-mail address of your eBay small ads (Kleinanzeigen) account.
    email = ''
    # Password of that account.
    password = ''
|
|
|
# File in which the ads that have already been visited are kept
# (one JSON document per line).
db_filename = 'ads.json'

# The search to monitor. Replace this with the search you are interested in:
# run the search manually on the site and copy/paste the resulting URL.
search_url = 'http://kleinanzeigen.ebay.de/anzeigen/s-wohnung-mieten/berlin/anbieter:privat/anzeige:angebote/c203l3331+wohnung_mieten.zimmer_i:2,3'

# Optional Slack webhook URL. When set, the bot notifies you there whenever it
# finds something interesting; leave it as None to disable Slack messages.
slack_url = None
|
|
|
def send_slack_message(text):
    """Post ``text`` to the configured Slack webhook (best effort).

    Does nothing when the module-level ``slack_url`` is None. Delivery
    problems are reported on stdout and otherwise ignored, so a Slack outage
    never stops the bot.

    :param text: message body; it is sent with ``mrkdwn`` enabled.
    :returns: None
    """
    if slack_url is None:
        return
    payload = {'text': text, 'mrkdwn': True}
    try:
        # The timeout keeps a stalled connection from blocking the bot forever.
        requests.post(slack_url, data={'payload': json.dumps(payload)}, timeout=30)
    except Exception:
        # Deliberately broad because delivery is best effort. Unlike a bare
        # ``except:`` this still lets KeyboardInterrupt/SystemExit through, so
        # pressing CTRL-C while a message is being sent stops the bot.
        print("Can't deliver message to Slack!")
|
|
|
def load_db(filename=None):
    """Load the stored ads from a JSON-lines file.

    :param filename: file to read; defaults to the module-level ``db_filename``.
    :returns: list with one decoded entry per readable line, or an empty list
        when the file does not exist.
    """
    if filename is None:
        filename = db_filename
    if not os.path.exists(filename):
        return []
    ads = []
    with open(filename, "r") as input_file:
        for line_number, line in enumerate(input_file, 1):
            if not line.strip():
                # Blank lines carry no entry.
                continue
            try:
                ads.append(json.loads(line))
            except ValueError:
                # One damaged line must not make every other stored ad
                # unreadable, so report it and keep going.
                print("Skipping unreadable entry on line %d of %s" % (line_number, filename))
    return ads
|
|
|
def save_db(ads, filename=None):
    """Write ``ads`` to a JSON-lines file, replacing its previous content.

    Entries that cannot be written are reported on stdout and skipped; the
    remaining entries are still saved.

    :param ads: iterable of JSON-serializable ad dicts.
    :param filename: file to write; defaults to the module-level ``db_filename``.
    :returns: None
    """
    if filename is None:
        filename = db_filename
    with open(filename, "w") as output_file:
        for ad in ads:
            try:
                output_file.write(json.dumps(ad) + "\n")
            except Exception:
                # Best effort per entry, but without a bare ``except:`` so
                # that CTRL-C during a save is not swallowed.
                print("Could not write entry!")
|
|
|
def is_suitable(ad):
    """
    This function determines if an ad is suitable or not. Modify according to your needs.

    The checks run in this order: required keys present, rent between 300 and
    550, 2 to 3 rooms, 50 to 90 square metres, no excluded keyword in title or
    description, title is not a "wanted" ad, location is one of the accepted
    districts, and no no-go district is mentioned anywhere.

    :param ad: dict of ad attributes. 'Ort', 'Zimmer', 'rent' and
        'Quadratmeter' are required; 'title' and 'description' are treated as
        empty when missing.
    :returns: True if every check passes, False if a check fails or required
        data is missing, None if rent, room count or size is not an integer.
    """
    if any(key not in ad for key in ('Ort', 'Zimmer', 'rent', 'Quadratmeter')):
        return False

    if ad['rent'] is None:
        return False
    try:
        rent = int(ad['rent'])
    except (TypeError, ValueError):
        return None
    if rent > 550 or rent < 300:
        return False

    try:
        rooms = int(ad['Zimmer'])
    except (TypeError, ValueError):
        return None
    if rooms < 2 or rooms > 3:
        return False

    try:
        size = int(ad['Quadratmeter'])
    except (TypeError, ValueError):
        return None
    if size < 50 or size > 90:
        return False

    # Missing or empty text fields are handled as empty strings instead of
    # raising KeyError/TypeError.
    title = ad.get('title') or ''
    description = ad.get('description') or ''
    location = ad['Ort'] or ''

    # re.U makes the case-insensitive match also cover non-ASCII letters
    # (e.g. "MÖBLIERT") on Python 2.
    flags = re.I | re.U

    exchange_regex = r"möbliert|alleinerziehende|Zwischenmiete|WBS|Wohnberechtigungsschein|Wohnungstausch|Tauschangebot|Tausch"
    if re.search(exchange_regex, description, flags) or \
       re.search(exchange_regex, title, flags):
        return False

    if re.search(r"suche|sucht", title, flags):
        return False

    # "Friede?nau" accepts the correct spelling "Friedenau" as well as the
    # misspelling the pattern used to contain.
    wanted_zones = r'Wedding|Moabit|Mitte|Neuk[^\s]+lln|Tiergarten|Sch[^\s]+neberg|Treptow|Wilmersdorf|Tegel|Tempelhof|Charlottenburg|Friedrichshain|Prenzlauer\s+Berg|Steglitz|Friede?nau'
    if not re.search(wanted_zones, location, flags):
        return False

    # "Licht?enrade" likewise matches the real district name "Lichtenrade".
    # NOTE(review): "Treptow" is listed both as wanted and as no-go; the no-go
    # check wins, so Treptow ads are always rejected -- confirm that is intended.
    no_go_zones = r"Licht?enrade|Lankwitz|Schmargendorf|Treptow|Karlshorst|Lichterfelde|Britz|Mariendorf"
    if any(re.search(no_go_zones, text, flags) for text in (location, title, description)):
        return False

    return True
|
|
|
def notify_me_of(ad):
    """Print a Markdown summary of ``ad`` and send the same text to Slack.

    The description is rendered as a block quote (every line prefixed with
    "> "). ``ad`` itself is not modified.

    :param ad: ad dict providing the keys 'title', 'url', 'Zimmer',
        'rent_str', 'Ort', 'description' and 'phone'.
    """
    # Work on a copy so that the quoted description never ends up in the database.
    fields = dict(ad)
    fields['description'] = "> " + fields['description'].replace("\n", "\n> ")

    message = u"""
## Neues Angebot: %(title)s

%(url)s

Zimmer: **%(Zimmer)s**
Miete: **%(rent_str)s**
Ort: **%(Ort)s**

## Beschreibung

%(description)s

## Telefon

**%(phone)s**

""" % fields

    print(message)
    send_slack_message(message)
|
|
|
# Text that is sent to the advertiser through the contact form.
# Modify according to your needs ;)
contact_message =u"""Hallo,

Ihre Anzeige klingt wirklich interessant! Ich bin auf der Suche nach einer 2/3-Zimmer Wohnung in Berlin, das Angebot passt da genau. [...]

Falls ich auf Ihr Suchprofil passe würde ich mich sehr freuen, falls wir kurz telefonieren könnten um zu schauen, ob die Rahmenbedingungen stimmen und eventuell einen Besichtigungstermin zu vereinbaren. [...]

Alle benötigten Unterlagen (Schufa, Einkommensnachweise,
Mietschuldenfreiheit, Selbstauskunft, ...) für die Anmietung habe ich bereits vorliegen.

Freue mich sehr über Ihre kurze Rückmeldung!

Viele Grüße
[your name]
"""

# Re-flow the template: every non-blank line is stripped and followed by a
# single space (so the lines of a paragraph are joined), and every blank line
# becomes a paragraph break ("\n\n").
contact_message = u"".join(
    (line.strip() + u" ") if line.strip() else u"\n\n"
    for line in contact_message.split(u"\n")
)

print(contact_message)

import time

# Timestamp (time.time()) of the last status message sent to Slack;
# None until the first one has been sent.
last_ping = None
|
|
|
def contact(ad,browser):
    """Put the ad currently open in ``browser`` on the watchlist and message the advertiser.

    ``ad['contacted']`` is set to True before anything else happens, so it
    stays set even if a later step fails or the ad is skipped. If the
    watchlist control does not offer "hinzufügen", the ad is taken to be on
    the watchlist already and no message is sent.

    :param ad: ad dict; 'title' and 'url' are used for the Slack message.
    :param browser: Selenium WebDriver showing the ad's detail page.
    """
    ad['contacted'] = True

    watchlist_label = browser.find_element_by_id('viewad-action-watchlist').text
    if re.search(u"hinzufügen", watchlist_label) is None:
        print("Has already been added to watchlist, skipping...")
        return

    print("Adding to watchlist")
    browser.find_element_by_id("viewad-lnk-watchlist").click()
    # Fixed pause after the click before the form is touched.
    time.sleep(5)

    # The form element itself is not used; the lookup is kept because it
    # fails early when the page has no contact form.
    browser.find_element_by_id('viewad-contact-bottom-form')
    send_button = browser.find_element_by_id('viewad-contact-bottom-submit')
    message_box = browser.find_element_by_id('viewad-contact-bottom-message')
    message_box.send_keys(contact_message)
    send_button.click()

    send_slack_message("**Angeschrieben**: %s (%s)" % (ad['title'],ad['url']))
    time.sleep(5)
|
|
|
|
|
# Amount in front of the currency: digits with optional "." thousands
# separators, optionally followed by ",<decimals>" which are ignored.
_RENT_REGEX = re.compile(r"(\d+(?:\.\d{3})*)(?:,\d+)?\s*(?:EUR|€)")


def _parse_rent(rent_str):
    """Return the whole-number amount in ``rent_str`` as a digit string, or None."""
    match = _RENT_REGEX.search(rent_str or '')
    if match is None:
        return None
    # "1.450" -> "1450": drop the thousands separators so int() reads the full
    # amount rather than only the digits after the last dot.
    return match.group(1).replace('.', '')


def get_attributes(browser):
    """Collect the attributes of the ad page currently open in ``browser``.

    :param browser: Selenium WebDriver showing an ad's detail page.
    :returns: dict with one entry per name/value pair of the page's attribute
        lists (name without its trailing colon) plus the keys 'rent_str'
        (raw price text), 'rent' (digit string or None), 'title', 'phone'
        ('' when no phone number is shown) and 'description'.
    """
    attributes = {}

    attribute_lists = browser.find_elements_by_xpath('//dl[contains(@class,"a-medium-width attributelist")]')
    for attribute_list in attribute_lists:
        current_name = None
        for item in attribute_list.find_elements_by_xpath('.//dd | .//dt'):
            text = item.text.strip()
            if item.tag_name == 'dt':
                if text.endswith(':'):
                    text = text[:-1]
                # An empty name resets the pair, so the following value is
                # dropped rather than stored under the key ''.
                current_name = text or None
            elif current_name is not None:
                attributes[current_name] = text

    rent_str = browser.find_element_by_id('viewad-price').text
    attributes['rent_str'] = rent_str
    attributes['rent'] = _parse_rent(rent_str)

    attributes['title'] = browser.find_element_by_id('viewad-title').text

    phone_elements = browser.find_elements_by_xpath('//*[contains(@class,"phoneline-number")]')
    attributes['phone'] = phone_elements[0].text if phone_elements else ''

    attributes['description'] = browser.find_element_by_id('viewad-description-text').text

    return attributes
|
|
|
def check_ads(ads_by_id):
    """Run one crawl: log in, open the search and process every listed ad.

    Every ad page is scraped with ``get_attributes`` and stored in
    ``ads_by_id`` under its 'Anzeigennummer' (existing entries are updated in
    place). Ads seen for the first time are rated with ``is_suitable``;
    suitable ones are contacted (no phone number) or reported for a manual
    call (phone number shown) and announced via ``notify_me_of``, unsuitable
    ones are only reported to Slack.

    :param ads_by_id: dict mapping 'Anzeigennummer' to ad dict; modified in place.
    :raises KeyboardInterrupt: re-raised after logging when CTRL-C is pressed
        while ads are being processed.

    The browser is always closed, also when an exception propagates.
    """
    browser = webdriver.Firefox()
    try:
        browser.set_page_load_timeout(60)
        browser.delete_all_cookies()

        # Log in through the site's start page.
        browser.get('http://kleinanzeigen.ebay.de/')
        browser.find_element_by_xpath("//*[contains(text(), 'Einloggen')]").click()
        browser.find_element_by_id('login-email').send_keys(LOGIN.email)
        browser.find_element_by_id('login-password').send_keys(LOGIN.password)
        browser.find_element_by_id('login-submit').click()

        browser.get(search_url)

        # Collect all links first: navigating to an ad invalidates the
        # elements of the result page.
        result_list = browser.find_element_by_id('srchrslt-adtable')
        links = []
        for result_item in result_list.find_elements_by_xpath(".//li"):
            try:
                link = result_item.find_element_by_xpath('.//a[contains(@class, "ad-title")]')
            except NoSuchElementException:
                # A list entry without a title link must not abort the whole
                # crawl; it is simply not an ad we can open.
                continue
            href = link.get_attribute('href')
            if href and href not in links:
                links.append(href)

        try:
            for link_href in links:
                path = urlparse.urlparse(link_href).path
                id_match = re.match(r".*\/([\d\w\-]+)$", path)
                if not id_match:
                    print("Cannot find ad number")
                    continue
                ad_id = id_match.group(1)
                print(ad_id)

                try:
                    # Loading the page is inside the handler as well, so a
                    # page-load timeout skips this ad instead of ending the run.
                    browser.get(link_href)
                    WebDriverWait(browser, 10).until(
                        EC.presence_of_element_located((By.ID, "viewad-action-watchlist"))
                    )
                    print("Found it")
                except TimeoutException:
                    print("Timeout!")
                    continue

                attributes = get_attributes(browser)
                attributes['id'] = ad_id
                attributes['url'] = link_href

                if 'Anzeigennummer' not in attributes:
                    print("No AD ID found...")
                    continue
                ad_number = attributes['Anzeigennummer']

                new_ad = ad_number not in ads_by_id
                if new_ad:
                    print("New ad!")
                    ads_by_id[ad_number] = attributes
                else:
                    print("Updating ad.")
                    ads_by_id[ad_number].update(attributes)

                ad = ads_by_id[ad_number]
                # Rated once and reused for both the log line and the stored flag.
                suitable = is_suitable(ad)
                print("Suitable: %s" % (suitable,))

                # Known ads are only refreshed; nobody is contacted twice.
                if not new_ad:
                    continue

                ad['suitable'] = suitable

                if suitable:
                    if 'contacted' not in ad or ad['contacted'] == False:
                        print("Not yet contacted!")
                        if not ad['phone']:
                            contact(ad, browser)
                        else:
                            send_slack_message("Bitte selbst anrufen: %s (%s - %s)" % (ad['phone'], ad['title'], ad['url']))
                        notify_me_of(ad)
                else:
                    send_slack_message("Nicht geeignet: %s (%s)" % (ad['title'], ad['url']))

                pprint.pprint(ad)
                print("\n\n\n")
        except KeyboardInterrupt:
            print("CTRL-C pressed, aborting...")
            raise
    finally:
        browser.quit()
|
|
|
if __name__ == '__main__':
    # Entry point: load the stored ads, then crawl in an endless loop,
    # saving the database and pausing 30 seconds after every run.
    ads = load_db()
    print("Loaded %d entries" % len(ads))

    # Index the stored ads by their ad number; entries without one are dropped.
    ads_by_id = {}
    for ad in ads:
        if 'Anzeigennummer' in ad:
            ads_by_id[ad['Anzeigennummer']] = ad

    while True:
        # Send a status message to Slack at most once per hour.
        if last_ping is None or time.time() - last_ping > 60 * 60:
            last_ping = time.time()
            suitable_count = len([ad for ad in ads_by_id.values() if ad.get('suitable')])
            send_slack_message("Indexed %d ads so far, found %d suitable ones." % (len(ads_by_id), suitable_count))

        try:
            check_ads(ads_by_id)
        except KeyboardInterrupt:
            # CTRL-C: keep what has been collected so far, then stop.
            save_db(ads_by_id.values())
            break
        except Exception:
            # Any other error ends only this run. ``Exception`` instead of a
            # bare ``except:`` so that SystemExit still terminates the script
            # rather than being retried forever.
            error_text = traceback.format_exc()
            print("An exception occured...")
            print(error_text)
            send_slack_message("Exception: %s" % error_text)

        print("Waiting 30 secs...")
        save_db(ads_by_id.values())
        time.sleep(30)