Skip to content

Instantly share code, notes, and snippets.

@jaksi
Created September 7, 2016 17:49
Show Gist options
  • Save jaksi/046d8ee348d770b374ab510f33c5b8b6 to your computer and use it in GitHub Desktop.
Save jaksi/046d8ee348d770b374ab510f33c5b8b6 to your computer and use it in GitHub Desktop.
Scrape ingatlan.com for new apartments and send notifications via Pushbullet
#!/usr/bin/env python3
import traceback
from time import sleep
from lxml import html
from pushbullet import PushBullet
def _parse_apartment_row(row):
    """Turn one search-result ``<tr>`` into an apartment dict.

    Returns a dict with the keys ``address``, ``url``, ``price``, ``size``
    and ``rooms``, or ``None`` when the row does not have the cells this
    scraper relies on (at least five ``<td>`` cells and a link in the
    second one).
    """
    cells = row.findall('td')
    if len(cells) < 5:
        return None
    link = cells[1].find('a')
    if link is None:
        return None
    # Resolve the href against the document URL before reading it back.
    link.make_links_absolute()

    price_cell = cells[2]
    price_element = price_cell.find(".//span[@class='price-huf']")
    if price_element is None:
        price_element = price_cell.find(".//span[@class='price-eur']")
    if price_element is None:
        # Neither price span is present: use the cell's own text instead of
        # failing on ``None.text_content()``.
        price_element = price_cell

    return {
        'address': ', '.join(
            part for part in link.itertext() if not part.isspace()),
        'url': link.get('href'),
        'price': price_element.text_content(),
        'size': cells[3].text_content(),
        'rooms': "{} szoba".format(cells[4].text_content()),
    }


def get_apartments(url):
    """Scrape a search-result listing and every page that follows it.

    Starting at ``url``, each page is parsed and the "next" button is
    followed until there is none, pausing five seconds between requests.

    Args:
        url: Address of the first result page.

    Returns:
        A dict mapping each row's ``data-id`` attribute to a dict with the
        keys ``address``, ``url``, ``price``, ``size`` and ``rooms``. When
        the same id shows up on several pages, the later page wins.
    """
    result = {}
    visited = set()
    while True:
        visited.add(url)
        print("GET {}".format(url))
        parser = html.HTMLParser(encoding='UTF-8')
        tree = html.parse(url, parser=parser)

        rows = tree.findall(
            "//table[@class='search-results']/tbody/tr[@data-id]")
        for row in rows:
            apartment = _parse_apartment_row(row)
            if apartment is None:
                # One odd row should not discard the rest of the page.
                print("Skipping malformed row {}".format(row.get('data-id')))
                continue
            result[row.get('data-id')] = apartment

        button_next = tree.find("//a[@class='button next']")
        if button_next is None:
            break
        button_next.make_links_absolute()
        next_url = button_next.get('href')
        # Stop on a missing href or a link back to a page already fetched,
        # so a pagination cycle cannot loop forever.
        if next_url is None or next_url in visited:
            break
        sleep(5)  # be polite to the server between page fetches
        url = next_url
    return result
def _notify(pushbullet, emails, apartment):
    """Push one apartment as a link to every address in ``emails``.

    The address is passed as the first argument of ``push_link``, the
    listing URL as the second, and "price, rooms, size" as the third.
    """
    body = "{0[price]}, {0[rooms]}, {0[size]}".format(apartment)
    for email in emails:
        pushbullet.push_link(apartment['address'], apartment['url'], body,
                             email=email)


def main():
    """Poll the search URL forever and push a link for each unseen listing.

    The listing is scraped once at startup to build the set of known
    apartments, then re-scraped every five minutes. Any listing id that has
    not been seen before is printed and pushed to every address in
    ``emails``. ``emails`` is empty as written, so nothing is pushed until
    recipients are added.
    """
    pushbullet = PushBullet('<pushbullet token>')
    emails = []
    url = ('http://ingatlan.com/listasz/kiado+lakas+butorozott+csak-gepesitett+i-ker+ii-ker+ix-ker+v-ker+vi-ker+vii-ker+viii-ker+xi-ker+xii-ker+havi-60-90-ezer-Ft+25-m2-felett+i-ii-ix-v-vi-vii-viii-xi-xii-ker')

    apartments = get_apartments(url)
    print("{} apartments".format(len(apartments)))
    # Push the first scraped listing once at startup (presumably a smoke
    # test of the Pushbullet setup). An empty first scrape must not crash.
    if apartments:
        _notify(pushbullet, emails, next(iter(apartments.values())))

    while True:
        sleep(5 * 60)
        try:
            new_apartments = get_apartments(url)
            # NOTE: this is the size of the fresh scrape, not the number of
            # previously unseen listings.
            print("{} new apartments".format(len(new_apartments)))
            if not new_apartments:
                continue
            for apartment_id, cur in new_apartments.items():
                if apartment_id in apartments:
                    continue
                print("New apartment: {}".format(cur))
                _notify(pushbullet, emails, cur)
                # Record right after a successful push so a later failure in
                # this round does not cause this listing to be pushed again.
                apartments[apartment_id] = cur
            apartments.update(new_apartments)
            print("{} apartments".format(len(apartments)))
        except Exception:
            # Keep polling after scrape or push errors, but let
            # KeyboardInterrupt and SystemExit stop the program.
            traceback.print_exc()
# Start polling only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
@TorokLev
Copy link

The page structure of ingatlan.com has changed, so the selectors used in this script no longer work.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment