Skip to content

Instantly share code, notes, and snippets.

@auselen
Last active August 17, 2021 11:31
Show Gist options
  • Save auselen/75966c217b596995acba to your computer and use it in GitHub Desktop.
Save auselen/75966c217b596995acba to your computer and use it in GitHub Desktop.
Scrape listings from blocket.se
from requests import get
from lxml import html
from operator import itemgetter
# Maps the region/city name used in the blocket.se URL path to the code that
# list() sends in the ``ca`` query parameter.
citycodes = {
    'skane': '23',
    'lund': '23_10',
    'eslov': '23_2',
}

# Module-level accumulator: list() appends one dict per scraped listing here.
result = []
def list(city, keyword, limit):
    """Search blocket.se for *keyword* in *city* and record the listings found.

    Builds the search URL, downloads the page, and appends one dict per
    listing to the module-level ``result`` list.  Each dict has the keys
    ``keyword``, ``date``, ``text``, ``price``, ``city`` and ``link``.

    NOTE: the name shadows the builtin ``list``; it is kept unchanged because
    existing callers use it.

    Args:
        city: Key into ``citycodes``; also used as the URL path segment.
        keyword: Search string placed in the ``q`` query parameter as-is.
        limit: Maximum number of ``article/div`` nodes to examine.  Nodes that
            are skipped for lacking an ``itemprop`` attribute still count
            towards this limit.

    Returns:
        None.  Matches are accumulated in the global ``result``.

    Raises:
        KeyError: If *city* is not present in ``citycodes``.
        IndexError: If a listing node does not have the expected child layout.
    """
    url = ('http://www.blocket.se/' + city + '?q=' + keyword +
           '&cg=0&w=0&st=s&ca=' + citycodes[city] + '&is=1&l=0&md=th')
    # Single-argument print(): emits the same space-separated line on both
    # Python 2 and Python 3 (the old print statement is Python 2 only).
    print("Looking for %s in %s %s" % (keyword, city, url))

    response = get(url)
    doc = html.fromstring(response.content)

    for index, node in enumerate(doc.xpath('//*/article/div')):
        if index >= limit:
            break
        # Only divs carrying an itemprop attribute are treated as listings.
        if not node.get("itemprop"):
            continue

        # Fields are read by child position, so this depends on the page
        # layout -- presumably header / title link / price; verify against
        # the live markup.
        header = node[0]
        title_link = node[1][0]

        raw_price = node[2].text
        if raw_price:
            # Strip the ":-" currency suffix and any spaces inside the number.
            price = raw_price.replace(":-", "").replace(" ", "")
        else:
            price = "?"

        result.append({
            'keyword': keyword,
            'date': header[1].get("datetime"),
            'text': title_link.text,
            'price': price,
            'city': header[0][1].text,
            'link': title_link.get("href"),
        })
# Search terms of interest.  Every entry must be followed by a comma:
# adjacent string literals are silently concatenated, which previously fused
# "wii" and "3ds" into the single bogus term "wii,3ds".
favorites = [
    "nintendo", "wii", "3ds", "gamecube", "sega", "dreamcast",
    "xbox", "x-box", "ps3", "ps4", "playstation",
    "lego", "playmobil", "psvita", "ps vita", "psp",
]

# All terms are combined into one query joined by "ELLER" (Swedish for "or");
# presumably blocket.se treats it as an OR operator -- verify against the site.
query = ' ELLER '.join(favorites)
print(query)
list("skane", query, len(favorites) * 5)
# Alternative kept from the original: one search per keyword instead.
#for f in favorites:
# list("skane", f, 5)

# Newest listings first; 'date' is the raw "datetime" attribute string.
sorted_result = sorted(result, key=itemgetter('date'), reverse=True)

# ANSI SGR reset, so the last colour does not leak into the terminal after the
# script finishes.
COLOR_RESET = '\033[0m'

for m in sorted_result:
    # One row per listing; each field gets its own colour.  Joining first and
    # printing once works on both Python 2 and Python 3.
    row = [
        '\033[96m' + m['date'],
        #'%-16s' % ('\033[95m' + m['keyword']),
        '\033[94m' + m['text'],
        '\033[93m' + m['price'],
        '\033[92m' + m['city'],
        '\033[91m' + m['link'],
    ]
    print(' '.join(row) + COLOR_RESET)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment