Skip to content

Instantly share code, notes, and snippets.

@Bouni
Created February 7, 2019 16:12
Show Gist options
  • Save Bouni/3590f485d23e56c339826623bf6092d3 to your computer and use it in GitHub Desktop.
Save Bouni/3590f485d23e56c339826623bf6092d3 to your computer and use it in GitHub Desktop.
Scrape disposal dates from Abfallwirtschaft Landkreis Waldshut website
import re
import requests
import html
# Page to fetch the disposal dates from.
url = "https://www.abfall-landkreis-waldshut.de/de/termine/abfuhrtage.php?ort=78"

# Waste types to look for; each must equal the text of a table cell on the page.
trash_types = ["Restmüll", "Biotonne", "Grünabfälle", "Gelber Sack", "Blaue Tonne"]

# Matches two adjacent cells: <td>waste type</td><td>dates</td>.
# "[^>]*" and the lazy "(.*?)" keep every match inside a single pair of cells,
# even when several cells share one line of HTML.
regex = r"<td\b[^>]*>({})</td>\s*<td\b[^>]*>(.*?)</td>".format(
    "|".join(re.escape(name) for name in trash_types)
)


def parse_disposal_dates(page_text, pattern=regex):
    """Extract the disposal dates per waste type from the page's HTML.

    The date cell is split on ";". When the last entry ends in four digits,
    those digits are taken as the year, which is then appended to every
    entry, e.g. "03.01.; 17.01.2019" -> ["03.01.2019", "17.01.2019"].
    If no trailing year is found, the entries are returned unchanged.

    Args:
        page_text: Raw HTML of the page; HTML entities are decoded here.
        pattern: Regex with two groups (waste type, date cell).

    Returns:
        A dict mapping each waste type found on the page to its list of
        date strings. Dates of a waste type that occurs more than once are
        accumulated in page order.
    """
    result = {}
    matches = re.findall(pattern, html.unescape(page_text), flags=re.DOTALL)
    for name, cell in matches:
        # Split the cell into single dates and drop empty fragments
        # (e.g. caused by a trailing ";").
        dates = [part.strip() for part in cell.split(";")]
        dates = [part for part in dates if part]
        # Only the last date carries the year; move it onto every date.
        year_match = re.search(r"\d{4}$", dates[-1]) if dates else None
        if year_match:
            year = year_match.group()
            dates[-1] = dates[-1][:-4]
            dates = ["{}{}".format(part, year) for part in dates]
        # Key by the waste type that was actually matched, not by its
        # position in trash_types: the page may omit or reorder types.
        result.setdefault(name, []).extend(dates)
    return result


if __name__ == "__main__":
    # Fetch the page; fail loudly on timeouts and HTTP errors instead of
    # parsing an error page.
    r = requests.get(url, timeout=30)
    r.raise_for_status()
    data = parse_disposal_dates(r.text)
    print(data)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment