Created
January 7, 2020 18:43
-
-
Save leoluk/d3f38bacc3973425ba57909e3779cc3e to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python3 | |
# -*- coding: utf8 -*- | |
""" | |
Screen scrapes Allestörungen.de spark lines | |
""" | |
import sys | |
import requests | |
import lxml.html | |
def fetch_page(url="http://allestörungen.de", timeout=30):
    """
    Fetch the Allestörungen.de main page and return it as an lxml.html tree.

    :param url: address of the page to download.
    :param timeout: seconds to wait for the server before giving up;
        passed straight to ``requests``. Without it a stalled connection
        would block the script indefinitely.
    :returns: the parsed document as returned by ``lxml.html.fromstring``.
    :raises requests.RequestException: on connection problems, timeouts or
        a non-2xx HTTP status (so an error page is never parsed as data).
    """
    # The context manager releases the session's pooled connections
    # once the page has been downloaded.
    with requests.Session() as session:
        # Remove the default User-Agent header that requests puts on
        # every new session.
        del session.headers['User-Agent']
        response = session.get(url, timeout=timeout)
        response.raise_for_status()
        return lxml.html.fromstring(response.text)
def parse_sparkline(url):
    """
    Parse a sparkline URL into a list of integers.

    The values are read from the text after the last ``=`` in *url* (the
    whole string when it contains no ``=``) and are expected to be
    comma-separated integers.

    :param url: sparkline image URL, e.g. ``".../sparkline?data=1,2,3"``.
    :returns: list of ints in the order they appear; an empty list when
        the payload holds no values.
    :raises ValueError: if a non-blank entry is not a valid integer.
    """
    _, _, payload = url.rpartition('=')
    # Blank entries (empty payload, trailing or doubled commas) are skipped
    # instead of being fed to int(), which would raise on ''.
    return [int(entry) for entry in payload.split(',') if entry.strip()]
def extract_sparkline(tree, company):
    """
    Extract the sparkline values for a specific company.

    :param tree: parsed page, as returned by ``fetch_page``.
    :param company: company name, matched exactly against the ``title``
        attribute of the ``<a>`` element wrapping the sparkline caption.
    :returns: list of integers parsed from the first matching sparkline URL.
    :raises IndexError: if the page holds no sparkline for *company*.
    """
    # The name is handed over as an XPath variable rather than formatted
    # into the expression, so names containing quotes (e.g. "McDonald's")
    # cannot break or alter the query.
    matches = tree.xpath(
        "//a[@title=$company]/div[@class='caption']/img/@data-original",
        company=company)
    if not matches:
        raise IndexError("no sparkline found for company %r" % (company,))
    return parse_sparkline(matches[0])
def extract_all_sparklines(tree):
    """
    Extract all sparklines for all companies and yield them as
    (company, list_of_values) tuples.

    :param tree: parsed page, as returned by ``fetch_page``.
    :returns: generator of ``(title, values)`` pairs in document order,
        where ``title`` is the ``title`` attribute of the enclosing ``<a>``
        and ``values`` is the list produced by ``parse_sparkline``.
    """
    image_path = "div[@class='caption']/img/@data-original"
    # Walk the anchors and resolve each image relative to its own anchor.
    # Querying titles and images separately and zipping the two lists would
    # silently pair a company with somebody else's data as soon as one
    # caption lacks a titled <a> parent or an anchor holds several images.
    for anchor in tree.xpath("//a[@title][%s]" % image_path):
        title = anchor.get('title')
        for sparkline_url in anchor.xpath(image_path):
            yield title, parse_sparkline(sparkline_url)
if __name__ == '__main__':
    # Script entry point: download the main page and print, for every
    # company found, the most recent (last) value of its sparkline.
    tree = fetch_page()
    for company, sparkline in extract_all_sparklines(tree):
        latest = sparkline[-1]
        line = 'downdetector_reports{company="%s"}=%s' % (company, latest)
        print(line)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment