Skip to content

Instantly share code, notes, and snippets.

@leoluk
Created January 7, 2020 18:43
Show Gist options
  • Save leoluk/d3f38bacc3973425ba57909e3779cc3e to your computer and use it in GitHub Desktop.
Save leoluk/d3f38bacc3973425ba57909e3779cc3e to your computer and use it in GitHub Desktop.
#!/usr/bin/python3
# -*- coding: utf8 -*-
"""
Screen scrapes Allestörungen.de spark lines
"""
import sys
import requests
import lxml.html
def fetch_page(url="http://allestörungen.de", timeout=30):
    """
    Fetches the Allestörungen.de main page and returns a lxml.html tree

    url is the page to download. timeout is the number of seconds to wait
    for the server; requests applies no timeout by default, so without one
    an unresponsive host would block the caller forever.

    Raises requests.RequestException on connection errors, timeouts and
    non-success HTTP status codes.
    """
    # The context manager releases the session's pooled connections on exit.
    with requests.Session() as s:
        # Send the request without the default requests User-Agent header.
        del s.headers['User-Agent']
        response = s.get(url, timeout=timeout)
        # Fail loudly on 4xx/5xx instead of parsing an error page as data.
        response.raise_for_status()
        return lxml.html.fromstring(response.text)
def parse_sparkline(url):
    """
    Turn a sparkline URL into the list of integers it encodes.

    Only the text after the last '=' in the URL is used; it is read as a
    comma-separated sequence of integers.
    """
    _, _, payload = url.rpartition('=')
    return list(map(int, payload.split(',')))
def extract_sparkline(tree, company):
    """
    Extract a single sparkline for a specific company

    tree is queried through its xpath() method. company is compared for
    equality with the title attribute of the link that wraps the sparkline
    image. The image's data-original URL is handed to parse_sparkline and
    that result is returned.

    Raises IndexError if the tree holds no sparkline for the company.
    """
    # Hand the name over as an XPath variable rather than formatting it into
    # the expression: a name containing a quote character would otherwise
    # produce an invalid (or different) query.
    matches = tree.xpath(
        "//a[@title=$company]/div[@class='caption']/img/@data-original",
        company=company)
    if not matches:
        raise IndexError(
            "no sparkline found for company {!r}".format(company))
    return parse_sparkline(matches[0])
def extract_all_sparklines(tree):
    """
    Extract all sparklines for all companies and yield them as
    (company, list_of_values) tuples.

    Each caption element is inspected on its own, so a caption that lacks
    either a titled parent link or a sparkline image is skipped instead of
    shifting the company/sparkline pairing of every caption after it.
    """
    for caption in tree.xpath("//div[@class='caption']"):
        # Both lookups are relative to this caption, which keeps the title
        # and the sparkline URL tied to the same element.
        titles = caption.xpath("parent::a/@title")
        urls = caption.xpath("img/@data-original")
        if not titles or not urls:
            continue
        for url in urls:
            yield titles[0], parse_sparkline(url)
if __name__ == '__main__':
    # Print one line per company, carrying the last value of its sparkline.
    tree = fetch_page()
    for name, values in extract_all_sparklines(tree):
        latest = values[-1]
        print('downdetector_reports{company="%s"}=%s' % (name, latest))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment