Skip to content

Instantly share code, notes, and snippets.

@defufna
Created March 6, 2018 10:40
Show Gist options
  • Save defufna/0c44ada28adabf62e851812db72e4aa1 to your computer and use it in GitHub Desktop.
Save defufna/0c44ada28adabf62e851812db72e4aa1 to your computer and use it in GitHub Desktop.
Plot measles epidemic in Serbia
# -*- coding: utf-8 -*-
import requests
import bs4
import re
import datetime
from matplotlib import pyplot
# Cyrillic letters that look identical to Latin ones, mapped to their Latin
# twins. get_text() applies this table before the text is searched for Latin
# phrases (presumably the page mixes both alphabets - verify against the site).
_lookalikes = {
    "а": "a",
    "е": "e",
    "ј": "j",
    "к": "k",
    "м": "m",
    "о": "o",
    "т": "t",
}
# The capital letters pair up exactly like the lowercase ones, so derive them.
replacements = str.maketrans(
    {
        **_lookalikes,
        **{cyr.upper(): lat.upper() for cyr, lat in _lookalikes.items()},
    }
)

# Page that is scraped for the case counts.
url = "http://www.batut.org.rs/index.php?content=1629"
# Plain-HTTP requests are routed through a SOCKS5 proxy on this machine.
proxies = {"http": "socks5://localhost:8081"}
def get_text(url, proxies, timeout=30):
    """Download *url* and return the normalized text of its news article.

    Args:
        url: Address of the page to fetch.
        proxies: Proxy mapping handed to ``requests.get``.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely.

    Returns:
        The text of the first element with class ``news_content_full``, with
        CRLF line endings converted to LF, every pair of consecutive newlines
        removed, and the ``replacements`` translation table applied.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        IndexError: If the page has no ``news_content_full`` element.
    """
    r = requests.get(url, proxies=proxies, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of parsing an error page as the report.
    r.raise_for_status()
    data = r.content.decode("utf8")
    soup = bs4.BeautifulSoup(data, "html.parser")
    content = soup.find(attrs={"class": "news_content_full"})
    if content is None:
        # IndexError is what the previous find_all(...)[0] lookup raised.
        raise IndexError(
            "no element with class 'news_content_full' found at {0}".format(url)
        )
    text = content.get_text()
    # NOTE(review): deleting "\n\n" also joins lines that were separated by a
    # single blank line - kept as-is because the parsing relies on it.
    text = text.replace("\r\n", "\n").replace("\n\n", "")
    return text.translate(replacements)
def get_first_paragraphs(text):
    """Return the lines of *text* that hold a cumulative case report.

    A line qualifies when it contains the phrase "Od početka oktobra 2017"
    and the case-count pattern matches in it exactly once.
    """
    marker = "Od početka oktobra 2017"
    count_pattern = re.compile("ukupno (\\d+) sluča")
    selected = []
    for line in text.splitlines():
        if marker not in line:
            continue
        # Lines with no count, or with several, are ignored.
        if len(count_pattern.findall(line)) != 1:
            continue
        selected.append(line)
    return selected
# Matches dates written as "D.M.YYYY." with up to three whitespace characters
# allowed between the month and the year, e.g. "5.3.2018." or "27.02. 2018.".
date_re = re.compile(r"\d{1,2}\.\d{1,2}\.\s{,3}\d{2,4}\.")


def extract_date(s):
    """Return the first date found in *s* as a ``datetime.datetime``.

    Args:
        s: Text containing a date in ``day.month.year.`` form.

    Returns:
        The parsed date, with the time set to midnight.

    Raises:
        IndexError: If *s* contains no date.
        ValueError: If the matched text is not a valid calendar date.
    """
    match = date_re.search(s)
    if match is None:
        # IndexError is what the previous findall(s)[0] lookup raised.
        raise IndexError("no date found in {0!r}".format(s))
    # \s in the pattern also matches tabs and non-breaking spaces, so strip
    # every kind of whitespace, not just the ASCII space.
    compact = "".join(match.group(0).split())
    # The pattern accepts short years; strptime's %Y only takes four digits,
    # so two-digit years need %y.
    year_digits = len(compact.rstrip(".").rsplit(".", 1)[-1])
    fmt = "%d.%m.%y." if year_digits == 2 else "%d.%m.%Y."
    return datetime.datetime.strptime(compact, fmt)
def extract(paragraphs):
    """Turn report paragraphs into ``(date, case count)`` tuples.

    For each paragraph the date comes from ``extract_date`` and the count is
    the first number captured by the case-count pattern. The result keeps the
    order of *paragraphs*.
    """
    count_pattern = re.compile("ukupno (\\d+) sluča")
    points = []
    for paragraph in paragraphs:
        when = extract_date(paragraph)
        cases = int(count_pattern.findall(paragraph)[0])
        points.append((when, cases))
    return points
def get_data(url, proxies):
    """Fetch the page at *url* and return its ``(date, case count)`` tuples.

    Chains ``get_text``, ``get_first_paragraphs`` and ``extract``.
    """
    page_text = get_text(url, proxies)
    report_lines = get_first_paragraphs(page_text)
    return extract(report_lines)
if __name__ == "__main__":
    # Each entry is a (report date, cumulative case count) pair.
    d = get_data(url, proxies)
    if not d:
        # Unpacking zip(*[]) would otherwise die with an opaque ValueError.
        raise SystemExit("No case counts found at {0}".format(url))
    # Plot in chronological order regardless of how the page lists reports.
    d = sorted(d, key=lambda point: point[0])
    x, y = zip(*d)
    fig = pyplot.figure()
    ax = fig.add_subplot(111)
    ax.set_title("Epidemija morbila u Srbiji", fontsize=18, fontweight="bold")
    ax.plot(x, y)
    # The most recent report carries the running total, so label the plot
    # with the count from the latest date rather than the first one on the page.
    ax.text(1.0, 0, "Total: {0}".format(y[-1]), verticalalignment='bottom',
            horizontalalignment='right', transform=ax.transAxes, fontsize=14)
    pyplot.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment