Created
March 6, 2018 10:40
-
-
Save defufna/0c44ada28adabf62e851812db72e4aa1 to your computer and use it in GitHub Desktop.
Plot measles epidemic in Serbia
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
import requests | |
import bs4 | |
import re | |
import datetime | |
from matplotlib import pyplot | |
# Translation table for str.translate(): each Cyrillic letter in the first
# string is replaced by the Latin letter at the same position in the second.
# NOTE(review): presumably these are look-alike letters that appear mixed into
# otherwise Latin-script text on the scraped page -- confirm against the page.
replacements = str.maketrans(
    dict(zip("аејкмотАЕЈКМОТ", "aejkmotAEJKMOT"))
)

# Page that is downloaded and parsed for the figures.
url = "http://www.batut.org.rs/index.php?content=1629"

# Plain-HTTP requests are sent through a SOCKS5 proxy on localhost:8081.
proxies = {"http": "socks5://localhost:8081"}
def get_text(url, proxies, timeout=30):
    """Download *url* and return the normalised text of its news element.

    The response body is decoded as UTF-8 and parsed with BeautifulSoup's
    "html.parser".  The text of the first element whose class is
    "news_content_full" is taken, CRLF line endings are converted to LF,
    every pair of consecutive newlines is removed, and the module-level
    ``replacements`` table is applied.

    Args:
        url: Address of the page to download.
        proxies: Proxy mapping handed to ``requests.get``.
        timeout: Seconds to wait for the server before giving up, so that
            an unresponsive host or proxy cannot block the script forever.

    Returns:
        The cleaned-up text as a single string.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            4xx/5xx HTTP status.
        IndexError: If the page has no "news_content_full" element.
    """
    r = requests.get(url, proxies=proxies, timeout=timeout)
    # Fail on HTTP errors here instead of trying to parse an error page.
    r.raise_for_status()
    data = r.content.decode("utf8")
    soup = bs4.BeautifulSoup(data, "html.parser")

    blocks = soup.find_all(attrs={"class": "news_content_full"})
    if not blocks:
        # Same exception type the bare [0] lookup used to raise, but with
        # enough context to see what went wrong.
        raise IndexError(
            "no element with class 'news_content_full' found at {0}".format(url)
        )
    text = blocks[0].get_text()

    # NOTE(review): the second replace deletes each "\n\n" outright, so the
    # text on either side ends up on one line -- confirm this is intended.
    text = text.replace("\r\n", "\n").replace("\n\n", "")
    text = text.translate(replacements)
    return text
def get_first_paragraphs(text):
    """Return the lines of *text* that carry a single case-count statement.

    A line is kept when it contains the phrase "Od početka oktobra 2017"
    and the pattern "ukupno <digits> sluča" matches in it exactly once.

    Args:
        text: Multi-line string to scan.

    Returns:
        List of the matching lines, in their original order.
    """
    marker = "Od početka oktobra 2017"
    count_pattern = re.compile(r"ukupno (\d+) sluča")

    selected = []
    for line in text.splitlines():
        if marker not in line:
            continue
        # Lines with zero or several counts are skipped.
        if len(count_pattern.findall(line)) != 1:
            continue
        selected.append(line)
    return selected
# Matches dates written as "D.M.YYYY." (day and month of 1-2 digits, year of
# 2-4 digits), optionally with up to three whitespace characters before the
# year.
date_re = re.compile(r"\d{1,2}\.\d{1,2}\.\s{,3}\d{2,4}\.")


def extract_date(s):
    """Parse the first "D.M.YYYY."-style date found in *s*.

    Args:
        s: Text containing at least one date matched by ``date_re``.

    Returns:
        A naive ``datetime.datetime`` at midnight of that date.

    Raises:
        IndexError: If *s* contains no date.
        ValueError: If the matched text is not a valid calendar date.
    """
    matches = date_re.findall(s)
    if not matches:
        raise IndexError("no date found in {0!r}".format(s))

    # The pattern accepts any whitespace (tabs, non-breaking spaces, ...)
    # before the year, so strip all of it, not only ASCII spaces; strptime
    # rejects stray whitespace that the format string does not mention.
    d = re.sub(r"\s+", "", matches[0])

    # The pattern also admits two-digit years, which "%Y" cannot parse.
    year = d.rstrip(".").rsplit(".", 1)[-1]
    fmt = "%d.%m.%y." if len(year) == 2 else "%d.%m.%Y."
    return datetime.datetime.strptime(d, fmt)
def extract(paragraphs):
    """Turn each paragraph into a ``(date, count)`` pair.

    The date comes from ``extract_date``; the count is the integer captured
    by the first "ukupno <digits> sluča" match in the paragraph.

    Args:
        paragraphs: Iterable of strings, each containing a date and a count.

    Returns:
        List of ``(datetime.datetime, int)`` tuples, one per paragraph, in
        input order.
    """
    count_pattern = re.compile(r"ukupno (\d+) sluča")

    points = []
    for paragraph in paragraphs:
        when = extract_date(paragraph)
        count = int(count_pattern.findall(paragraph)[0])
        points.append((when, count))
    return points
def get_data(url, proxies):
    """Download the page at *url* and return its ``(date, count)`` pairs.

    Chains ``get_text`` -> ``get_first_paragraphs`` -> ``extract``.

    Args:
        url: Address of the page to download.
        proxies: Proxy mapping passed through to ``get_text``.

    Returns:
        The list produced by ``extract`` for the selected paragraphs.
    """
    return extract(get_first_paragraphs(get_text(url, proxies)))
if __name__ == "__main__":
    # Fetch the (date, count) pairs and split them into two parallel series.
    points = get_data(url, proxies)
    dates, counts = zip(*points)

    figure = pyplot.figure()
    axes = figure.add_subplot(111)
    axes.set_title("Epidemija morbila u Srbiji", fontsize=18, fontweight="bold")
    axes.plot(dates, counts)

    # Caption anchored to the bottom-right corner of the axes (axes-relative
    # coordinates via transAxes).
    # NOTE(review): counts[0] is the first paragraph found on the page; this
    # presumably relies on the page listing the newest report first -- confirm.
    caption = "Total: {0}".format(counts[0])
    axes.text(
        1.0,
        0,
        caption,
        verticalalignment="bottom",
        horizontalalignment="right",
        transform=axes.transAxes,
        fontsize=14,
    )

    pyplot.show()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment