Last active
October 10, 2020 22:32
-
-
Save matiasmicheletto/02e271fce402ab210afa67953bdc2070 to your computer and use it in GitHub Desktop.
List and plot the top countries with the highest number of COVID-19 deaths per million people.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
List and plot the top countries with highest number of covid-19 deaths per million people. | |
""" | |
import csv | |
import requests | |
import matplotlib.pyplot as plt | |
# ---------------------------------------------------------------------------
# Settings
# ---------------------------------------------------------------------------
# Metric to rank by: "deaths", or 'cases' to compare cases per million instead.
variable = "deaths"
# Day offset, counted from each country's first recorded case or death.
days = 250
# How many countries the ranking lists.
rank = 13
# Whether to draw the plot.
chart = True

# Where the CSV data is fetched from (the commented line is an alternative URL).
#url = "https://covid.ourworldindata.org/data/owid-covid-data.csv"
url = "https://raw.githubusercontent.com/owid/covid-19-data/master/public/data/owid-covid-data.csv"

# Zero-based positions of the CSV columns that are read.
column_country = 2  # Country name
column_date = 3  # Date
# Per-million column: index 10 when ranking deaths, index 13 for anything else.
column_dpm = 13 if variable != "deaths" else 10
def toFloat(value):
    """Convert a CSV cell to a float, mapping unparseable input to 0.0.

    Args:
        value: Usually a string read from the CSV; may be empty or None.

    Returns:
        float: The parsed number, or 0.0 when ``value`` cannot be parsed
        (for example an empty cell, free text, or None).
    """
    try:
        return float(value)
    except (TypeError, ValueError):
        # TypeError covers None and other non-string, non-numeric input;
        # ValueError covers empty or non-numeric strings. Both mean
        # "no usable number", which is reported as a float zero.
        return 0.0
# Download the CSV and parse it into a list of rows (header row included).
with requests.Session() as s:
    # A timeout keeps the script from hanging forever on a stalled connection.
    download = s.get(url, timeout=60)
    # Fail loudly on an HTTP error instead of parsing an error page as CSV.
    download.raise_for_status()
    decoded_content = download.content.decode('utf-8')
myreader = csv.reader(decoded_content.splitlines(), delimiter=',')
rows = list(myreader)
# Per-country series, keyed by country name
cases = {}  # Non-zero per-million values, in file order
dates = {}  # Date string stored alongside each value
for record in rows[1:]:  # rows[0] is the header line
    value = toFloat(record[column_dpm])  # Cases or deaths per million
    day = record[column_date]
    if value == 0:
        # Zero (or unparseable) entries are never stored, so each series
        # starts at the country's first non-zero value
        continue
    name = record[column_country]
    # setdefault creates the list the first time a country is seen
    cases.setdefault(name, []).append(value)
    dates.setdefault(name, []).append(day)
# Snapshot at entry number `days` of each series, restricted to the
# countries whose series is at least `days` entries long
nth = days - 1
eligible = [name for name in cases if len(cases[name]) >= days]
dataDN = {name: cases[name][nth] for name in eligible}
datesDN = {name: dates[name][nth] for name in eligible}
# Rank countries by their day-N value (highest first), print the top `rank`
# entries as a table and, when `chart` is set, add one curve per country.
sorted_list = sorted(dataDN.items(), key=lambda x: x[1], reverse=True)

# Wording follows the selected metric so the header matches the data shown.
metric = "Deaths" if variable == "deaths" else "Cases"
event = "death" if variable == "deaths" else "case"
print(
    "Country".ljust(20)
    + "{}/M".format(metric).ljust(13)
    + "Date of day {} after 1st {}".format(days, event)
)
print("-------------------------------------------------------------------")

# max(..., 0) keeps a zero or negative `rank` from selecting entries
# (a negative slice bound would otherwise count from the end of the list).
for name, value in sorted_list[:max(rank, 0)]:
    print(name[:18].ljust(20) + str(value).ljust(13) + datesDN[name])
    if chart:
        # Label the curve with the country name only; passing the whole
        # (name, value) tuple would put its repr in the legend.
        plt.plot(cases[name][:days], label=name)
# Finish and display the figure; labels follow the metric chosen in
# `variable` instead of always saying "Deaths".
if chart:
    metric = "Deaths" if variable == "deaths" else "Cases"
    event = "death" if variable == "deaths" else "case"
    plt.xlabel('Days after the 1st {}'.format(event))
    plt.ylabel('{} per million people'.format(metric))
    plt.title('{} per million people after the 1st {}'.format(metric, event))
    plt.legend()
    plt.show()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment