Created
March 12, 2020 11:35
-
-
Save philshem/7907b431a37c1f51540fd92e21c3511f to your computer and use it in GitHub Desktop.
Get Kanton Zurich coronavirus cases and do some basic calculations.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Get Kanton Zurich coronavirus cases and do some basic calculations."""

from datetime import datetime

import numpy as np
import pandas as pd

url = 'https://raw.githubusercontent.com/openZH/covid_19/master/COVID19_Fallzahlen_Kanton_ZH_alter_geschlecht.csv'

# format of the 'Date' column in the CSV (day.month.year)
DATE_FORMAT = '%d.%m.%Y'

# columns that are not needed once 'NewConfirmed' has been derived
UNUSED_COLUMNS = ['NewConfCases', 'NewPosTests1', 'AgeYear', 'NewDeaths', 'NewCured']


def summarize_cases(records):
    """Aggregate per-record case rows into one row per calendar day.

    Args:
        records: DataFrame with a 'Date' column holding strings in
            ``DATE_FORMAT`` plus the columns 'NewConfCases', 'NewPosTests1',
            'AgeYear', 'NewDeaths' and 'NewCured'. It is not modified.

    Returns:
        DataFrame indexed by every day between the first and last date seen
        (days without records are filled with 0), containing:
        'NewConfirmed' (daily sum), 'TotalCases' (running sum of
        'NewConfirmed') and 'GrowthRate' (fractional change of 'TotalCases'
        from the previous day; NaN on the first day).
    """
    cases = records.copy()

    # Parse dates explicitly; read_csv's `date_parser` argument is deprecated.
    cases['Date'] = pd.to_datetime(cases['Date'], format=DATE_FORMAT)

    # calculate total new confirmed, from one of 2 columns:
    # 1.0 where NewConfCases equals 1.0, otherwise the NewPosTests1 value
    cases['NewConfirmed'] = np.where(
        cases['NewConfCases'] == 1.0, 1.0, cases['NewPosTests1']
    )

    # delete unused columns
    cases = cases.drop(columns=UNUSED_COLUMNS)

    # Get total confirmed cases per date. normalize() keeps a real datetime
    # index, and numeric_only=True keeps the sum away from the leftover
    # non-numeric columns (newer pandas no longer drops them silently).
    daily = cases.groupby(cases['Date'].dt.normalize()).sum(numeric_only=True)

    # add missing dates to dataframe (nothing to fill when there are no rows)
    if not daily.empty:
        all_days = pd.date_range(daily.index.min(), daily.index.max())
        daily = daily.reindex(all_days, fill_value=0)

    # get running sum of total confirmed cases
    daily['TotalCases'] = daily['NewConfirmed'].cumsum()

    # change from previous day, as a fraction (multiply by 100 for percent)
    daily['GrowthRate'] = daily['TotalCases'].pct_change()

    return daily


if __name__ == '__main__':
    # read csv from web
    df = summarize_cases(pd.read_csv(url))
    print(df)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment