Created
June 10, 2021 14:43
-
-
Save denis-bz/2d0a98fdfced9bd2dc2384f16a7c1f5d to your computer and use it in GitHub Desktop.
Read German Corona data rki.de/.../Fallzahlen_Kum_Tab.xlsx in python pandas
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
""" https://www.rki.de/DE/Content/InfAZ/N/Neuartiges_Coronavirus/Daten/Fallzahlen_Kum_Tab.html .xlsx | |
-> python pandas read_excel | |
""" | |
# sheets, from openpyxl -- | |
# 0 Erlaeuterungen 20 rows 1 cols | |
# 1 Fälle-Todesfälle-gesamt 509 rows 16384 cols ?! | |
# 2 BL_7-Tage-Fallzahlen (fixiert) 39 rows 396 cols | |
# 3 BL_7-Tage-Inzidenz (fixiert) 21 rows 396 cols | |
# 4 LK_7-Tage-Fallzahlen (fixiert) 419 rows 207 cols | |
# 5 LK_7-Tage-Inzidenz (fixiert) 433 rows 202 cols | |
import numpy as np | |
import openpyxl # read_excel engine | |
import pandas as pd | |
cols = { | |
0: "Day", # 'Berichtsdatum', | |
1: "TotalCases", # 'Anzahl COVID-19-Fälle', | |
# 2: None, | |
3: "Cases", # 'Differenz Vortag Fälle', | |
4: "TotalDeaths", # 'Todesfälle', | |
5: "Deaths", # 'Differenz Vortag Todesfälle' | |
} | |
def read_Fallzahlen_Kum_Tab( xlsxin: "file-like" ) -> pd.DataFrame: | |
print( "\n-- pd.read_excel( \"%s\" )" % xlsxin ) | |
df = (pd.read_excel( xlsxin, sheet_name=1, header=2, usecols=[0,1, 3,4,5], | |
index_col=0, parse_dates=True, | |
engine="openpyxl", | |
) | |
.dropna( axis="columns", how="all" ) # 16384 ! | |
.dropna( axis="index", how="any" ) # early March 2020 | |
.astype( int ) | |
) | |
df.index.name = "Day" | |
df.columns = list( cols.values() )[1:] | |
return df[::-1] # newest first | |
#............................................................................... | |
if __name__ == "__main__": | |
import sys | |
print( 80 * "▄" ) | |
args = sys.argv[1:] | |
# while args and ("=" in args[0]): | |
# exec( args.pop( 0 ), globals() ) | |
xlsxin = args[0] if args \ | |
else "../../covid/data/8jun-Fallzahlen_Kum_Tab.xlsx" | |
df = read_Fallzahlen_Kum_Tab( xlsxin ) | |
print( df ) | |
""" | |
TotalCases Cases TotalDeaths Deaths | |
Day | |
2021-08-06 3702688 1204 89384 140 | |
2021-07-06 3701484 1117 89244 22 | |
2021-06-06 3700367 2440 89222 74 | |
... | |
""" |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment