Skip to content

Instantly share code, notes, and snippets.

@denis-bz
Created June 10, 2021 14:43
Show Gist options
  • Save denis-bz/2d0a98fdfced9bd2dc2384f16a7c1f5d to your computer and use it in GitHub Desktop.
Save denis-bz/2d0a98fdfced9bd2dc2384f16a7c1f5d to your computer and use it in GitHub Desktop.
Read German Corona data rki.de/.../Fallzahlen_Kum_Tab.xlsx in python pandas
#!/usr/bin/env python3
""" https://www.rki.de/DE/Content/InfAZ/N/Neuartiges_Coronavirus/Daten/Fallzahlen_Kum_Tab.html .xlsx
-> python pandas read_excel
"""
# sheets, from openpyxl --
# 0 Erlaeuterungen 20 rows 1 cols
# 1 Fälle-Todesfälle-gesamt 509 rows 16384 cols ?!
# 2 BL_7-Tage-Fallzahlen (fixiert) 39 rows 396 cols
# 3 BL_7-Tage-Inzidenz (fixiert) 21 rows 396 cols
# 4 LK_7-Tage-Fallzahlen (fixiert) 419 rows 207 cols
# 5 LK_7-Tage-Inzidenz (fixiert) 433 rows 202 cols
import numpy as np
import openpyxl # read_excel engine
import pandas as pd
cols = {
0: "Day", # 'Berichtsdatum',
1: "TotalCases", # 'Anzahl COVID-19-Fälle',
# 2: None,
3: "Cases", # 'Differenz Vortag Fälle',
4: "TotalDeaths", # 'Todesfälle',
5: "Deaths", # 'Differenz Vortag Todesfälle'
}
def read_Fallzahlen_Kum_Tab( xlsxin: "file-like" ) -> pd.DataFrame:
print( "\n-- pd.read_excel( \"%s\" )" % xlsxin )
df = (pd.read_excel( xlsxin, sheet_name=1, header=2, usecols=[0,1, 3,4,5],
index_col=0, parse_dates=True,
engine="openpyxl",
)
.dropna( axis="columns", how="all" ) # 16384 !
.dropna( axis="index", how="any" ) # early March 2020
.astype( int )
)
df.index.name = "Day"
df.columns = list( cols.values() )[1:]
return df[::-1] # newest first
#...............................................................................
if __name__ == "__main__":
import sys
print( 80 * "▄" )
args = sys.argv[1:]
# while args and ("=" in args[0]):
# exec( args.pop( 0 ), globals() )
xlsxin = args[0] if args \
else "../../covid/data/8jun-Fallzahlen_Kum_Tab.xlsx"
df = read_Fallzahlen_Kum_Tab( xlsxin )
print( df )
"""
TotalCases Cases TotalDeaths Deaths
Day
2021-08-06 3702688 1204 89384 140
2021-07-06 3701484 1117 89244 22
2021-06-06 3700367 2440 89222 74
...
"""
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment