Skip to content

Instantly share code, notes, and snippets.

@kitsamho
Last active July 26, 2021 09:24
Show Gist options
  • Select an option

  • Save kitsamho/4f27b5202e569fc8d49ba9cef65ec309 to your computer and use it in GitHub Desktop.

Select an option

Save kitsamho/4f27b5202e569fc8d49ba9cef65ec309 to your computer and use it in GitHub Desktop.
def get_data(df, transform_cols, start_date='2020-02-09'):
    """Transform the raw OWID data into the weekly frame used by the app.

    Each country (unique value of ``df.location``) is processed on its own:
    dates are parsed, rows are sorted chronologically, the columns named in
    ``transform_cols`` are passed through ``update_series`` and cast to int,
    and the rows are collapsed to one row per week holding the per-column
    maximum for that week. The per-country results are then concatenated.

    Args:
        df: Original DataFrame from csv. Must contain ``location`` and
            ``date`` columns; ``date`` must be parseable by
            ``pd.to_datetime``. The input frame is not modified.
        transform_cols: Iterable of column names to run through
            ``update_series`` (defined elsewhere in this file) before the
            weekly aggregation.
        start_date: Rows whose weekly bin date is earlier than this are
            dropped. Defaults to ``'2020-02-09'``.

    Returns:
        Processed / cleaned DataFrame sorted by date and then location, with
        ``date`` as the first column, stored as strings. Missing values are
        replaced by 0. If ``df`` has no rows, an empty frame with the
        input's columns is returned.
    """
    country_dfs = []

    # loop through each country and build its weekly frame
    for country in df.location.unique():
        # Explicit copy: the assignments below must modify an independent
        # frame, not a masked selection of ``df`` (avoids
        # SettingWithCopyWarning and any ambiguity about writes to ``df``).
        df_country = df[df.location == country].copy()
        df_country['date'] = pd.to_datetime(df_country['date'])
        df_country = df_country.sort_values(by='date')

        # transform our continuous columns
        for col in transform_cols:
            df_country[col] = update_series(df_country[col]).astype(int)

        # group by week and use the max value in that week
        # NOTE(review): weeks with no rows between a country's first and
        # last date come out as all-NaN rows (including ``location``) and
        # are later filled with 0 - confirm this is what the app expects.
        df_country = df_country.groupby(pd.Grouper(key='date', freq='W')).max()
        country_dfs.append(df_country)

    if not country_dfs:
        # pd.concat([]) raises ValueError; an empty input yields an empty result.
        return df.iloc[0:0].copy()

    df_final = pd.concat(country_dfs).reset_index()

    # One deterministic sort (date ascending, ties broken by location).
    # Done before fillna so ``location`` is not yet a mix of str and int.
    df_final = df_final.sort_values(by=['date', 'location'])
    df_final = df_final.fillna(0)

    # select start point; copy so the column assignment below is unambiguous
    df_final = df_final[df_final['date'] >= start_date].copy()

    # date needs to be in string format for plotly animations to work
    df_final['date'] = df_final['date'].astype(str)

    # get rid of any duplicate columns
    df_final = df_final.loc[:, ~df_final.columns.duplicated()]
    return df_final
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment