kurasaiteja · March 21, 2021 14:54
diff --git a/clean_daily.py b/clean_daily.py
 import math

 # Build a gap-free (date, country) table of cumulative vaccination totals
 # and add a log-scaled column used for colouring.

 # unique dates, plus two extra days for smoother animations;
 # dict.fromkeys drops repeats (keeping order) so an extra day that is
 # already in the data cannot produce duplicate filler rows
 dates = list(dict.fromkeys(
     vaccine_df.date.unique().tolist() + ['2020-12-12', '2020-12-13']
 ))
 # computed once instead of on every loop iteration
 earliest = min(dates)

 # unique countries
 countries = vaccine_df.country.unique().tolist()

 # for easy processing
 short = vaccine_df[['date', 'country', 'total_vaccinations']]

 # unique (date, country) pairs already in short; a set gives constant-time
 # membership tests instead of scanning a list for every pair
 keys = set(zip(short.date.tolist(), short.country.tolist()))

 # we want to make sure we have some data for each pair, even if it is 0:
 #   - no entry for the country on the earliest date -> 0
 #   - entry missing on any other date -> NA, forward-filled below
 filler = [
     {
         "date": date,
         "country": country,
         "total_vaccinations": 0 if date == earliest else pd.NA,
     }
     for date in dates
     for country in countries
     if (date, country) not in keys
 ]
 if filler:
     # a single concat replaces one DataFrame.append call per missing row
     # (DataFrame.append was removed in pandas 2.0)
     short = pd.concat([short, pd.DataFrame(filler)], ignore_index=True)

 # fill missing values with previous day values (this is OK since it is
 # cumulative). Filling within each country keeps one country's total from
 # leaking into the next; a value still missing at the start of a country's
 # series becomes 0.
 short = short.sort_values(['country', 'date'])
 short['total_vaccinations'] = (
     short.groupby('country')['total_vaccinations'].ffill().fillna(0)
 )

 # scale the number by log to make the color transitions smoother
 # (the +1 maps a total of 0 to log2(1) == 0)
 vaccines = short.sort_values('date')
 vaccines['log_scale'] = vaccines['total_vaccinations'].apply(lambda x: math.log2(x + 1))
	import math

	# Build a gap-free (date, country) table of cumulative vaccination totals
	# and add a log-scaled column used for colouring.

	# unique dates, plus two extra days for smoother animations;
	# dict.fromkeys drops repeats (keeping order) so an extra day that is
	# already in the data cannot produce duplicate filler rows
	dates = list(dict.fromkeys(
	    vaccine_df.date.unique().tolist() + ['2020-12-12', '2020-12-13']
	))
	# computed once instead of on every loop iteration
	earliest = min(dates)

	# unique countries
	countries = vaccine_df.country.unique().tolist()

	# for easy processing
	short = vaccine_df[['date', 'country', 'total_vaccinations']]

	# unique (date, country) pairs already in short; a set gives constant-time
	# membership tests instead of scanning a list for every pair
	keys = set(zip(short.date.tolist(), short.country.tolist()))

	# we want to make sure we have some data for each pair, even if it is 0:
	#   - no entry for the country on the earliest date -> 0
	#   - entry missing on any other date -> NA, forward-filled below
	filler = [
	    {
	        "date": date,
	        "country": country,
	        "total_vaccinations": 0 if date == earliest else pd.NA,
	    }
	    for date in dates
	    for country in countries
	    if (date, country) not in keys
	]
	if filler:
	    # a single concat replaces one DataFrame.append call per missing row
	    # (DataFrame.append was removed in pandas 2.0)
	    short = pd.concat([short, pd.DataFrame(filler)], ignore_index=True)

	# fill missing values with previous day values (this is OK since it is
	# cumulative). Filling within each country keeps one country's total from
	# leaking into the next; a value still missing at the start of a country's
	# series becomes 0.
	short = short.sort_values(['country', 'date'])
	short['total_vaccinations'] = (
	    short.groupby('country')['total_vaccinations'].ffill().fillna(0)
	)

	# scale the number by log to make the color transitions smoother
	# (the +1 maps a total of 0 to log2(1) == 0)
	vaccines = short.sort_values('date')
	vaccines['log_scale'] = vaccines['total_vaccinations'].apply(lambda x: math.log2(x + 1))