Created
April 7, 2020 17:29
-
-
Save wesleyit/6b6e94bd4f1e0d65fad4a63aa6d70818 to your computer and use it in GitHub Desktop.
AWS Athena query for preparing a dataset using SQL instead Python
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
with dataset as ( | |
select | |
rc.country, | |
rc."date", | |
rc.cases, | |
lag(rc.cases,1) over(partition by rc.country order by rc."date") as yesterday_cases, | |
sum(rc.cases) over(partition by rc.country order by rc."date") as acc_cases, | |
rc.deaths, | |
lag(rc.deaths,1) over(partition by rc.country order by rc."date") as yesterday_deaths, | |
sum(rc.deaths) over(partition by rc.country order by rc."date") as acc_deaths | |
from covid19_analytics.report_covid as rc | |
order by | |
rc.country asc, | |
rc."date" asc | |
) | |
select | |
country, | |
"date", | |
cases, | |
1.0 * (cases - yesterday_cases) / nullif(yesterday_cases, 0) as cases_rate, | |
avg(cases) over(partition by country order by "date" asc rows 7 preceding) as cases_avg_7, | |
avg(cases) over(partition by country order by "date" asc rows 15 preceding) as cases_avg_15, | |
avg(cases) over(partition by country order by "date" asc rows 30 preceding) as cases_avg_30, | |
geometric_mean(cases) over(partition by country order by "date" asc rows 7 preceding) as cases_mean_7, | |
geometric_mean(cases) over(partition by country order by "date" asc rows 15 preceding) as cases_mean_15, | |
geometric_mean(cases) over(partition by country order by "date" asc rows 30 preceding) as cases_mean_30, | |
variance(cases) over(partition by country order by "date" asc rows 30 preceding) as cases_variance_30, | |
stddev(cases) over(partition by country order by "date" asc rows 30 preceding) as cases_stddev_30, | |
acc_cases, | |
deaths, | |
1.0 * (deaths - yesterday_deaths) / nullif(yesterday_deaths, 0) as deaths_rate, | |
avg(deaths) over(partition by country order by "date" asc rows 7 preceding) as deaths_avg_7, | |
avg(deaths) over(partition by country order by "date" asc rows 15 preceding) as deaths_avg_15, | |
avg(deaths) over(partition by country order by "date" asc rows 30 preceding) as deaths_avg_30, | |
geometric_mean(deaths) over(partition by country order by "date" asc rows 7 preceding) as deaths_mean_7, | |
geometric_mean(deaths) over(partition by country order by "date" asc rows 15 preceding) as deaths_mean_15, | |
geometric_mean(deaths) over(partition by country order by "date" asc rows 30 preceding) as deaths_mean_30, | |
variance(deaths) over(partition by country order by "date" asc rows 30 preceding) as cases_deaths_30, | |
stddev(deaths) over(partition by country order by "date" asc rows 30 preceding) as cases_deaths_30, | |
acc_deaths | |
from dataset |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment