Created
October 11, 2024 04:42
-
-
Save Dpananos/a806d6088e95c77bd760e9d8332e8260 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
SET VARIABLE EXPERIMENT_START_DATE = TIMESTAMP '2024-10-01 00:00:00'; | |
SET VARIABLE EXPERIMENT_END_DATE = TIMESTAMP '2024-10-31 00:00:00'; | |
with prep_cleaned_exposures as ( | |
select distinct | |
userid, | |
treatment, | |
min(exposure_time) over (partition by userid) as exposure_time, | |
(min(treatment) over (partition by userid)) <> (max(treatment) over (partition by userid)) as multiple_exposures | |
from experiment_000 | |
where | |
( | |
exposure_time <= getvariable('EXPERIMENT_END_DATE') | |
and | |
exposure_time >= getvariable('EXPERIMENT_START_DATE') | |
) | |
order by treatment, userid | |
) | |
, cleaned_exposures as (select * from prep_cleaned_exposures where not multiple_exposures ) | |
, exposure_balance as (select treatment, count(distinct userid) n_exposed from cleaned_exposures group by 1) | |
, prep_experiment_outcomes as ( | |
select | |
a.userid, | |
a.treatment, | |
a.exposure_time, | |
b.observed_time as event_time, | |
c.n_exposed as at_risk | |
from cleaned_exposures as a | |
left join experiment_000 as b | |
on a.userid = b.userid and a.exposure_time < coalesce(b.observed_time, getvariable('EXPERIMENT_END_DATE')) | |
left join exposure_balance c on a.treatment = c.treatment | |
) | |
, cleaned_experiment_outcomes as ( | |
select | |
userid, | |
treatment, | |
exposure_time, | |
event_time is not NULL as is_observed, | |
at_risk, | |
coalesce(event_time, getvariable('EXPERIMENT_END_DATE') ) as observed_time, | |
datediff('seconds', exposure_time, coalesce(event_time, getvariable('EXPERIMENT_END_DATE') )) * 1.0 / (86400) as event_time_days, | |
from prep_experiment_outcomes | |
order by treatment, userid | |
) | |
, prep_lifetable_1 as ( | |
select | |
treatment, | |
case when not is_observed then floor(event_time_days) else floor(event_time_days) + 0.5 end as event_time, | |
coalesce(count(distinct case when is_observed then userid end), 0) as n_event, | |
coalesce(count(distinct case when not is_observed then userid end), 0) as n_censor, | |
at_risk as n_exposed | |
from cleaned_experiment_outcomes | |
group by 1, 2, 5 | |
order by 1, 2 | |
) | |
, life_table as ( | |
select | |
*, | |
coalesce(lag(at_risk1, 1) over (partition by treatment order by event_time), n_exposed) as at_risk, | |
n_event::float / at_risk::float as hazard | |
from( | |
select | |
*, | |
n_exposed - (sum(n_event + n_censor) over (partition by treatment order by event_time rows between unbounded preceding and current row)) as at_risk1 | |
from prep_lifetable_1 | |
order by 1, 2 | |
) | |
) | |
select | |
*, | |
exp(sum(ln(1-hazard)) over (partition by treatment order by event_time rows between unbounded preceding and current row)) as S | |
from life_table | |
order by 1, 2 | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment