SET sampleRate=0.1; -- in range (0.0-1.0]
select
field1, field2, field3, ..., fieldN, state
from (
select
field1, field2, field3, ..., fieldN, state,
count(*) over (partition by state) as state_cnt,
rank() over (partition by state order by rand()) as state_rank
from
<table name>
) t1
where state_rank <= (state_cnt * ${hivevar:sampleRate})
Created
July 10, 2017 09:14
-
-
Save myui/649c08cd2fb951e94eb4fa215eaf9df2 to your computer and use it in GitHub Desktop.
stratified sampling
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment