Last active
May 23, 2019 14:34
-
-
Save ZaxR/165a9a09b9d967c0180cf0dbb760a201 to your computer and use it in GitHub Desktop.
[Standard] Bigquery regex, adding missing dates, and forward filling
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
WITH some_data AS ( | |
SELECT | |
_TDLinx_Store_Code_ as store_code, | |
_UPC_ AS upc, | |
DATE(PARSE_TIMESTAMP("%m/%d/%Y", (REGEXP_EXTRACT(_Period_Description_Short_, "\\S+\\s*$")))) AS end_date, | |
___ AS dollars, | |
_Units_ AS units | |
FROM | |
`project.dataset.table` | |
), end_dates AS ( | |
SELECT | |
store_code, | |
upc, | |
end_date as end_dt | |
FROM ( | |
SELECT | |
store_code, | |
upc, | |
MIN(end_date) min_dt, | |
MAX(end_date) max_dt | |
FROM some_data | |
GROUP BY | |
store_code, upc | |
), UNNEST(GENERATE_DATE_ARRAY(min_dt, max_dt, INTERVAL 1 WEEK)) end_date | |
) | |
SELECT | |
LAST_VALUE(dollars IGNORE NULLS) OVER(PARTITION BY end_dates.store_code, end_dates.upc ORDER BY some_data.end_date) dollars, | |
LAST_VALUE(units IGNORE NULLS) OVER(PARTITION BY end_dates.store_code, end_dates.upc ORDER BY some_data.end_date) units, | |
end_dates.end_dt time_period_end_date, | |
end_dates.tdlinx_store_code store_id, | |
end_dates.upc upc | |
FROM | |
end_dates | |
LEFT JOIN | |
some_data | |
ON | |
end_dates.store_code = some_data.store_code AND | |
end_dates.upc = some_data.upc AND | |
end_dates.end_dt = some_data.end_date | |
ORDER BY | |
end_dates.store_code, | |
end_dates.upc, | |
end_dates.end_dt |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment