Skip to content

Instantly share code, notes, and snippets.

@ZaxR
Last active May 23, 2019 14:34
Show Gist options
  • Save ZaxR/165a9a09b9d967c0180cf0dbb760a201 to your computer and use it in GitHub Desktop.
Save ZaxR/165a9a09b9d967c0180cf0dbb760a201 to your computer and use it in GitHub Desktop.
[Standard] Bigquery regex, adding missing dates, and forward filling
WITH some_data AS (
  SELECT
    _TDLinx_Store_Code_ as store_code,
    _UPC_ AS upc,
DATE(PARSE_TIMESTAMP("%m/%d/%Y", (REGEXP_EXTRACT(_Period_Description_Short_, "\\S+\\s*$")))) AS end_date,
    ___ AS dollars,
    _Units_ AS units
  FROM
    `project.dataset.table`
), end_dates AS (
SELECT
store_code,
upc,
end_date as end_dt
FROM (
   SELECT
store_code,
upc,
     MIN(end_date) min_dt,
      MAX(end_date) max_dt
    FROM some_data
GROUP BY
store_code, upc
  ), UNNEST(GENERATE_DATE_ARRAY(min_dt, max_dt, INTERVAL 1 WEEK)) end_date
)
SELECT
LAST_VALUE(dollars IGNORE NULLS) OVER(PARTITION BY end_dates.store_code, end_dates.upc ORDER BY some_data.end_date) dollars,
  LAST_VALUE(units IGNORE NULLS) OVER(PARTITION BY end_dates.store_code, end_dates.upc ORDER BY some_data.end_date) units,
end_dates.end_dt time_period_end_date,
end_dates.tdlinx_store_code store_id,
end_dates.upc upc
FROM
end_dates
LEFT JOIN
some_data
ON
end_dates.store_code = some_data.store_code AND
end_dates.upc = some_data.upc AND
end_dates.end_dt = some_data.end_date
ORDER BY
end_dates.store_code,
end_dates.upc,
end_dates.end_dt
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment