rasmi · October 4, 2019 19:50
diff --git a/time_series.sql b/time_series.sql
 # Copyright 2019 Google LLC.
 # SPDX-License-Identifier: Apache-2.0

 WITH

 # Generate a sample time series from the taxi trips dataset.
 sample_time_series AS (
  SELECT
    SUBSTR(taxi_id, 16, 24) AS taxi_id, # Truncate for readability.
    trip_start_timestamp,
    trip_seconds
  FROM
    `bigquery-public-data.chicago_taxi_trips.taxi_trips`
  WHERE
    SUBSTR(taxi_id, 0, 2) = 'cc' # Downsample data based on hash
  ORDER BY
    taxi_id,
    trip_start_timestamp
 )

 # Predict the trip seconds for the next ride.
 # Note that time intervals here are not regular (ex. hourly).
 # Some cleanup still required, such as removing next_trip_seconds nulls
 # and sequences that are too short.
 # Can create a second window for a different level of aggregation for
 # time series history.
 SELECT
  taxi_id,
  trip_start_timestamp,
  trip_seconds,
  LEAD(trip_seconds, 1) OVER (
    PARTITION BY
      taxi_id
    ORDER BY
      trip_start_timestamp
  ) AS next_trip_seconds,
  # ARRAY_AGG cannot aggreggate nulls.
  ARRAY_AGG(IFNULL(trip_seconds, 0)) OVER (
    PARTITION BY
      taxi_id
    ORDER BY
      # AutoML Tables will treat as an ordered array
      # OFFSET(0) in array should be the oldest element
      trip_start_timestamp 
    ROWS BETWEEN 10 PRECEDING AND CURRENT ROW 
  ) AS trip_history
 FROM
  sample_time_series
	# Copyright 2019 Google LLC.
	# SPDX-License-Identifier: Apache-2.0

	WITH

	# Generate a sample time series from the taxi trips dataset.
	sample_time_series AS (
	SELECT
	SUBSTR(taxi_id, 16, 24) AS taxi_id, # Truncate for readability.
	trip_start_timestamp,
	trip_seconds
	FROM
	`bigquery-public-data.chicago_taxi_trips.taxi_trips`
	WHERE
	SUBSTR(taxi_id, 0, 2) = 'cc' # Downsample data based on hash
	ORDER BY
	taxi_id,
	trip_start_timestamp
	)

	# Predict the trip seconds for the next ride.
	# Note that time intervals here are not regular (ex. hourly).
	# Some cleanup still required, such as removing next_trip_seconds nulls
	# and sequences that are too short.
	# Can create a second window for a different level of aggregation for
	# time series history.
	SELECT
	taxi_id,
	trip_start_timestamp,
	trip_seconds,
	LEAD(trip_seconds, 1) OVER (
	PARTITION BY
	taxi_id
	ORDER BY
	trip_start_timestamp
	) AS next_trip_seconds,
	# ARRAY_AGG cannot aggreggate nulls.
	ARRAY_AGG(IFNULL(trip_seconds, 0)) OVER (
	PARTITION BY
	taxi_id
	ORDER BY
	# AutoML Tables will treat as an ordered array
	# OFFSET(0) in array should be the oldest element
	trip_start_timestamp
	ROWS BETWEEN 10 PRECEDING AND CURRENT ROW
	) AS trip_history
	FROM
	sample_time_series
No results found