Skip to content

Instantly share code, notes, and snippets.

@Wintus
Last active July 24, 2024 14:02
Show Gist options
  • Save Wintus/331655e602f8200b80aabde489ccfb83 to your computer and use it in GitHub Desktop.
Save Wintus/331655e602f8200b80aabde489ccfb83 to your computer and use it in GitHub Desktop.
GSP341
CREATE OR REPLACE MODEL `ecommerce.customer_classification_model`
OPTIONS
(
model_type='logistic_reg',
labels = ['will_buy_on_return_visit']
)
AS
#standardSQL
SELECT
* EXCEPT(fullVisitorId)
FROM
# features
(SELECT
fullVisitorId,
IFNULL(totals.bounces, 0) AS bounces,
IFNULL(totals.timeOnSite, 0) AS time_on_site
FROM
`data-to-insights.ecommerce.web_analytics`
WHERE
totals.newVisits = 1
AND date BETWEEN '20160801' AND '20170430') # train on first 9 months
JOIN
(SELECT
fullvisitorid,
IF(COUNTIF(totals.transactions > 0 AND totals.newVisits IS NULL) > 0, 1, 0) AS will_buy_on_return_visit
FROM
`data-to-insights.ecommerce.web_analytics`
GROUP BY fullvisitorid)
USING (fullVisitorId)
;
SELECT
*
FROM
ML.EVALUATE(MODEL `ecommerce.customer_classification_model`,
(
SELECT
* EXCEPT(fullVisitorId)
FROM
# features
(
SELECT
fullVisitorId,
IFNULL(totals.bounces, 0) AS bounces,
IFNULL(totals.timeOnSite, 0) AS time_on_site
FROM
`data-to-insights.ecommerce.web_analytics`
WHERE
totals.newVisits = 1
AND date BETWEEN '20170501' AND '20170801')
JOIN (
SELECT
fullvisitorid,
IF
(COUNTIF(totals.transactions > 0
AND totals.newVisits IS NULL) > 0, 1, 0) AS will_buy_on_return_visit
FROM
`data-to-insights.ecommerce.web_analytics`
GROUP BY
fullvisitorid)
USING
(fullVisitorId) ));
SELECT
SQRT(mean_squared_error) AS rmse,
mean_absolute_error
FROM
ML.EVALUATE(MODEL `austin.austin_location_model`)
;
SELECT
SQRT(mean_squared_error) AS rmse,
mean_absolute_error
FROM
ML.EVALUATE(MODEL `austin.location_model`)
;
CREATE OR REPLACE MODEL
`austin.austin_location_model`
OPTIONS (
model_type='LINEAR_REG',
labels=['duration_minutes'] ) AS
SELECT
duration_minutes,
# features
start_station_name,
EXTRACT(HOUR FROM start_time) AS start_hour,
EXTRACT(DAYOFWEEK FROM start_time) AS day_of_week,
start_station_name AS location
FROM
`bigquery-public-data.austin_bikeshare.bikeshare_trips`
WHERE
EXTRACT(YEAR FROM start_time) = 2019
;