No metrics in
Self Joins
- Cross Join example
- Inner Join example
joins can be inefficent - they can require moving lots of data around and increase the data process from n to n * n
Windowing Functions
| -- Count the Number of Sessions Generated by Organic Searches | |
| SELECT COUNT(totals.visits) AS visitCount | |
| FROM [google.com:analytics-bigquery:LondonCycleHelmet.ga_sessions_20130910] | |
| WHERE trafficSource.medium = "organic" |
| SELECT COUNT(totals.bounces)/COUNT(totals.visits) AS bounceRate | |
| FROM [google.com:analytics-bigquery:LondonCycleHelmet.ga_sessions_20130910] | |
| -- from: http://www.lunametrics.com/blog/2016/02/11/querying-google-analytics-data-in-bigquery/ |
| SELECT HOUR(SEC_TO_TIMESTAMP(visitStartTime)) AS sessionHour, | |
| COUNT(totals.bounces)/COUNT(totals.visits) AS bounceRate | |
| FROM [google.com:analytics-bigquery:LondonCycleHelmet.ga_sessions_20130910] | |
| GROUP BY sessionHour | |
| -- from: http://www.lunametrics.com/blog/2016/02/11/querying-google-analytics-data-in-bigquery/ |
| -- Per-hour bounce rate calculation | |
| SELECT HOUR(SEC_TO_TIMESTAMP(visitStartTime)) AS sessionHour, -- Extracts the hour from the unix timestamp | |
| -- COUNT is an aggregate function and will only count non-NULL values | |
| COUNT(totals.bounces)/COUNT(totals.visits) AS bounceRate -- Computes the bounce rate. Don't use AVERAGE! it ignores NULLs | |
| -- Let's just use the one example table | |
| FROM [google.com:analytics-bigquery:LondonCycleHelmet.ga_sessions_20130910] | |
| GROUP BY sessionHour | |
| -- from: http://www.lunametrics.com/blog/2016/02/11/querying-google-analytics-data-in-bigquery/ |
| SELECT trafficSource.medium AS medium, | |
| COUNT(totals.bounces)/COUNT(totals.visits) AS bounceRate | |
| FROM [google.com:analytics-bigquery:LondonCycleHelmet.ga_sessions_20130910] | |
| GROUP BY medium | |
| -- from: http://www.lunametrics.com/blog/2016/02/11/querying-google-analytics-data-in-bigquery/ |
| -- Mediums with the Most Sessions per Hour | |
| SELECT trafficSource.medium AS medium, | |
| HOUR(SEC_TO_TIMESTAMP(visitStartTime)) AS sessionHour, | |
| COUNT(totals.visits) AS sessionCount, | |
| COUNT(totals.bounces)/COUNT(totals.visits) AS bounceRate | |
| FROM [google.com:analytics-bigquery:LondonCycleHelmet.ga_sessions_20130910] | |
| GROUP BY medium, sessionHour | |
| ORDER BY sessionHour, sessionCount DESC | |
| -- from: http://www.lunametrics.com/blog/2016/02/11/querying-google-analytics-data-in-bigquery/ |
| SELECT date, | |
| SUM(totals.visits) AS sessions | |
| FROM [google.com:analytics-bigquery:LondonCycleHelmet.ga_sessions_20130910] | |
| GROUP BY date | |
| --http://www.lunametrics.com/blog/2016/06/23/google-analytics-bigquery-export-schema/ |
| SELECT date, | |
| SUM(totals.visits) AS sessions | |
| FROM [google.com:analytics-bigquery:LondonCycleHelmet.ga_sessions_20130910] | |
| GROUP BY date | |
| HAVING sessions > 70 | |
| -- http://www.lunametrics.com/blog/2016/06/23/google-analytics-bigquery-export-schema/ |
| SELECT date, | |
| SUM(totals.visits) AS sessions | |
| FROM [google.com:analytics-bigquery:LondonCycleHelmet.ga_sessions_20130910] | |
| WHERE totals.transactions > 0 | |
| GROUP BY date | |
| -- http://www.lunametrics.com/blog/2016/06/23/google-analytics-bigquery-export-schema/ |
| SELECT trafficSource.medium, | |
| SUM(totals.newVisits) / SUM(totals.visits) AS percentNewSessions | |
| FROM [google.com:analytics-bigquery:LondonCycleHelmet.ga_sessions_20130910] | |
| GROUP BY trafficSource.medium |
| SELECT | |
| CONCAT(trafficSource.source,"/",trafficSource.medium) AS sourceMedium, | |
| newSessions / sessions AS percentNewSessions, | |
| bounces / sessions AS bounceRate | |
| FROM ( | |
| SELECT | |
| trafficSource.source, | |
| trafficSource.medium, | |
| SUM(totals.visits) AS sessions, | |
| SUM(totals.newVisits) AS newSessions, | |
| SUM(totals.bounces) AS bounces, | |
| FROM | |
| [google.com:analytics-bigquery:LondonCycleHelmet.ga_sessions_20130910] | |
| GROUP BY | |
| trafficSource.source, | |
| trafficSource.medium) |
| SELECT | |
| trafficSource.medium, | |
| SUM(totals.visits) AS sessions, | |
| -- 5 will give the min, 25%, 50%, 75%, max with 20% error | |
| -- the more buckets, the better the approximation (error = 1/number of buckets) | |
| -- at the cost of more computation | |
| -- QUANTILES returns all of the buckets, use NTH to extract the bucket you want | |
| NTH(2, QUANTILES(totals.timeOnSite, 5)) AS firstQuartile, | |
| NTH(3, QUANTILES(totals.timeOnSite, 5)) AS mean, | |
| NTH(3, QUANTILES(totals.timeOnSite, 5)) AS thirdQuartile | |
| FROM ( | |
| SELECT | |
| trafficSource.medium, | |
| totals.visits, | |
| -- Sessions with a single page view will have no time on site reported | |
| IF(totals.timeOnSite IS NULL, 0, totals.timeOnSite) AS totals.timeOnSite | |
| FROM | |
| [google.com:analytics-bigquery:LondonCycleHelmet.ga_sessions_20130910]) | |
| GROUP BY | |
| trafficSource.medium |
| -- of people who viewed one specific page (pageA) | |
| -- which other pages (pageB) did they view and how many times (crossCount) ? | |
| SELECT | |
| sp as pageA, | |
| pp as pageB, | |
| COUNT(*) AS crossCount | |
| FROM ( | |
| SELECT | |
| sess.fullvisitorid AS sfvi, | |
| pages.fullVisitorid AS pfvi, | |
| sess.hits.page.pagePath AS sp, | |
| pages.hits.page.pagePath AS pp | |
| FROM | |
| FLATTEN([google.com:analytics-bigquery:LondonCycleHelmet.ga_sessions_20130910], hits) AS sess | |
| CROSS JOIN | |
| FLATTEN([google.com:analytics-bigquery:LondonCycleHelmet.ga_sessions_20130910], hits) AS pages | |
| WHERE | |
| -- ensures each combo is only counted a single time | |
| pages.fullvisitorid < sess.fullvisitorid | |
| AND pages.hits.page.pagePath < sess.hits.page.pagePath | |
| GROUP BY sfvi, pfvi, sp, pp ) | |
| GROUP BY pageA, pageB | |
| ORDER BY pageA, crossCount DESC | |
| -- from - http://www.lunametrics.com/blog/2016/05/12/self-joins-windowing-user-defined-functions-bigquery/ |
| - find the the page viewed before a page, | |
| - we could find all related pages in a session using a self-join, | |
| - filter out, using a WHERE clause because in BigQuery join conditions, in the ON, cannot have inequalities, | |
| SELECT | |
| cur_hit.fullVisitorId AS fullVisitorId, | |
| cur_hit.visitId AS visitID, | |
| cur_hit.hits.hitNumber AS cur_hitnumber, | |
| cur_hit.hits.page.pagePath as cur_pagePath, | |
| cur_hit.hits.time AS cur_time, | |
| MAX(prev_hit.hits.hitNumber) AS prev_hitNumber, | |
| FROM | |
| FLATTEN([google.com:analytics-bigquery:LondonCycleHelmet.ga_sessions_20130910], hits) AS prev_hit | |
| INNER JOIN | |
| FLATTEN([google.com:analytics-bigquery:LondonCycleHelmet.ga_sessions_20130910], hits) AS cur_hit | |
| ON | |
| prev_hit.fullvisitorid = cur_hit.fullvisitorid | |
| AND prev_hit.visitid = cur_hit.visitid | |
| WHERE | |
| prev_hit.hits.hitNumber < cur_hit.hits.hitNumber | |
| AND prev_hit.hits.type = "PAGE" | |
| AND cur_hit.hits.type = "PAGE" | |
| GROUP BY fullVisitorId, visitID, cur_hitnumber, cur_pagePath, cur_time | |
| -- http://www.lunametrics.com/blog/2016/05/12/self-joins-windowing-user-defined-functions-bigquery/ |
| -- using windowing to find time on page | |
| -- COUNT(num) OVER (PARTITION BY num ) as Var I | |
| -- different windowing functions can be used in the same query | |
| SELECT | |
| fullVisitorId, | |
| visitId, | |
| hits.hitNumber, | |
| hits.page.pagePath, | |
| -- get next and previous hit time to be able to work out length of each hit | |
| hits.time AS hit_time, | |
| LEAD(hits.time, 1) OVER (PARTITION BY fullVisitorId, visitId ORDER BY hits.time ASC) AS next_hit_time, | |
| FROM | |
| [google.com:analytics-bigquery:LondonCycleHelmet.ga_sessions_20130910] | |
| WHERE | |
| hits.type = "PAGE" |
| SELECT | |
| fullVisitorId, | |
| visitId, | |
| hitNumber, | |
| pagePath, | |
| diff AS timeOnPage_ms | |
| FROM | |
| timePerPage( | |
| SELECT | |
| fullVisitorId, | |
| visitId, | |
| NEST(hits.page.pagePath) AS pages, | |
| NEST(hits.type) AS types, | |
| NEST(hits.time) AS times | |
| FROM | |
| [google.com:analytics-bigquery:LondonCycleHelmet.ga_sessions_20130910] | |
| GROUP BY | |
| fullVisitorId, | |
| visitId | |
| LIMIT | |
| 10) |
| SELECT | |
| COUNT(*) as logins_after_product_view | |
| FROM ( | |
| SELECT | |
| SUM(IF(hits.eventinfo.eventCategory = "Login", 1, 0)) OVER (PARTITION BY fullvisitorid, visitid ROWS BETWEEN 1 FOLLOWING AND 3 FOLLOWING) AS login_count, | |
| hits.page.pagePath as pagePath | |
| FROM | |
| [google.com:analytics-bigquery:LondonCycleHelmet.ga_sessions_20130910] ) | |
| WHERE | |
| login_count > 0 | |
| AND (pagePath CONTAINS 'vests' OR pagePath CONTAINS 'helments') | |
| -- from: http://www.lunametrics.com/blog/2016/02/11/querying-google-analytics-data-in-bigquery/ |