Created
April 14, 2023 17:04
-
-
Save ianmcook/ace2bd219d77ed46b1c102eb4c11836d to your computer and use it in GitHub Desktop.
Ibis BigQuery github_nested example query
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import google.auth | |
import ibis | |
from ibis import _ | |
credentials, billing_project = google.auth.default() | |
conn = ibis.bigquery.connect(billing_project, 'bigquery-public-data.samples') | |
t = conn.table('github_nested') | |
expr = ( | |
t.mutate( | |
# get the hour in UTC during which a repo was created | |
hour=_.created_at.to_timestamp('%Y/%m/%d %T %z').hour(), | |
# compute the UTC offset to adjust in the next expression | |
utc_offset=_.created_at.split(' ')[2].cast('int64') // 100 | |
) | |
# group by the adjusted hour, count and sort by descending count | |
.group_by(hour=_.hour + _.utc_offset) | |
.count() | |
.order_by(ibis.desc('count')) | |
# sum up the number of repos that were created between midnight and 4 AM | |
# local time | |
.aggregate( | |
total=_['count'].sum(), | |
night_owl=_['count'].sum(where=_.hour.between(0, 4)) | |
) | |
# compute the percentage of repos created between midnight and 4 AM | |
.mutate(night_owl_perc=_.night_owl / _.total) | |
) | |
df = expr.execute() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This is a version of the Ibis code shown in this Google Cloud blog post that has been updated to use the underscore API added in Ibis 3.1.