Skip to content

Instantly share code, notes, and snippets.

@cpcloud
Created October 3, 2018 20:08
Show Gist options
  • Save cpcloud/a991b2d6e3c6abeffa71d4520f21be7b to your computer and use it in GitHub Desktop.
Save cpcloud/a991b2d6e3c6abeffa71d4520f21be7b to your computer and use it in GitHub Desktop.
Ibis Night Owl Query
con = ibis.bigquery.connect(billing_project, 'bigquery-public-data.samples')
t = con.table('github_nested')
expr = (
t.mutate(
# get the hour in UTC during which a repo was created
hour=lambda t: t.created_at
.to_timestamp('%Y/%m/%d %T %z')
.hour(),
# compute the UTC offset to adjust in the next expression
utc_offset=lambda t: t.created_at.split(' ')[2].cast('int64') // 100)
# group by the adjusted hour, count and sort by descending count
.groupby(hour=lambda t: t.hour + t.utc_offset)
.count()
.sort_by(ibis.desc('count'))
# sum up the number of repos that were created between midnight and 4 AM
# local time
.aggregate(
total=lambda t: t['count'].sum(),
night_owl=lambda t: t['count'].sum(where=t.hour.between(0, 4)))
# compute the percentage of repos created between midnight and 4 AM
.mutate(night_owl_perc=lambda t: t.night_owl / t.total)
)
df = expr.execute()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment