Created
June 13, 2024 16:56
-
-
Save fzakaria/7e642555ac125d148f1aa31103f1ea06 to your computer and use it in GitHub Desktop.
Malloy for GitHub webhook data
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# First we declare out source which is a single table. | |
# You can join other sources together and Malloy will take care of doing the | |
# correct join conditions. | |
# To see the github events you can look at: | |
# https://docs.github.com/en/webhooks/webhook-events-and-payloads | |
source: github_events is duckdb.table('./github_events.parquet') extend { | |
dimension: | |
is_pull_request is metadata.event = 'pull_request' | |
measure: | |
counts is count() | |
} | |
# Let's refine our source with some pre-canned filters and dimensions | |
# that make sense to the sub-payload we want to look at, | |
# in this case: pull_request events. | |
source: github_pr_events is github_events extend { | |
where: is_pull_request and is_person | |
dimension: | |
is_merged is payload.action = 'closed' and | |
payload.pull_request.merged = true | |
is_bot is not is_person | |
is_person is payload.pull_request.user.login != | |
'dependabot[bot]' & 'ConfluentJenkins' & | |
'confluent-renovate[bot]' & 'ConfluentSemaphore' & | |
'ConfluentTools' | |
changes is payload.pull_request.additions + | |
payload.pull_request.deletions | |
size is pick 'small' when changes < 25 | |
pick 'medium' when changes < 100 | |
pick 'large' when changes < 1000 | |
else 'huge' | |
view: by_size is { | |
group_by: size | |
aggregate: counts | |
} | |
view: by_author is { | |
group_by: payload.pull_request.user.login | |
aggregate: counts | |
top: 10 | |
} | |
view: by_repository is { | |
group_by: payload.repository.full_name | |
aggregate: counts | |
top: 10 | |
} | |
view: by_merged_at_time is { | |
where: is_merged | |
group_by: payload.pull_request.merged_at.day | |
aggregate: counts is count() | |
} | |
} | |
# Let's start with a simple line chart and view how many PRs we are | |
# merging over time. We add breakdowns for by author as well. | |
# dashboard | |
run: github_pr_events -> { | |
# line_chart | |
nest: by_merged_at_time | |
nest: by_author + { | |
top: 10 | |
# line_chart | |
nest: by_merged_at_time | |
} | |
} | |
# Let's take a look at the merged pull requests and see | |
# how they are broken up by repository. | |
# For each repository let's see the distribution of PR sizes. | |
# dashboard | |
run: github_pr_events -> { | |
where: is_merged | |
aggregate: counts | |
nest: by_repository + { | |
# bar_chart | |
nest: by_size | |
} | |
} | |
# Let's see the reverse, now. | |
# I am interested in the distribution of PR sizes, and the worst | |
# offenders by looking at the huge/large size. | |
# dashboard | |
run: github_pr_events -> { | |
where: is_merged | |
aggregate: counts | |
nest: by_size + { | |
# bar_chart | |
nest: by_repository | |
} | |
} | |
// Explain somtehin | |
run: github_pr_events -> { | |
nest: | |
by_repository + { | |
nest: by_author | |
} | |
by_author + { | |
nest: by_repository | |
} | |
} | |
run: github_pr_events extend { | |
where: payload.action = 'closed' and | |
payload.pull_request.merged = true | |
} -> { | |
select: * | |
} | |
# dashboard | |
run: github_events -> { | |
where: metadata.event = 'pull_request' and | |
payload.action = 'closed' and | |
payload.pull_request.merged = true | |
group_by: payload.pull_request.merged_at.month | |
aggregate: counts is count() | |
nest: by_repository is { | |
group_by: payload.repository.full_name | |
aggregate: counts is count() | |
top: 10 | |
# line_chart | |
nest: by_time is { | |
group_by: payload.pull_request.merged_at.day | |
aggregate: counts is count() | |
} | |
# bar_chart | |
nest: by_user is { | |
group_by: payload.pull_request.user.login | |
aggregate: counts is count() | |
top: 5 | |
} | |
} | |
} | |
run: github_events -> { | |
where: metadata.event = 'pull_request' and | |
payload.action = 'closed' and | |
payload.pull_request.merged = true | |
# group_by: Day_of_Week_Merged_At | |
aggregate: counts is count() | |
# bar_chart | |
nest: by_repository is { | |
group_by: payload.repository.full_name | |
aggregate: counts is count() | |
top: 10 | |
} | |
} | |
run: github_events -> { | |
where: metadata.event = 'pull_request' and | |
(payload.action = 'opened' or payload.action = 'closed') | |
group_by: payload.repository.full_name | |
aggregate: | |
merged_pr_count is count() { where: payload.pull_request.merged = true }, | |
opened_pr_count is count() { where: payload.action = 'opened' } | |
} -> { | |
select: | |
full_name | |
merged_pr_count | |
opened_pr_count | |
# percent | |
merge_rate is merged_pr_count / opened_pr_count | |
} | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment