Last active
February 13, 2024 13:37
-
-
Save hfleitas/89ab472e7dba3549fdebccc602a6722f to your computer and use it in GitHub Desktop.
2Ready2019GitHub.kql
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#connect cluster('demo12.westus.kusto.windows.net').database('GitHub') | |
// Basics | |
// 1.2 Billions records | |
GithubEvent | |
| count | |
// Sample | |
GithubEvent | |
| take 10 | |
// access the Json attributes | |
GithubEvent | |
//| where CreatedAt > ago(200d) | |
| where Repo has 'kusto' | |
| extend name=tostring(Repo.name) | |
| distinct name | |
// chart of event per day for 60 days | |
GithubEvent | |
| where CreatedAt between (datetime(2017-12-01) .. 60d) | |
| summarize count() by bin(CreatedAt, 1d) | |
| render timechart | |
GithubEvent | |
| where Repo has 'Azure' | |
| project Repo, RepoName = Repo.name | |
| take 50 | |
//-------------------- | |
//-------------------- | |
//-------------------- | |
//-------------------- | |
// Advanced aggregations | |
GithubEvent | |
| where Type == "PushEvent" | |
| summarize count() by name=tostring(Repo.name) | |
| summarize percentiles(count_, 50, 90, 99) | |
// Joins | |
// WatchEvent is triggered when a repo is starred. | |
// Showing the top 5 popular repos by WatchEvent should give us a quick view into most popular technologies | |
GithubEvent | |
| where Type == "WatchEvent" | |
| summarize WatchEvents=count() by RepoName = tolower(tostring(Repo.name)) | |
| top 5 by WatchEvents | |
// You can see that some of the repos in the list are tutorial and samples | |
// Let's give it another shot with a slightly better tuned popularity indicator for technology repos | |
// Top 5 - popular repos by popularity indicator with different weights for WatchEvent and issue related events (IssueEvent, IssueCommentEvent) | |
let watchedRepos = | |
GithubEvent | |
| where Type == "WatchEvent" | |
| extend RepoName = tostring(Repo.name) | |
| summarize hint.shufflekey=RepoName WatchEvents=count() by RepoName; | |
let issuesRepos = | |
GithubEvent | |
| where Type in ("IssueCommentEvent", "IssuesEvent") | |
| extend RepoName = tostring(Repo.name) | |
| summarize hint.shufflekey=RepoName IssueEvents=count() by RepoName; | |
watchedRepos | |
| join hint.shufflekey=RepoName (issuesRepos) on RepoName | |
| extend Popularity=(WatchEvents * 0.3) + (IssueEvents*0.7) | |
| top 5 by Popularity | |
| project RepoName, Popularity | |
| render barchart | |
// I'm using Kusto analytics power to create two datasets in query time (repos that were watched and repos with issue related activity) | |
// and then joining them together to allow me to calculate a weighted popularity metric | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment