Skip to content

Instantly share code, notes, and snippets.

@crwilcox
Last active February 19, 2020 05:17
Show Gist options
  • Save crwilcox/889ddd4b536a089bcad0533f6e145ac8 to your computer and use it in GitHub Desktop.
Save crwilcox/889ddd4b536a089bcad0533f6e145ac8 to your computer and use it in GitHub Desktop.
Scan GitHub using BigQuery
from google.cloud import bigquery
import json
GITHUB_USERNAME = 'crwilcox'
START_DATE = "2019-08-26"
END_DATE = "2020-02-16"
client = bigquery.client.Client()
query = f"""SELECT repository, type, event AS status, COUNT(*) AS count
FROM (
SELECT type, repo.name as repository, actor.login,
JSON_EXTRACT(payload, '$.action') AS event, created_at
FROM `githubarchive.day.20*`
WHERE actor.login = '{GITHUB_USERNAME}' AND
created_at BETWEEN TIMESTAMP('{START_DATE}') AND
TIMESTAMP('{END_DATE}')
)
GROUP BY repository, type, status ORDER BY repository, type, status;
"""
print(query)
query_job = client.query(query)
results = [i for i in query_job.result()]
# results = {}
# with open('query_result.json', 'r') as js:
# results = json.load(js)
# # print(results)
# Releases made
count = [int(row.count) for row in results
if row.type == 'ReleaseEvent']
print(f"{sum(count)} Releases across {len(count)} repos")
# PRs Made
count = [int(row.count) for row in results
if row.type == 'PullRequestEvent' and
row.status == "\"opened\""]
print(f"{sum(count)} PRs opened across {len(count)} repos")
# PR Comments Left
count = [int(row.count) for row in results
if row.type == 'PullRequestReviewCommentEvent']
print(f"{sum(count)} PR comments across {len(count)} repos")
# Issues Created
count = [int(row.count) for row in results
if row.type == 'IssuesEvent' and
row.status == "\"opened\""]
print(f"{sum(count)} issues opened across {len(count)} repos")
# Issues Closed
count = [int(row.count) for row in results
if row.type == 'IssuesEvent' and
row.status == "\"closed\""]
print(f"{sum(count)} issues closed across {len(count)} repos")
# Issue Comments
count = [int(row.count) for row in results
if row.type == 'IssueCommentEvent']
print(f"{sum(count)} issue comments across {len(count)} repos")
# Push Events
count = [int(row.count) for row in results
if row.type == 'PushEvent']
print(f"{sum(count)} pushes across {len(count)} repos")
#!/bin/bash
enddate=`date +%Y-%m-%d -d "yesterday"`
startdate=`date +%Y-%m-%d -d "7 days ago"`
ghusername='crwilcox'
startdate='2018-03-05'
echo "query GitHub for user $ghusername from $startdate to $enddate"
bq query "SELECT repository, type, event AS status, COUNT(*) AS count \
FROM ( \
SELECT type, repo.name as repository, actor.login, JSON_EXTRACT(payload, '\$.action') AS event, \
FROM (TABLE_DATE_RANGE([githubarchive:day.], TIMESTAMP('$startdate'), TIMESTAMP('$enddate') )) \
WHERE actor.login = '$ghusername' ) \
GROUP BY repository, type, status ORDER BY repository, type, status;"
@tswast
Copy link

tswast commented Sep 12, 2018

I fixed your TODO for 2019 queries. https://gist.github.com/tswast/71cd4adb5b2308d67639fe6324357d31/revisions

Consider this a pull request. :-)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment