I’m querying review statuses for my team so that they can get a sense of who is doing all of the PR approvals and how long reviews are taking.
URL: https://github-url-here/api/graphql — Header: Authorization: Bearer <your-token>
# Fetches pull requests together with their reviews so that review
# turnaround can be analysed per reviewer.
query {
# The search string limits results to pull requests (is:pr) in the ORGHERE
# organisation created after 2021-01-01; `last: 50` requests 50 results.
search(query: "org:ORGHERE is:pr created:>2021-01-01", type: ISSUE, last: 50) {
edges {
node {
# Inline fragment: the fields below are selected for PullRequest nodes.
... on PullRequest {
url
title
# createdAt is the reference point the processing script measures
# review delays from.
createdAt
author { login }
reviewDecision
# At most the first 100 reviews of each pull request are returned.
reviews(first:100) {
edges {
node {
# The processing script reads author.login and publishedAt.
author { login }
publishedAt
state
}
}
}
}
}
}
}
}
Script to process the results (expects the query response saved as /tmp/data.json):
from datetime import datetime, timedelta
from collections import defaultdict
import json
def toDatetime(s):
    """Parse an ISO-8601 timestamp string into a timezone-aware datetime.

    Accepts timestamps such as ``2021-01-05T12:34:56Z`` or
    ``2021-01-05T12:34:56+02:00``. A fractional-seconds part
    (``2021-01-05T12:34:56.250Z``) is accepted as well.

    Args:
        s: Timestamp string with a UTC offset or a trailing ``Z``.

    Returns:
        A timezone-aware ``datetime``.

    Raises:
        ValueError: If ``s`` does not match the expected layout.
    """
    # %f is only added when a fractional part is present, because strptime
    # requires the format to match the input exactly.
    if "." in s:
        return datetime.strptime(s, "%Y-%m-%dT%H:%M:%S.%f%z")
    return datetime.strptime(s, "%Y-%m-%dT%H:%M:%S%z")
# Maps reviewer login -> list of delays in seconds, one entry per PR that the
# reviewer reviewed: the time from PR creation to that reviewer's first review.
reviewers = defaultdict(list)
# Creation time of the oldest PR in the data set; stays None if there are no PRs.
oldestPr = None

with open('/tmp/data.json', encoding='utf-8') as f:
    # json.load parses directly from the file object.
    data = json.load(f)

for pr in data['data']['search']['edges']:
    created = toDatetime(pr['node']['createdAt'])
    oldestPr = created if oldestPr is None else min(oldestPr, created)

    # Earliest review delay per reviewer for this PR only.
    firstReviewDelay = {}
    for review in pr['node']['reviews']['edges']:
        author = review['node']['author']
        publishedAt = review['node']['publishedAt']
        # NOTE(review): the API can return null for either field (for example
        # a deleted account or an unpublished review). Such reviews cannot be
        # attributed or timed, so skip them instead of crashing.
        if author is None or publishedAt is None:
            continue
        person = author['login']
        delay = toDatetime(publishedAt) - created
        if person not in firstReviewDelay or delay < firstReviewDelay[person]:
            firstReviewDelay[person] = delay

    for person, delay in firstReviewDelay.items():
        # Optional sanity check for implausibly slow first reviews:
        # if delay.total_seconds() > 250000:
        #     print(f"WARNING: Something looks off. Should {pr['node']['title']} really have taken {delay.total_seconds()} to review?")
        reviewers[person].append(delay.total_seconds())

print(f"Total data set is {len(data['data']['search']['edges'])} PRs since {oldestPr}")
import statistics as stats
# Order reviewers by number of reviewed PRs, most active first. The sort is
# stable, so reviewers with equal counts keep their original relative order.
sorted_reviewers = sorted(
    reviewers.items(),
    key=lambda entry: len(entry[1]),
    reverse=True,
)
def print_time(td):
    """Format a timedelta as a compact ``'<d>d <h>h <m>m'`` string.

    Leading zero units are dropped, but once a larger unit is shown every
    smaller unit is shown too (``'5h 0m'``, ``'2d 0h 0m'``). Seconds are
    discarded. Durations shorter than one minute are rendered as ``'0m'``
    rather than an empty string, so report lines never contain a blank value.

    Args:
        td: The ``datetime.timedelta`` to format.

    Returns:
        The formatted duration string.
    """
    remaining = td.total_seconds()
    parts = []
    for suffix, unit_seconds in (('d', 60 * 60 * 24), ('h', 60 * 60), ('m', 60)):
        count = int(remaining / unit_seconds)
        remaining -= count * unit_seconds
        # A non-empty `parts` means a larger unit was already emitted, so this
        # unit is printed even when it is zero.
        if parts or count != 0:
            parts.append(f'{count}{suffix}')
    return ' '.join(parts) if parts else '0m'
# Print one summary line per reviewer, in the order given by sorted_reviewers.
for reviewer, times in sorted_reviewers:
    # The mean is defined for a single sample, so report it whenever there is
    # at least one review; only the sample standard deviation needs two.
    if times:
        avg = print_time(timedelta(seconds=stats.mean(times)))
    else:
        avg = 'N/A'
    if len(times) >= 2:
        stdev = print_time(timedelta(seconds=stats.stdev(times)))
    else:
        stdev = 'N/A'
    print(f"{reviewer}: {len(times)} reviews. Time to first review: (avg: {avg}; stddev {stdev})")