Created
June 27, 2017 21:25
-
-
Save jwhitlock/e8d324821bb68358799529e2b6ebab42 to your computer and use it in GitHub Desktop.
Analyze PR merges for mozilla/kuma
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
from datetime import date, datetime, timedelta, tzinfo | |
import argparse | |
import requests | |
import requests_cache | |
import csv | |
import pprint | |
# Used as the end date for staff rows that have no recorded end.
current = date.today()

# MDN full-time dev staff, one row per person:
#   (GitHub username, approx start date, approx end date, "full-time" factor)
# The factor is summed per month to get a full-time-equivalent staff count.
dev_staff = (
    ('darkwing', date(2012, 4, 24), date(2015, 12, 15), 1),
    ('escattone', date(2016, 9, 26), current, 1),
    ('groovecoder', date(2011, 2, 15), date(2015, 12, 31), 1),
    ('jezdez', date(2013, 7, 1), date(2016, 3, 31), 1),
    ('jpetto', date(2017, 1, 1), date(2017, 3, 31), 1),
    ('jwhitlock', date(2015, 7, 1), current, 1),
    ('lmorchard', date(2011, 7, 12), date(2014, 5, 5), 1),
    # Start must precede end, otherwise the row never overlaps any month.
    ('openjck', date(2012, 3, 26), date(2016, 1, 15), 1),
    ('robhudson', date(2014, 3, 7), date(2016, 3, 31), 1),
    ('schalkneethling', date(2017, 4, 1), current, 1),
    ('stephaniehobson', date(2014, 6, 9), current, 1),
    ('ubernostrum', date(2011, 8, 22), date(2015, 6, 1), 1),
    ('willkg', date(2016, 1, 1), date(2016, 3, 31), 1),
    ('dchukhin', date(2016, 4, 1), date(2016, 7, 1), .25),
    ('emullaney', date(2016, 4, 1), date(2016, 7, 1), .25),
    ('jbradberry', date(2016, 4, 1), date(2016, 7, 1), 0),
    ('jsocol', date(2010, 2, 1), date(2013, 4, 5), 1),
)

# Usernames treated as staff for the is_staff flag but not counted in the
# per-month staff total (they have no dates or factor).
other_staff = {
    'Elchi3',
    'JeremiePat',
    'Osmose',
    'Sheeri',
    'a2sheppy',
    'bensternthal',
    'chrisdavidmills',
    'davehunt',
    'escattone',
    'glogiotatidis',
    'hoosteeno',
    'jgmize',
    'lonnen',
    'metadave',
    'teoli2003',
    'wbamberg',
}

# Every username that counts as staff, regardless of dates.
staff = {member[0] for member in dev_staff} | other_staff
def user_is_staff(username):
    """Report whether ``username`` is in the module-level ``staff`` set."""
    is_member = username in staff
    return is_member
class UTC(tzinfo):
    """tzinfo implementation with a fixed zero offset, named "UTC"."""

    # Zero-length offset returned by both utcoffset() and dst().
    ZERO = timedelta(0)

    def tzname(self, dt):
        """Return the zone name, which is always "UTC"."""
        return "UTC"

    def dst(self, dt):
        """Return the daylight-saving adjustment, which is always zero."""
        return self.ZERO

    def utcoffset(self, dt):
        """Return the offset from UTC, which is always zero."""
        return self.ZERO


# Module-level instance of the UTC tzinfo.
utc = UTC()
def to_datetime(raw_date_str):
    """Parse a ``YYYY-MM-DDTHH:MM:SS`` timestamp, with optional trailing ``Z``.

    Returns the empty string when given ``None``. Otherwise returns a naive
    ``datetime``: a trailing ``Z`` is dropped before parsing and no tzinfo
    is attached.
    """
    if raw_date_str is None:
        return ''
    if raw_date_str.endswith('Z'):
        trimmed = raw_date_str[:-1]
    else:
        trimmed = raw_date_str
    return datetime.strptime(trimmed, '%Y-%m-%dT%H:%M:%S')
def merged_pull_requests(owner, repo, client_id, client_secret, state='open', page=1):
    """Fetch one page of a repository's pull requests and keep the merged ones.

    Args:
        owner: GitHub account that owns the repository.
        repo: Repository name.
        client_id: GitHub OAuth application client ID.
        client_secret: GitHub OAuth application client secret.
        state: Pull request state filter; only sent when it is not 'open'.
        page: 1-based result page; only sent when greater than 1.

    Returns:
        A list with one dict per pull request whose ``merged_at`` is set,
        with keys ``number``, ``username``, ``title``, ``merged_at``,
        ``created_at``, ``is_staff`` (1 or 0), ``created_month``,
        ``merged_month`` (both ``YYYY-MM``), ``secs_open`` and ``days_open``.

    Raises:
        Exception: if the decoded response is an object with a 'message'
            key; the exception carries the raw response text.
    """
    url = 'https://api.github.com/repos/%(owner)s/%(repo)s/pulls' % {
        'owner': owner,
        'repo': repo,
    }
    payload = {}
    if page > 1:
        payload['page'] = str(page)
    if state != 'open':
        payload['state'] = state

    # Send the OAuth app credentials as HTTP basic auth rather than as query
    # parameters: GitHub no longer accepts credentials in the query string,
    # and this keeps the secret out of the URL printed below.
    resp = requests.get(url, params=payload, auth=(client_id, client_secret))
    print(resp.url)

    out = resp.json()
    # A successful listing is a JSON array; a JSON object with a 'message'
    # key is treated as an error payload.
    if isinstance(out, dict) and 'message' in out:
        pprint.pprint(out)
        raise Exception(resp.text)

    prs = []
    for pr in out:
        if not pr['merged_at']:
            # Not merged: skip it.
            continue
        username = pr['user']['login']
        merged_at = to_datetime(pr['merged_at'])
        created_at = to_datetime(pr['created_at'])
        time_open = merged_at - created_at
        prs.append({
            'number': pr['number'],
            'username': username,
            'title': pr['title'],
            'merged_at': merged_at,
            'created_at': created_at,
            'is_staff': 1 if user_is_staff(username) else 0,
            'created_month': created_at.strftime('%Y-%m'),
            'merged_month': merged_at.strftime('%Y-%m'),
            'secs_open': time_open.total_seconds(),
            'days_open': time_open.days,
        })
    return prs
# Field names, in output order, for each per-pull-request row.
columns = [
    'number', 'username', 'is_staff', 'title',
    'created_at', 'created_month',
    'merged_at', 'merged_month',
    'secs_open', 'days_open',
]
def pull_requests_merged_in_range(owner, repo, client_id, client_secret, start):
    """Collect merged pull requests created on or after ``start``.

    Requests 'closed' pull requests page by page via merged_pull_requests()
    and turns each kept one into a tuple of UTF-8 encoded strings, one per
    entry in ``columns``. Returns the tuples as a sorted list.
    """
    rows = []
    page_number = 0
    reached_start = False
    # NOTE(review): the loop only ends once a merged PR created before
    # ``start`` is seen; if no such PR exists it keeps requesting pages.
    while not reached_start:
        page_number += 1
        page_prs = merged_pull_requests(
            owner, repo, client_id, client_secret, 'closed', page_number)
        for pr in page_prs:
            if pr['created_at'].date() >= start:
                rows.append(tuple(
                    unicode(pr[item]).encode('utf8') for item in columns))
            else:
                # Older than the requested range: finish this page, then stop.
                reached_start = True
    return sorted(rows)
# Field names, in output order, for each per-month summary row.
by_month_columns = [
    'month',
    'staff',
    'prs',
    'by_staff',
    'by_other',
    'avg_secs_open',
    'avg_days_open',
    'prs_over_staff',
    'avg_secs_open_over_staff',
    'avg_days_open_over_staff',
]
def _staff_count_for_month(year, month):
    """Sum the "full-time" factors of dev_staff rows overlapping a month."""
    month_start = date(year, month, 1)
    # First day of the following month, used as an exclusive upper bound.
    if month == 12:
        next_month_start = date(year + 1, 1, 1)
    else:
        next_month_start = date(year, month + 1, 1)
    staff_count = 0.0
    for _username, staff_start, staff_end, factor in dev_staff:
        if staff_start < next_month_start and staff_end >= month_start:
            staff_count += factor
    return staff_count


def pull_requests_by_month(prs):
    """Summarize pull request rows per merge month.

    Args:
        prs: Iterable of rows whose values are ordered like ``columns``.
            ``is_staff`` is compared against the string '1', and
            ``days_open`` / ``secs_open`` are converted with float().

    Returns:
        A list of tuples, sorted by month, with values ordered like
        ``by_month_columns``: month (``YYYY-MM``), staff count, total PRs,
        PRs by staff, PRs by others, average seconds open, average days
        open, then PRs, average seconds and average days each divided by
        the staff count. The three per-staff values are ``None`` for a
        month whose staff count is zero.
    """
    by_month = {}
    for pr in prs:
        pr_dict = dict(zip(columns, pr))
        by_month.setdefault(pr_dict['merged_month'], []).append(pr_dict)

    by_month_rows = []
    for month in sorted(by_month):
        # How many full time staff members?
        raw_year, raw_month = month.split('-')
        staff_count = _staff_count_for_month(int(raw_year), int(raw_month))

        # Aggregate other data
        by_staff, by_other, days, secs = 0, 0, 0.0, 0.0
        for pr_dict in by_month[month]:
            if pr_dict['is_staff'] == '1':
                by_staff += 1
            else:
                by_other += 1
            days += float(pr_dict['days_open'])
            secs += float(pr_dict['secs_open'])
        # Every month key has at least one PR, so total is never zero.
        total = by_staff + by_other
        avg_days = days / float(total)
        avg_secs = secs / float(total)

        if staff_count:
            prs_over_staff = float(total) / float(staff_count)
            avg_days_over_staff = avg_days / float(staff_count)
            avg_secs_over_staff = avg_secs / float(staff_count)
        else:
            # No staff in this month: the ratios are undefined. None avoids
            # a ZeroDivisionError and is written by csv as an empty cell.
            prs_over_staff = None
            avg_days_over_staff = None
            avg_secs_over_staff = None

        by_month_rows.append((
            month,
            staff_count,
            total,
            by_staff,
            by_other,
            avg_secs,
            avg_days,
            prs_over_staff,
            avg_secs_over_staff,
            avg_days_over_staff,
        ))
    return by_month_rows
def debug_staff(prs):
    """Print the distinct usernames in ``prs``, split into staff and non-staff.

    Each row is read by position: index 1 is the username and index 2 is
    the staff flag, where the string '1' means staff.
    """
    staff_users, nonstaff_users = set(), set()
    for row in prs:
        username, is_staff = row[1], row[2]
        bucket = staff_users if is_staff == '1' else nonstaff_users
        bucket.add(username)

    print("Staff users:")
    pprint.pprint(sorted(staff_users))
    print("\nNon-staff users:")
    pprint.pprint(sorted(nonstaff_users))
def valid_date(raw_date):
    """Convert a ``YYYY-MM-DD`` string to a ``date`` for use as an argparse type.

    Raises argparse.ArgumentTypeError when the string does not parse.
    """
    try:
        parsed = datetime.strptime(raw_date, "%Y-%m-%d")
    except ValueError:
        raise argparse.ArgumentTypeError(
            "Not a valid date: '{0}'.".format(raw_date))
    return parsed.date()
def get_args():
    """Parse the command-line arguments.

    Returns:
        The argparse namespace with ``client_id``, ``client_secret``,
        ``start`` (a ``date``, converted by valid_date) and ``cachefile``.
    """
    parser = argparse.ArgumentParser(
        description='Analyze PR merges for mozilla/kuma.')
    parser.add_argument('client_id', type=str, help='GitHub client ID')
    parser.add_argument('client_secret', type=str, help='GitHub client secret')
    # The default is a string on purpose: argparse runs string defaults
    # through ``type``, so ``start`` is a date either way.
    parser.add_argument('--start', type=valid_date,
                        help='start date, YYYY-MM-DD, default 2014-01-01',
                        default='2014-01-01')
    parser.add_argument('--cachefile', type=str,
                        help='cache file for GitHub requests',
                        default='github_cache')
    return parser.parse_args()
if __name__ == '__main__':
    org = 'mozilla'
    repo = 'kuma'
    args = get_args()

    # Route requests through requests_cache, using the chosen cache name.
    requests_cache.install_cache(args.cachefile)

    prs = pull_requests_merged_in_range(org, repo, args.client_id,
                                        args.client_secret, args.start)
    debug_staff(prs)

    # One row per merged pull request. open() replaces the Python 2-only
    # file() constructor; 'wb' is the mode the Python 2 csv module expects.
    with open('github.csv', 'wb') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(columns)
        writer.writerows(prs)

    # One summary row per merge month.
    by_month = pull_requests_by_month(prs)
    with open('prs_by_month.csv', 'wb') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(by_month_columns)
        writer.writerows(by_month)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment