Analyze WPT Export latency
#!/usr/bin/python3
# Warning: this file is kind of a mess at the moment
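# This script measures WPT export latency: for each merged chromium-export PR
# on w3c/web-platform-tests, find the Chromium commit it was exported from,
# compute the minutes between that commit landing and the PR merging, and
# write monthly 50th/90th percentile latencies to export-latencies.csv.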

import json
import csv
import requests
import re
import subprocess
from dateutil import parser
import numpy
from text_histogram import histogram  # third-party module; needed by analyze_mins()

PR_FILE = 'q2prs.json'
MINS_FILE = 'mins.json'
CHROMIUM_DIR = '/usr/local/google/home/jeffcarp/chromium/src'
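# Both *_FILE paths above are local caches; CHROMIUM_DIR must point at a
# local Chromium checkout (used for the git log/show lookups below).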

def fetch_all_q2_prs():
    print('Fetching all PRs')
    base_url = 'https://api.github.com/search/issues?q=repo:w3c/web-platform-tests%20type:pr%20label:chromium-export%20is:merged'
    r = requests.get(base_url)
    data = r.json()
    total = data['total_count']
    print(total, 'total PRs')
    page_size = 50
    total_pages = int(total / page_size) + 2
    prs = []
    for page in range(1, total_pages):
        print('Fetching page', page)
        r = requests.get('{}&page={}&per_page={}'.format(base_url, page, page_size))
        data = r.json()
        if 'items' not in data:
            print('no items in data:', data)
            break
        prs.extend(data['items'])
    print('Fetched', len(prs), 'PRs')
    return prs
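
# Fetch once and cache the PR list to disk so later runs can work offline.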
def fetch_and_write_q2_prs():
    prs = fetch_all_q2_prs()
    with open(PR_FILE, 'w') as f:
        json.dump(prs, f)

def get_local_q2_prs():
    with open(PR_FILE) as f:
        return json.load(f)
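
# Unused helper: pull the Review-Url footer out of a Rietveld CL description.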
def issue_url_from_body(body):
    m = re.search(r'Review-Url: (.+)\n', body, re.MULTILINE)
    return m.group(1) if m else None
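
# Map a review URL (Rietveld or Gerrit) back to the Chromium commit that
# references it, by grepping the local git log for the URL in commit messages.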
def get_sha_from_grep(url):
    cmd = ['git', 'log', '--all', '--format=%H', '-1', '--grep=%s' % url]
    print('the command line is {}'.format(' '.join(cmd)))
    p = subprocess.Popen(cmd, cwd=CHROMIUM_DIR, stdout=subprocess.PIPE,
                         universal_newlines=True)
    p.wait()
    sha = p.stdout.readline().strip()
    if len(sha) != 40:
        print('NO GOOD SHA')
        return None
    return sha
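
# Main pass: for each PR, recover its Chromium commit, then record the
# commit-to-merge latency in minutes, bucketed by month.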
def get_and_print_prs():
    prs = get_local_q2_prs()
    print('Number of PRs in Q2:', len(prs))
    min_differences = []
    min_differences_by_month = {}
    skipped = []
    for index, pr in enumerate(prs):
        print('PR number', index)
        print('PR URL:', 'https://github.com/w3c/web-platform-tests/pull/%s' % pr['number'])
        pr_closed_at = parser.parse(pr['closed_at'])
        # Double parse to remove timezone since Rietveld API (below) doesn't supply it
        pr_closed_at = parser.parse(pr_closed_at.strftime('%Y-%m-%d %H:%M:%S'))
        rietveld_issue = None
        gerrit_issue = None
        m = re.search(r'https://codereview\.chromium\.org/(.+)\n', pr['body'], re.MULTILINE)
        try:
            rietveld_issue = m.groups()[0]
        except AttributeError:
            print('PROBABLY A GERRIT PR, SEARCHING')
            m = re.search(r'Reviewed-on: https://chromium-review\.googlesource\.com/(.+)\n', pr['body'], re.MULTILINE)
            try:
                gerrit_issue = m.groups()[0].strip()
            except AttributeError:
                print('Could not get issue number from Gerrit CL!')
                raise
        if rietveld_issue:
            print('Found Rietveld issue', rietveld_issue)
            sha = get_sha_from_grep('https://codereview.chromium.org/%s' % rietveld_issue)
            '''
            r = requests.get('https://codereview.chromium.org/api/{}'.format(rietveld_issue))
            cl_data = r.json()
            # cl_modified_at = parser.parse(cl_data['modified'])
            '''
        elif gerrit_issue:
            print('Found Gerrit issue', gerrit_issue)
            sha = get_sha_from_grep('https://chromium-review.googlesource.com/%s' % gerrit_issue)
        else:
            raise ValueError('No issue supplied! %s %s' % (rietveld_issue, gerrit_issue))
        if not sha or len(sha) != 40:
            print('SKIPPING!')
            skipped.append(sha)
            continue
        '''
        m = re.search('Committed: https:\/\/chromium\.googlesource\.com\/chromium\/src\/\+\/(\S+)', cl_data['description'], re.MULTILINE)
        try:
            sha = m.groups()[0]
        except AttributeError as e:
            print(cl_data)
            raise 'NO COMMITED FOOTER ON ABOVE CL'
        '''
        print('found SHA', sha)
        p = subprocess.Popen(['git', 'show', '-s', '--format=%ci', sha],
                             cwd=CHROMIUM_DIR, stdout=subprocess.PIPE,
                             universal_newlines=True)
        p.wait()
        try:
            commit_time = parser.parse(p.stdout.readline())
            # Double parse to drop the timezone so it compares with the naive pr_closed_at
            commit_time = parser.parse(commit_time.strftime('%Y-%m-%d %H:%M:%S'))
        except Exception as e:
            print(e)
            print('Mistakes were made')
            continue
        print('pr_closed_at', pr_closed_at)
        print('commit_time', commit_time)
        mins_difference = (pr_closed_at - commit_time).total_seconds() / 60
        print('mins diff:', mins_difference)
        if mins_difference < 0:
            print('NEGATIVE, SKIPPING')
            skipped.append(sha)
            continue
        min_differences.append(mins_difference)
        print('MONTH')
        datekey = commit_time.strftime('%Y-%m')
        if datekey not in min_differences_by_month:
            min_differences_by_month[datekey] = []
        min_differences_by_month[datekey].append(mins_difference)
        print('Done\n\n')
        # print(pr['number'], pr['created_at'], pr['closed_at'], mins_difference)
    '''
    print('writing file', MINS_FILE)
    with open(MINS_FILE, 'w') as f:
        json.dump(min_differences, f)
    '''
    items = min_differences_by_month.items()
    items = sorted(items, reverse=True, key=lambda i: i[0])
    # print(items)
    with open('export-latencies.csv', 'w', newline='') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=['Month', '50th percentile', '90th percentile'])
        writer.writeheader()
        for key, diffs in items:
            writer.writerow({
                'Month': key,
                '50th percentile': numpy.percentile(diffs, 50),
                '90th percentile': numpy.percentile(diffs, 90)
            })
    '''
    for key, diffs in items:
        print(key)
        print('50th percentile', numpy.percentile(diffs, 50))
        print('90th percentile', numpy.percentile(diffs, 90))
        print()
    '''
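
# Read cached latencies from MINS_FILE and report the average, SLA buckets, a
# histogram, and percentiles. (Assumes the MINS_FILE-writing block above has
# been re-enabled so the cache exists.)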
def analyze_mins():
    with open(MINS_FILE) as f:
        min_differences = json.load(f)
    out_of_sla = []
    in_sla = []
    for mins in min_differences:
        if mins > 24 * 60:
            out_of_sla.append(mins)
        if mins < 35:
            in_sla.append(mins)
    average = sum(min_differences) / len(min_differences)
    total = len(min_differences)
    print('Average commit-to-PR-merge latency:', average, 'minutes', '(', average / 60, 'hours)')
    print(len(out_of_sla), '/', total, 'PRs out of 24h SLA -', len(out_of_sla) / total)
    print(len(in_sla), '/', total, 'PRs inside 35m SLA -', len(in_sla) / total)
    # print('Skipped', len(skipped), 'PRs due to negative timedelta')
    histogram(min_differences)
    print('50th percentile', numpy.percentile(min_differences, 50))
    print('90th percentile', numpy.percentile(min_differences, 90))

def main():
    # fetch_and_write_q2_prs()
    get_and_print_prs()
    # analyze_mins()

if __name__ == '__main__':
    main()