Analyze WPT Export latency
#!/usr/bin/python3
# Warning: this file is kind of a mess at the moment
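# This script measures WPT export latency: for each merged chromium-export PR
# on w3c/web-platform-tests, find the Chromium commit it was exported from,
# compute the minutes between that commit landing and the PR merging, and
# write monthly 50th/90th percentile latencies to export-latencies.csv.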

import json
import csv
import requests
import re
import subprocess
from dateutil import parser
import numpy
from text_histogram import histogram  # third-party module; needed by analyze_mins()

PR_FILE = 'q2prs.json'
MINS_FILE = 'mins.json'
CHROMIUM_DIR = '/usr/local/google/home/jeffcarp/chromium/src'
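# Both *_FILE paths above are local caches; CHROMIUM_DIR must point at a
# local Chromium checkout (used for the git log/show lookups below).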

def fetch_all_q2_prs():
    print('Fetching all PRs')
    base_url = 'https://api.github.com/search/issues?q=repo:w3c/web-platform-tests%20type:pr%20label:chromium-export%20is:merged'
    r = requests.get(base_url)
    data = r.json()
    total = data['total_count']
    print(total, 'total PRs')
    page_size = 50
    total_pages = int(total / page_size) + 2
    prs = []
    for page in range(1, total_pages):
        print('Fetching page', page)
        r = requests.get('{}&page={}&per_page={}'.format(base_url, page, page_size))
        data = r.json()
        if 'items' not in data:
            print('no items in data:', data)
            break
        prs.extend(data['items'])
    print('Fetched', len(prs), 'PRs')
    return prs
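
# Fetch once and cache the PR list to disk so later runs can work offline.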
def fetch_and_write_q2_prs():
    prs = fetch_all_q2_prs()
    with open(PR_FILE, 'w') as f:
        json.dump(prs, f)

def get_local_q2_prs():
    with open(PR_FILE) as f:
        return json.load(f)
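
# Unused helper: pull the Review-Url footer out of a Rietveld CL description.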
def issue_url_from_body(body):
    m = re.search(r'Review-Url: (.+)\n', body, re.MULTILINE)
    return m.group(1) if m else None
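
# Map a review URL (Rietveld or Gerrit) back to the Chromium commit that
# references it, by grepping the local git log for the URL in commit messages.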
def get_sha_from_grep(url):
    cmd = ['git', 'log', '--all', '--format=%H', '-1', '--grep=%s' % url]
    print('the command line is {}'.format(' '.join(cmd)))
    p = subprocess.Popen(cmd, cwd=CHROMIUM_DIR, stdout=subprocess.PIPE,
                         universal_newlines=True)
    p.wait()
    sha = p.stdout.readline().strip()
    if len(sha) != 40:
        print('NO GOOD SHA')
        return None
    return sha
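
# Main pass: for each PR, recover its Chromium commit, then record the
# commit-to-merge latency in minutes, bucketed by month.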
def get_and_print_prs():
    prs = get_local_q2_prs()
    print('Number of PRs in Q2:', len(prs))
    min_differences = []
    min_differences_by_month = {}
    skipped = []
    for index, pr in enumerate(prs):
        print('PR number', index)
        print('PR URL:', 'https://github.com/w3c/web-platform-tests/pull/%s' % pr['number'])
        pr_closed_at = parser.parse(pr['closed_at'])
        # Double parse to remove timezone since Rietveld API (below) doesn't supply it
        pr_closed_at = parser.parse(pr_closed_at.strftime('%Y-%m-%d %H:%M:%S'))
        rietveld_issue = None
        gerrit_issue = None
        m = re.search(r'https://codereview\.chromium\.org/(.+)\n', pr['body'], re.MULTILINE)
        try:
            rietveld_issue = m.groups()[0]
        except AttributeError:
            print('PROBABLY A GERRIT PR, SEARCHING')
            m = re.search(r'Reviewed-on: https://chromium-review\.googlesource\.com/(.+)\n', pr['body'], re.MULTILINE)
            try:
                gerrit_issue = m.groups()[0].strip()
            except AttributeError:
                print('Could not get issue number from Gerrit CL!')
                raise
        if rietveld_issue:
            print('Found Rietveld issue', rietveld_issue)
            sha = get_sha_from_grep('https://codereview.chromium.org/%s' % rietveld_issue)
            '''
            r = requests.get('https://codereview.chromium.org/api/{}'.format(rietveld_issue))
            cl_data = r.json()
            # cl_modified_at = parser.parse(cl_data['modified'])
            '''
        elif gerrit_issue:
            print('Found Gerrit issue', gerrit_issue)
            sha = get_sha_from_grep('https://chromium-review.googlesource.com/%s' % gerrit_issue)
        else:
            raise ValueError('No issue supplied! %s %s' % (rietveld_issue, gerrit_issue))
        if not sha or len(sha) != 40:
            print('SKIPPING!')
            skipped.append(sha)
            continue
        '''
        m = re.search('Committed: https:\/\/chromium\.googlesource\.com\/chromium\/src\/\+\/(\S+)', cl_data['description'], re.MULTILINE)
        try:
            sha = m.groups()[0]
        except AttributeError as e:
            print(cl_data)
            raise 'NO COMMITED FOOTER ON ABOVE CL'
        '''
        print('found SHA', sha)
        p = subprocess.Popen(['git', 'show', '-s', '--format=%ci', sha],
                             cwd=CHROMIUM_DIR, stdout=subprocess.PIPE,
                             universal_newlines=True)
        p.wait()
        try:
            commit_time = parser.parse(p.stdout.readline())
            # Double parse to drop the timezone so it compares with the naive pr_closed_at
            commit_time = parser.parse(commit_time.strftime('%Y-%m-%d %H:%M:%S'))
        except Exception as e:
            print(e)
            print('Mistakes were made')
            continue
        print('pr_closed_at', pr_closed_at)
        print('commit_time', commit_time)
        mins_difference = (pr_closed_at - commit_time).total_seconds() / 60
        print('mins diff:', mins_difference)
        if mins_difference < 0:
            print('NEGATIVE, SKIPPING')
            skipped.append(sha)
            continue
        min_differences.append(mins_difference)
        print('MONTH')
        datekey = commit_time.strftime('%Y-%m')
        if datekey not in min_differences_by_month:
            min_differences_by_month[datekey] = []
        min_differences_by_month[datekey].append(mins_difference)
        print('Done\n\n')
        # print(pr['number'], pr['created_at'], pr['closed_at'], mins_difference)
    '''
    print('writing file', MINS_FILE)
    with open(MINS_FILE, 'w') as f:
        json.dump(min_differences, f)
    '''
    items = min_differences_by_month.items()
    items = sorted(items, reverse=True, key=lambda i: i[0])
    # print(items)
    with open('export-latencies.csv', 'w', newline='') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=['Month', '50th percentile', '90th percentile'])
        writer.writeheader()
        for key, diffs in items:
            writer.writerow({
                'Month': key,
                '50th percentile': numpy.percentile(diffs, 50),
                '90th percentile': numpy.percentile(diffs, 90)
            })
    '''
    for key, diffs in items:
        print(key)
        print('50th percentile', numpy.percentile(diffs, 50))
        print('90th percentile', numpy.percentile(diffs, 90))
        print()
    '''
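
# Read cached latencies from MINS_FILE and report the average, SLA buckets, a
# histogram, and percentiles. (Assumes the MINS_FILE-writing block above has
# been re-enabled so the cache exists.)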
def analyze_mins():
    with open(MINS_FILE) as f:
        min_differences = json.load(f)
    out_of_sla = []
    in_sla = []
    for mins in min_differences:
        if mins > 24 * 60:
            out_of_sla.append(mins)
        if mins < 35:
            in_sla.append(mins)
    average = sum(min_differences) / len(min_differences)
    total = len(min_differences)
    print('Average commit-to-PR-merge latency:', average, 'minutes', '(', average / 60, 'hours)')
    print(len(out_of_sla), '/', total, 'PRs out of 24h SLA -', len(out_of_sla) / total)
    print(len(in_sla), '/', total, 'PRs inside 35m SLA -', len(in_sla) / total)
    # print('Skipped', len(skipped), 'PRs due to negative timedelta')
    histogram(min_differences)
    print('50th percentile', numpy.percentile(min_differences, 50))
    print('90th percentile', numpy.percentile(min_differences, 90))

def main():
    # fetch_and_write_q2_prs()
    get_and_print_prs()
    # analyze_mins()

if __name__ == '__main__':
    main()