Created
December 21, 2021 23:47
-
-
Save jmaher/dfc293fffa60ddfb416e8541f0de6585 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json | |
import math | |
import os | |
import requests | |
# Headers sent with every HTTP request made by this script: request a JSON
# response and identify the client with the "ouija" user agent.
DEFAULT_REQUEST_HEADERS = {
    "Accept": "application/json",
    "User-Agent": "ouija",
}
def fetch_json(url):
    """Issue a GET request for *url* and return the decoded JSON body.

    The request carries DEFAULT_REQUEST_HEADERS and a 30 second timeout.
    ``raise_for_status()`` is called on the response before it is decoded,
    so an HTTP error status surfaces as an exception rather than as a
    parsed payload.
    """
    reply = requests.get(url, timeout=30, headers=DEFAULT_REQUEST_HEADERS)
    reply.raise_for_status()
    return reply.json()
def getPerfData(branch, revision):
    """Return the Treeherder performance summary for *revision* on *branch*.

    The payload is cached in the current working directory in a file named
    ``<branch>_<revision>.json``. When that file already exists its parsed
    contents are returned and no request is made; otherwise the summary is
    downloaded with fetch_json, written to the cache file, and returned.
    """
    summary_url = (
        "https://treeherder.mozilla.org/api/performance/summary"
        "/?repository=%s&framework=13&interval=1209600"
        "&no_subtests=true&revision=%s"
    ) % (branch, revision)
    cache_path = "%s_%s.json" % (branch, revision)

    if not os.path.exists(cache_path):
        # Cache miss: download first, then persist, so a failed request
        # never leaves a cache file behind.
        payload = fetch_json(summary_url)
        with open(cache_path, 'w') as cache_file:
            json.dump(payload, cache_file)
        return payload

    with open(cache_path, 'r') as cache_file:
        return json.load(cache_file)
def filterPerfData(data):
    """Return the entries of *data* that are worth analysing.

    An entry (a dict with 'name' and 'values' keys) is kept when it has at
    least five values and its name contains the substring 'fission'. The
    order of the surviving entries is preserved.
    """
    def is_usable(entry):
        samples = entry['values']
        # Missing/empty sample lists and short series are discarded.
        if not samples or len(samples) < 5:
            return False
        # fission is all we care about
        return 'fission' in entry['name']

    return [entry for entry in data if is_usable(entry)]
def avg(values):
    """Return the arithmetic mean of *values* as a float.

    Raises ZeroDivisionError when *values* is empty.
    """
    # Convert the total to float *before* dividing so the quotient can never
    # be truncated by integer division (multiplying by 1.0 afterwards is too
    # late to help).
    return float(sum(values)) / len(values)
def stddev(values):
    """Return the sample standard deviation of *values*.

    The sum of squared deviations from the mean is divided by
    ``len(values) - 1`` before taking the square root, so a sequence with
    exactly one element raises ZeroDivisionError.
    """
    mean = avg(values)
    squared_error = 0
    for sample in values:
        deviation = sample - mean
        squared_error += deviation * deviation
    variance = squared_error / (len(values) - 1)
    return math.sqrt(variance)
def getttest(base, new):
    """Return the t statistic comparing the sample *new* against *base*.

    The statistic is the difference of the two means divided by the
    standard error of that difference,
    ``sqrt(stddev(base)**2 / len(base) + stddev(new)**2 / len(new))``.
    The result is signed: positive when the mean of *new* is larger.

    Both samples must contain more than one value (stddev divides by
    ``len - 1``).

    When the standard error is zero (every value within each sample is
    identical) the quotient is undefined, so a fixed value is returned
    instead: 0 when the two means are equal (no difference at all), and the
    sentinel 10 when they differ.
    """
    # this is simplified due to len(base|new) > 1
    delta = avg(new) - avg(base)
    stdbase = stddev(base)
    stdnew = stddev(new)

    stddifferr = math.sqrt(((stdbase * stdbase) / len(base)) +
                           ((stdnew * stdnew) / len(new)))

    if stddifferr == 0:
        # No variance in either sample: identical means are "no change",
        # anything else is a noise-free difference reported as the sentinel.
        return 0 if delta == 0 else 10

    return delta / stddifferr
def summarizePerfData(data):
    """Build a per-metric summary dict keyed by entry name.

    For each entry in *data* the summary records:

    * 'values' - the entry's raw values,
    * 'avg'    - their mean,
    * 'stddev' - the standard deviation expressed as a percentage of the
                 mean,
    * 'lib'    - the entry's 'lower_is_better' flag.

    When several entries share a name only the first one is summarised.
    """
    summary = {}
    for entry in data:
        name = entry['name']
        if name in summary:
            # First occurrence wins; later duplicates are ignored.
            continue

        samples = entry['values']
        mean = avg(samples)
        summary[name] = {
            'values': samples,
            'avg': mean,
            'stddev': (stddev(samples) / mean) * 100,
            'lib': entry['lower_is_better'],
        }
    return summary
def getSummary(branch, rev):
    """Load, filter and summarise the performance data for one revision.

    Chains getPerfData -> filterPerfData -> summarizePerfData and returns
    the resulting per-metric summary dict.
    """
    return summarizePerfData(filterPerfData(getPerfData(branch, rev)))
def getRegressionData(branch, before, after):
    """Compare two revisions on *branch* metric by metric.

    Only metrics present in the summaries of both *before* and *after* are
    compared. Returns a dict keyed by metric name whose values contain:

    * 'regression' - percentage change of the average from *before* to
                     *after* (positive means the average went up),
    * 'noise'      - the *before* stddev percentage minus the *after*
                     stddev percentage,
    * 'meaningful' - whether the change is large and confident enough to
                     care about (see below).

    A change is never meaningful when the averages differ by less than 2%
    or the absolute t statistic is below 3. With a t statistic in [3, 5) it
    is meaningful only if the new revision is not better; at 5 and above it
    is always meaningful.

    A zero average on either side raises ZeroDivisionError.
    """
    regressions = {}
    bdata = getSummary(branch, before)
    adata = getSummary(branch, after)

    # find diff of values and stddevpct
    for metric, base in bdata.items():
        if metric not in adata:
            continue
        new = adata[metric]

        delta = new['avg'] - base['avg']

        # positive is a regression
        pctval = (delta / base['avg']) * 100

        # this is the difference in noise percentage- ideally lower noise
        noise = base['stddev'] - new['stddev']

        # used for confidence / ismeaningful- only looking for high confidence
        ttest = abs(getttest(base['values'], new['values']))

        # "Better" depends on the metric's direction: a drop is good when
        # lower is better, a rise is good when higher is better.
        if base['lib']:
            newIsBetter = delta < 0
        else:
            newIsBetter = delta > 0

        # Normalise the ratio of averages so it is always >= 1, making the
        # 2% threshold symmetric for increases and decreases.
        ratio = base['avg'] / new['avg']
        if ratio < 1:
            ratio = 1 / ratio

        # meaningful helps us filter data quickly
        if ratio < 1.02 or ttest < 3:
            meaningful = False
        elif ttest < 5:
            meaningful = not newIsBetter
        else:
            meaningful = True

        regressions[metric] = {'regression': pctval,
                               'noise': noise,
                               'meaningful': meaningful}
    return regressions
# Script body: compare the regression detected between one pair of revisions
# (the "moonshots" baseline) with the regression detected between a second
# pair (the "azure" pair), metric by metric.
branch = 'try'

# moonshots
before = '3a3b2003a298fce78f75ca936783e65b59e126d6'
after = '102f6a4387632957e21ee1e7c47a7c87db25b236'
basedata = getRegressionData(branch, before, after)

azure_before = '24514a27ef4638a7a87605da8275bb52c5985b85'
azure_after = 'c2e59e62232ac6c0a99066ceed30f0314c479d81'
newdata = getRegressionData(branch, azure_before, azure_after)

# sorted() yields a list on every Python version; on Python 3 keys() is a
# view object that has no .sort() method.
metrics = sorted(basedata)
for metric in metrics:
    # Metrics that are missing from the second comparison, or not
    # meaningful in either comparison, are printed by name only.
    if metric not in newdata or \
       not newdata[metric]['meaningful'] or \
       not basedata[metric]['meaningful']:
        print(metric)
        continue

    newstddev = newdata[metric]['noise']
    basestddev = basedata[metric]['noise']

    # the closer to zero the better; if moonshots detect a 4% regression,
    # we should be detecting something similar
    delta = newdata[metric]['regression'] - basedata[metric]['regression']
    # Output columns: metric name, difference in detected regression, and
    # whether the second pair's noise value exceeds the baseline's.
    print("%s,%s,%s" % (metric, delta, (newstddev > basestddev)))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment