@gabrielfeo
Last active November 9, 2024 01:07
Compare separate gradle-profiler (last tested with v0.20.0) benchmark results by merging them into a single HTML
#!/usr/bin/env python3
from collections import namedtuple
import os
import shutil
import re
import json
import argparse
import sys
parser = argparse.ArgumentParser(
    formatter_class=argparse.RawDescriptionHelpFormatter,
    description="""
This utility can merge multiple gradle-profiler result HTMLs into a single
one. Optionally, it can suffix the original scenario names, using a `ps -o`
like syntax:

- results-i5-machine (results directory)
  - configuration (scenario)
  - build (scenario)
- results-i7-machine
  - configuration (scenario)
  - build (scenario)

merge-results ./results-i5-machine=-i5 ./results-i7-machine=-i7
    |
    V

- merged (results directory)
  - configuration-i5 (scenario)
  - configuration-i7 (scenario)
  - build-i5 (scenario)
  - build-i7 (scenario)
""",
)
parser.add_argument("--output-dir", dest="output_dir", default="./merge")
parser.add_argument(
    "benchmark_dirs",
    nargs='+',
    help="""
Benchmark results to concatenate. A scenario suffix may be specified in a \
`ps -o` like syntax: ./results-1=-1 ./results-2=-2.
""",
)
args = parser.parse_args()

DirSpec = namedtuple('DirSpec', ['path', 'scenario_suffix'], defaults=[""])
# Parse each positional argument into a DirSpec, e.g. './results-1=-1'
# becomes DirSpec(path='./results-1', scenario_suffix='-1')
dir_specs = tuple(DirSpec(*spec.split('=', 1)) for spec in args.benchmark_dirs)
output_dir = args.output_dir

def html_path_of_dir(dir):
    return f"{dir}/benchmark.html"
html_output_file = html_path_of_dir(output_dir)

def read_between(start_pattern, end_pattern, path) -> str:
    result = ""
    with open(path, "r", encoding='utf-8') as file:
        inside = False
        for line in file:
            if re.match(start_pattern, line) is not None:
                inside = True
            elif inside and re.match(end_pattern, line) is not None:
                break
            elif inside:
                result += line
    return result
def get_benchmark_result(benchmark_dir) -> dict:
    # gradle-profiler embeds the result data in the HTML as a JSON literal
    # assigned to `const benchmarkResult` inside a <script> tag
    results_str: str = read_between(
        r"const benchmarkResult =", r"\s?;\s?", html_path_of_dir(benchmark_dir))
    try:
        return json.loads(results_str)
    except json.JSONDecodeError:
        print(f"ERROR: Invalid JSON in {benchmark_dir}/benchmark.html", file=sys.stderr)
        sys.exit(1)

def get_scenario_results(benchmark_dir) -> list:
    results_json = get_benchmark_result(benchmark_dir)
    return results_json["scenarios"]
def create_output_html():
    # Copy the first benchmark's HTML as a base; its embedded results are
    # overwritten with the merged ones later
    first_dir = dir_specs[0].path
    base_html_path = html_path_of_dir(first_dir)
    os.makedirs(output_dir, exist_ok=True)
    shutil.copyfile(base_html_path, html_output_file)

def concat_scenario_results() -> list:
    scenario_results = []
    for dir in dir_specs:
        suffixed_scenario_results = []
        for result in get_scenario_results(dir.path):
            name = result["definition"]["name"]
            new_name = name + dir.scenario_suffix
            result["definition"]["name"] = new_name
            result["definition"]["title"] = new_name
            suffixed_scenario_results.append(result)
        scenario_results += suffixed_scenario_results
    return scenario_results

def strip_date_and_environment(benchmark_result):
    # Separate benchmarks have different dates and environments, so neither
    # applies to the merged result
    benchmark_result["date"] = ""
    benchmark_result["environment"] = ""
def overwrite_scenario_results_in_output_html(new_scenario_results):
    with open(html_output_file, 'r', encoding='utf-8') as file:
        html = file.read()
    new_result = get_benchmark_result(output_dir)
    strip_date_and_environment(new_result)
    dir_paths = map(lambda dir: dir.path, dir_specs)
    new_result["title"] = f"Benchmark Results: {' + '.join(dir_paths)}"
    new_result["scenarios"] = new_scenario_results
    new_result = f"const benchmarkResult = {json.dumps(new_result, indent=2)};"
    # Substitute the whole `const benchmarkResult = {...};` assignment in the
    # copied HTML with the merged result
    html = re.sub(r"const benchmarkResult =.*}\s+]\s+}\s+;",
                  new_result, html, flags=re.S)
    with open(html_output_file, 'w', encoding='utf-8') as file:
        file.write(html)

if __name__ == '__main__':
    if os.path.exists(output_dir):
        print(f"ERROR: Output directory {output_dir} already exists", file=sys.stderr)
        sys.exit(1)
    create_output_html()
    concatenated_scenario_results = concat_scenario_results()
    overwrite_scenario_results_in_output_html(concatenated_scenario_results)
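
Example usage, assuming the script is saved as merge-results.py:

python3 merge-results.py --output-dir ./merged ./results-i5-machine=-i5 ./results-i7-machine=-i7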
@gabrielfeo (Author) commented Feb 18, 2022

This is useful when benchmarks need to be run separately or would be faster to run separately. Some use cases so far:

  • Comparing two commits with ABI changes (currently not possible: gradle/gradle-profiler#325)
  • Comparing different machines
  • Comparing different OSes
  • Splitting slow benchmark scenarios across different machines to run in parallel
  • When a benchmark of 1+ scenarios fails, you can retry just the failed ones and merge them with the first benchmark's successful scenarios (see the example below).
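
For the last case, the invocation might look like this (directory names hypothetical), after deleting any failed scenarios from the first run's HTML as described under known issue 2 below:

python3 merge-results.py --output-dir ./merged ./results-first-run ./results-retry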

Known issues

  1. Does not merge the CSVs of benchmarks and does not copy the scenarios' directories; only the HTML is merged.
  2. Does not support comments inside the HTML <script>. If you need to remove a failed scenario that causes gradle-profiler to display NaN, don't comment out the scenario object; delete it from the <script> tag entirely (see the sketch below).
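
A rough sketch of that deletion (not part of the script above; drop_scenario and the file/scenario names are hypothetical, and the regex assumes the same `const benchmarkResult = ...;` layout that the merge script substitutes on):

#!/usr/bin/env python3
import json
import re

def drop_scenario(html_path, scenario_name):
    with open(html_path, 'r', encoding='utf-8') as file:
        html = file.read()
    # Extract the JSON assigned to `const benchmarkResult` (same pattern the
    # merge script uses for substitution)
    match = re.search(r"const benchmarkResult =(.*}\s+]\s+})\s+;", html, flags=re.S)
    result = json.loads(match.group(1))
    # Keep every scenario except the failed one
    result["scenarios"] = [
        s for s in result["scenarios"]
        if s["definition"]["name"] != scenario_name
    ]
    replacement = f"const benchmarkResult = {json.dumps(result, indent=2)};"
    # Use a function as the replacement so re.sub doesn't interpret
    # backslash escapes that json.dumps may emit
    html = re.sub(r"const benchmarkResult =.*}\s+]\s+}\s+;",
                  lambda _: replacement, html, flags=re.S)
    with open(html_path, 'w', encoding='utf-8') as file:
        file.write(html)

drop_scenario("./results-first-run/benchmark.html", "build")  # hypothetical names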

@wzieba commented Jan 24, 2024

Thank you!
