Last active
February 9, 2022 17:55
-
-
Save ChrisCummins/d4386fd043c1edbeb4da75789e277a9f to your computer and use it in GitHub Desktop.
Evaluating binary sizes of CHStone benchmarks when compiled using different combinations of clang / opt flags
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Script to evaluate binary sizes of CHStone benchmarks when compiled using | |
different combinations of clang / opt flags. | |
Requires latest CompilerGym: | |
python -m pip install compiler_gym -U | |
Usage: | |
python opt_wtf.py | |
Output on my machine: | |
List of compilation methods used: | |
01 = clang $< -o $@ -Oz | |
02 = clang $< -o a.bc -Oz -emit-llvm -c ; clang -Oz a.bc -o $@ | |
03 = clang $< -o a.bc -Oz -emit-llvm -c ; opt -Oz a.bc -o b.bc ; clang -Oz b.bc -o $@ | |
04 = clang $< -o a.bc -Oz -emit-llvm -c -Xclang -disable-llvm-passes -Xclang -disable-llvm-optzns ; clang a.bc -Oz -o $@ | |
05 = clang $< -o a.bc -Oz -emit-llvm -c -Xclang -disable-llvm-passes -Xclang -disable-llvm-optzns ; opt -Oz a.bc -o b.bc ; clang b.bc -o $@ -Oz | |
06 = clang $< -o a.bc -emit-llvm -c -Xclang -disable-llvm-passes -Xclang -disable-llvm-optzns ; clang a.bc -o $@ -Oz | |
07 = clang $< -o a.bc -Oz -emit-llvm -c ; clang a.bc -o $@ | |
08 = clang $< -o a.bc -Oz -emit-llvm -c -Xclang -disable-llvm-passes -Xclang -disable-llvm-optzns ; opt -Oz a.bc -o out.bc ; clang out.bc -o $@ | |
Size of benchmarks when compiled using each method (in bytes): | |
mips gsm adpcm motion dfadd dfmul dfdiv aes blowfish dfsin sha jpeg | |
Method 01: 2760 5048 6842 7523 5399 4463 4863 11361 23320 7229 18804 31329 | |
Method 02: 2760 4968 6842 7523 5399 4439 4863 11225 23320 7229 18836 31321 | |
Method 03: 2760 4968 6842 7523 5399 4439 4863 11225 23320 7229 18836 31321 | |
Method 04: 2760 5048 6842 7523 5399 4463 4863 11361 23320 7229 18804 31329 | |
Method 05: 2760 4968 6842 7523 5399 4439 4863 11225 23320 7229 18804 31321 | |
Method 06: 3952 7416 9226 8635 6919 5775 6567 16765 27048 9933 19708 35460 | |
Method 07: 5360 8000 9378 9043 7831 6135 7279 15793 26669 12261 20156 36825 | |
Method 08: 5360 8336 9378 9043 7831 6135 7279 15993 26765 12261 20172 36825 | |
Size deltas of each method, relative to Method 01 (in bytes): | |
mips gsm adpcm motion dfadd dfmul dfdiv aes blowfish dfsin sha jpeg | |
Method 01: 0 0 0 0 0 0 0 0 0 0 0 0 | |
Method 02: 0 -80 0 0 0 -24 0 -136 0 0 32 -8 | |
Method 03: 0 -80 0 0 0 -24 0 -136 0 0 32 -8 | |
Method 04: 0 0 0 0 0 0 0 0 0 0 0 0 | |
Method 05: 0 -80 0 0 0 -24 0 -136 0 0 0 -8 | |
Method 06: 1192 2368 2384 1112 1520 1312 1704 5404 3728 2704 904 4131 | |
Method 07: 2600 2952 2536 1520 2432 1672 2416 4432 3349 5032 1352 5496 | |
Method 08: 2600 3288 2536 1520 2432 1672 2416 4632 3445 5032 1368 5496 | |
""" | |
import shlex | |
import compiler_gym | |
from pathlib import Path | |
import subprocess | |
import pandas as pd | |
from compiler_gym.util.shell_format import emph | |
import tempfile | |
# Versioned LLVM tool names. main() substitutes these for the generic "clang"
# and "opt" tokens that appear in each compilation command line.
compiler = "clang-10"
opt = "opt-10"
def print_size_of_gcc_chstone_benchmarks():
    """Print every CHStone benchmark with its object size, smallest first.

    Each benchmark of the ``benchmark://chstone-v0`` dataset is loaded into a
    ``gcc-v0`` CompilerGym environment and its ``obj_size`` observation is
    recorded. The (benchmark, size) pairs are then printed one per line in
    ascending order of size.
    """
    measurements = []
    with compiler_gym.make("gcc-v0") as env:
        for candidate in env.datasets["benchmark://chstone-v0"]:
            env.reset(benchmark=candidate)
            loaded = env.benchmark
            obj_size = env.observation.obj_size()
            measurements.append((loaded, obj_size))

    # Order by the recorded size, i.e. the last element of each pair.
    for loaded, obj_size in sorted(measurements, key=lambda pair: pair[-1]):
        print(loaded, obj_size)
def write_preprocessed_chstone_benchmark_sources_to_file(outdir: Path):
    """Dump the source of every CHStone benchmark into ``outdir`` as ``.c`` files.

    For each benchmark in the ``benchmark://chstone-v0`` dataset of the
    ``gcc-v0`` CompilerGym environment, the program contents are decoded as
    UTF-8 and written to ``<outdir><benchmark uri path>.c``. Each output path
    is printed as it is written.

    Args:
        outdir: Directory to write into; created (with parents) if missing.
    """
    outdir.mkdir(exist_ok=True, parents=True)
    with compiler_gym.make("gcc-v0") as env:
        for bm in env.datasets["benchmark://chstone-v0"]:
            env.reset(benchmark=bm)
            # NOTE(review): presumably uri.path begins with "/", which is what
            # separates the directory from the file name here - confirm.
            outpath = f"{outdir}{env.benchmark.uri.path}.c"
            print(outpath, flush=True)
            src = env.benchmark.proto.program.contents.decode("utf-8")
            # The contents were decoded as UTF-8, so write them back as UTF-8
            # rather than relying on the platform's locale-dependent default.
            with open(outpath, "w", encoding="utf-8") as f:
                print(src, file=f)
def enumerate_benchmark_names(outdir: Path):
    """Lazily yield the stem of every ``.c`` entry directly inside ``outdir``.

    Entries are produced in whatever order ``Path.iterdir`` returns them;
    entries with any other suffix are skipped.
    """
    yield from (
        entry.stem for entry in outdir.iterdir() if entry.suffix == ".c"
    )
def size_of_text_section(binary: Path) -> int:
    """Return the size of the .TEXT section for a binary.

    Runs ``llvm-size-10`` on ``binary`` and converts the first
    whitespace-separated field of the second-to-last newline-delimited chunk
    of its output to an integer.
    """
    command = ["llvm-size-10", str(binary)]
    report = subprocess.check_output(command, universal_newlines=True)
    chunks = report.split("\n")
    # NOTE(review): presumably the output ends with a newline, making the last
    # chunk empty and the one before it the data row for the binary - confirm.
    data_row = chunks[-2]
    fields = data_row.split()
    return int(fields[0])
def main():
    """Compile each CHStone benchmark with every method and print size tables.

    The benchmark sources are written to ``chstone/`` if ``chstone/adpcm.c``
    is not already present. Each compilation method is a ``;``-separated list
    of commands in which ``clang`` / ``opt`` stand for the configured tool
    binaries, ``$<`` for the benchmark source file and ``$@`` for the output
    binary. After all methods have run, two tables are printed: the size
    reported by ``size_of_text_section`` per method and benchmark, and the
    difference of each method relative to the first one.

    Raises:
        FileNotFoundError: If a benchmark source file is missing.
        subprocess.CalledProcessError: If any compilation command fails.
        subprocess.TimeoutExpired: If a command runs longer than 60 seconds.
    """
    outdir = Path("chstone")
    if not (outdir / "adpcm.c").is_file():
        write_preprocessed_chstone_benchmark_sources_to_file(outdir)
    benchmarks = list(enumerate_benchmark_names(outdir))

    methods = []
    rows = []

    print(emph("List of compilation methods used:"))

    def _run_one(cmd: str, benchmark: str) -> int:
        """Run one method on one benchmark and return the measured size."""
        # Absolute path, because the commands run inside a scratch directory.
        source = (outdir / f"{benchmark}.c").resolve()
        if not source.is_file():
            raise FileNotFoundError(f"Benchmark source not found: {source}")

        with tempfile.TemporaryDirectory() as tmp:
            workdir = Path(tmp)
            binary = workdir / "a.out"
            # Substitute variables and binary names.
            substitutions = {
                "clang": compiler,
                "opt": opt,
                "$<": str(source),
                "$@": str(binary),
            }
            for command in cmd.split(";"):
                args = [
                    substitutions.get(arg, arg) for arg in shlex.split(command)
                ]
                if not args:
                    # Tolerate an empty segment such as a trailing ";".
                    continue
                # Run in the scratch directory so that intermediate files
                # (a.bc, b.bc, out.bc) are removed with it instead of being
                # left behind in the caller's working directory.
                subprocess.check_call(args, cwd=str(workdir), timeout=60)
            return size_of_text_section(binary)

    def method(command_line: str):
        """Register a method, print it, and measure it on every benchmark."""
        methods.append(command_line)
        print(f"{len(methods):02d} = {command_line}")
        rows.append({bm: _run_one(command_line, bm) for bm in benchmarks})

    def finish():
        """Print the size table and the deltas relative to the first method."""
        baseline = rows[0] if rows else {}
        deltas = [
            {bm: size - baseline[bm] for bm, size in row.items()}
            for row in rows
        ]
        index = [f"Method {i:02}:" for i in range(1, len(methods) + 1)]
        sizes = pd.DataFrame(rows, index=index)
        deltas = pd.DataFrame(deltas, index=index)
        print()
        print(emph("Size of benchmarks when compiled using each method (in bytes):"))
        print(sizes)
        print()
        print(emph("Size deltas of each method, relative to Method 01 (in bytes):"))
        print(deltas)

    method("clang $< -o $@ -Oz")
    method("clang $< -o a.bc -Oz -emit-llvm -c ; clang -Oz a.bc -o $@")
    method("clang $< -o a.bc -Oz -emit-llvm -c ; opt -Oz a.bc -o b.bc ; clang -Oz b.bc -o $@")
    method("clang $< -o a.bc -Oz -emit-llvm -c -Xclang -disable-llvm-passes -Xclang -disable-llvm-optzns ; clang a.bc -Oz -o $@")
    method("clang $< -o a.bc -Oz -emit-llvm -c -Xclang -disable-llvm-passes -Xclang -disable-llvm-optzns ; opt -Oz a.bc -o b.bc ; clang b.bc -o $@ -Oz")
    method("clang $< -o a.bc -emit-llvm -c -Xclang -disable-llvm-passes -Xclang -disable-llvm-optzns ; clang a.bc -o $@ -Oz")
    method("clang $< -o a.bc -Oz -emit-llvm -c ; clang a.bc -o $@")
    method("clang $< -o a.bc -Oz -emit-llvm -c -Xclang -disable-llvm-passes -Xclang -disable-llvm-optzns ; opt -Oz a.bc -o out.bc ; clang out.bc -o $@")
    finish()
# Run the evaluation only when this file is executed as a script.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment