Skip to content

Instantly share code, notes, and snippets.

@zacharysyoung
Last active February 8, 2023 18:51
Show Gist options
  • Save zacharysyoung/981266cd80ce65723aca368350375ffc to your computer and use it in GitHub Desktop.
Save zacharysyoung/981266cd80ce65723aca368350375ffc to your computer and use it in GitHub Desktop.
Iteratively run and time a program, extracting results from `/usr/bin/time -l ...`
#!/usr/bin/env python3
import csv
import re
import subprocess
from typing import TypedDict
import glob, os, sys
# Filename template for the generated input CSVs. The "{}" placeholder is
# filled with the row count when generating, and with "*" when globbing for
# files to delete.
GEN_CSV_TMPL = "gen_{}x1.csv"
# Output CSV that receives a header row plus one stats row per timed run.
STATS_CSV = "stats.csv"
def clean():
    """Delete all generated input CSVs and the stats CSV.

    Paths that do not exist are skipped silently, so this is safe to call
    on a fresh directory.
    """
    # Generated inputs first, then the stats file.
    targets = glob.glob(GEN_CSV_TMPL.format("*"))
    targets.append(STATS_CSV)

    for target in targets:
        try:
            os.remove(target)
        except FileNotFoundError:
            # Already gone: nothing to clean up for this path.
            continue
class TimeResults(TypedDict):
    """Stats extracted from a single `/usr/bin/time -l` run."""

    real: float  # "real" timing, in seconds
    user: float  # "user" timing, in seconds
    sys: float  # "sys" timing, in seconds
    mem: int  # peak memory footprint, in bytes
def parse_time_results(stat_str: str) -> "TimeResults":
    """Extract stats from multiline output from `/usr/bin/time -l`.

    The whole text is scanned line by line rather than assuming the timing
    line is first and the memory line is last: when stderr is merged into
    stdout, anything the timed program prints ends up in the same text.
    If a pattern matches more than once, the last match wins, since `time`
    writes its report after the timed program has finished.

    Args:
        stat_str: Captured output containing a line like
            "        0.00 real         0.00 user         0.00 sys" and a line
            like "              951040  peak memory footprint".

    Returns:
        The timings in seconds and the peak memory footprint in bytes.

    Raises:
        ValueError: If the timing line or the peak-memory line is missing
            (including when `stat_str` is empty).
    """
    timings_re = re.compile(r"\s*(\d+\.\d+) real\s+(\d+\.\d+) user\s+(\d+\.\d+) sys")
    memory_re = re.compile(r"\s*(\d+)\s+peak memory footprint")

    m_timings = None
    m_memory = None
    for line in stat_str.splitlines():
        # Match objects are always truthy, so `or` keeps the previous match
        # only when the current line does not match.
        m_timings = timings_re.match(line) or m_timings
        m_memory = memory_re.match(line) or m_memory

    # Explicit raises instead of `assert`: asserts vanish under `python -O`.
    if m_timings is None:
        raise ValueError(f"no 'real ... user ... sys' line found in: {stat_str!r}")
    if m_memory is None:
        raise ValueError(f"no 'peak memory footprint' line found in: {stat_str!r}")

    return {
        "real": float(m_timings.group(1)),
        "user": float(m_timings.group(2)),
        "sys": float(m_timings.group(3)),
        "mem": int(m_memory.group(1)),
    }
def header() -> list[str]:
    """Return the column titles of the stats CSV, in output order."""
    input_cols = ["Input size (MB)", "Input rows"]
    timing_cols = ["Real (s)", "User (s)", "Sys (s)"]
    memory_cols = ["Mem (MB)"]
    return [*input_cols, *timing_cols, *memory_cols]
def to_row(fsize: float, n_rows: int, stats: TimeResults) -> list[str]:
    """Format one run's measurements as a list of CSV cell strings.

    `fsize` and `stats["mem"]` are byte counts and are converted to MB
    (1 MB = 1024 * 1024 bytes). Every cell except the row count is rendered
    with two decimal places.
    """
    bytes_per_mb = 1024 * 1024
    two_places = ".2f"

    return [
        format(float(fsize) / bytes_per_mb, two_places),
        str(n_rows),
        format(stats["real"], two_places),
        format(stats["user"], two_places),
        format(stats["sys"], two_places),
        format(float(stats["mem"]) / bytes_per_mb, two_places),
    ]
def main():
    """Time `./main.py` against generated CSVs of increasing size.

    With no arguments: remove the artifacts of earlier runs, then for each
    input size generate a single-column CSV, run
    `/usr/bin/time -l ./main.py <csv>` with stderr merged into stdout, and
    append the parsed timings and peak memory to the stats CSV.

    With `clean` as the first argument: only remove earlier artifacts and
    exit with status 0. Any other first argument prints a usage error to
    stderr and exits with status 1.

    Raises:
        subprocess.CalledProcessError: If the timed command exits non-zero;
            stats from a failed run would be meaningless.
    """
    # Handle cmd-line args, if any
    if len(sys.argv) > 1:
        if sys.argv[1] == "clean":
            clean()
            sys.exit(0)
        # sys.exit(<str>) writes the message to stderr and exits with 1.
        sys.exit("error: run_time.py [clean]")

    # Clean-up previous input CSVs
    clean()

    input_rows = [1350, 13500, 135000, 1350000]

    # Create stats CSV; `with` closes it even if one of the runs fails.
    with open(STATS_CSV, "w", newline="") as f_stats:
        stats_writer = csv.writer(f_stats)
        stats_writer.writerow(header())

        for n_rows in input_rows:
            csv_name = GEN_CSV_TMPL.format(n_rows)

            # Generate input CSV: a "Col1" header followed by n_rows
            # zero-padded 32-digit hex counters.
            with open(csv_name, "w", newline="") as f:
                gen_writer = csv.writer(f)
                gen_writer.writerow(["Col1"])
                gen_writer.writerows([f"{i:032x}"] for i in range(n_rows))

            # Start stat-ing input CSV
            fsize = os.stat(csv_name).st_size

            # Argument list instead of a shell string: no shell is needed
            # just to merge stderr into stdout.
            argv = ["/usr/bin/time", "-l", "./main.py", csv_name]
            print(f"Running `{' '.join(argv)} 2>&1`")
            proc = subprocess.run(
                argv,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,  # `time` reports on stderr
                encoding="utf-8",
                check=True,  # waits for the child and fails loudly on error
            )

            stats = parse_time_results(proc.stdout)
            stats_writer.writerow(to_row(fsize, n_rows, stats))
            # Flush per row so partial results survive an interrupted run.
            f_stats.flush()
# Run the benchmark only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment