Last active
January 16, 2025 18:08
-
-
Save glenn-jocher/79025909019b15dbfde945b70bb123c6 to your computer and use it in GitHub Desktop.
Profile Python import speeds
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re
import subprocess
import sys
from statistics import mean, stdev
from typing import Dict, List, Tuple

from tqdm import tqdm
# Package names whose import time is benchmarked.
# Entries are distribution (pip-style) names; where the importable module is
# named differently, the translation is looked up in IMPORT_NAMES, otherwise
# the entry itself is used as the module name.
# The trailing entries (shutil, re, subprocess, multiprocessing, threading)
# are Python standard-library modules.
DEPENDENCIES = [
    "numpy",
    "matplotlib",
    "opencv-python",
    "pillow",
    "pyyaml",
    "requests",
    "scipy",
    "torch",
    "torchvision",
    "tqdm",
    "psutil",
    "py-cpuinfo",
    "pandas",
    "seaborn",
    "ultralytics-thop",
    "ultralytics-autoimport",
    "ultralytics-actions",
    "ultralytics",
    "pycocotools",
    "albumentations",
    "shutil",
    "re",
    "subprocess",
    "multiprocessing",
    "threading",
]
# Mapping for packages with different import names.
# Key: the name as listed in DEPENDENCIES; value: the module name used in the
# generated ``import`` statement. Packages absent from this mapping are
# imported under their listed name.
IMPORT_NAMES = {
    "opencv-python": "cv2",
    "pillow": "PIL",
    "pyyaml": "yaml",
    "py-cpuinfo": "cpuinfo",
    "ultralytics-thop": "thop",
    "ultralytics-autoimport": "autoimport",
    "ultralytics-actions": "actions",
}
def parse_package_name(dep: str) -> str:
    """Extract the base package name from a dependency string.

    Surrounding whitespace is ignored, and everything after the leading name
    is dropped: extras (``pkg[extra]``), version specifiers (``pkg>=1.0``),
    environment markers (``pkg ; python_version < "3.9"``) and direct
    references (``pkg @ url``).

    Args:
        dep: A bare package name or a requirement-style string.

    Returns:
        The leading package name. If the string does not start with a
        recognizable name, the whitespace-stripped input is returned as is.
    """
    stripped = dep.strip()
    # A name starts with an alphanumeric character and may contain letters,
    # digits, dots, underscores and hyphens; the first other character
    # (e.g. '[', '=', '<', ';', ' ') ends it.
    match = re.match(r"[A-Za-z0-9][A-Za-z0-9._-]*", stripped)
    return match.group(0) if match else stripped
def measure_import_time(package: str, import_name: str) -> float:
    """Measure the import time of one module in a fresh interpreter process.

    A child Python process is started that times a single ``import`` statement
    with ``time.perf_counter`` and prints the elapsed seconds.

    Args:
        package: Name used in diagnostic messages.
        import_name: Module name placed in the ``import`` statement.

    Returns:
        Elapsed import time in seconds, or ``-1`` if the child process could
        not be started, timed out (10 s), exited with a non-zero status, or
        did not print a parseable number. A message is printed in every
        failure case.
    """
    # Joined without indentation so the child sees plain top-level statements.
    code = "\n".join(
        (
            "import time",
            "t = time.perf_counter()",
            f"import {import_name}",
            "print(time.perf_counter() - t)",
        )
    )
    try:
        # sys.executable is the interpreter running this script, so the child
        # uses the same environment instead of whatever "python" resolves to
        # on PATH (which may be a different install or missing entirely).
        result = subprocess.run([sys.executable, "-c", code], capture_output=True, text=True, timeout=10)
    except (subprocess.TimeoutExpired, OSError) as e:
        print(f"\nError importing {package}: {str(e)}")
        return -1

    if result.returncode != 0:
        print(f"\nFailed to import {package}: {result.stderr.strip()}")
        return -1

    try:
        # Some packages print to stdout while being imported; the timing is
        # always the last line the child writes, so parse only that line.
        return float(result.stdout.strip().splitlines()[-1])
    except (IndexError, ValueError) as e:
        print(f"\nError importing {package}: {str(e)}")
        return -1
def benchmark_import(package: str, pbar: tqdm, iterations: int = 5) -> Tuple[float, float]:
    """Time the import of one package over several isolated runs.

    Args:
        package: Package name; translated to a module name through
            IMPORT_NAMES when an entry exists, otherwise used directly.
        pbar: Progress bar whose description is updated before every run.
        iterations: Number of timing runs; must be at least 1 (``mean`` raises
            ``StatisticsError`` on an empty sample).

    Returns:
        ``(mean_seconds, stdev_seconds)`` over all runs. The standard
        deviation is ``0.0`` when only one run was made. If any run fails,
        ``(-1, 0)`` is returned immediately and the remaining runs are skipped.
    """
    import_name = IMPORT_NAMES.get(package, package)
    times = []
    for i in range(iterations):
        pbar.set_description(f"Benchmarking {package:<20} {i + 1}/{iterations}")
        elapsed = measure_import_time(package, import_name)
        if elapsed == -1:
            return -1, 0
        times.append(elapsed)
    # stdev() needs at least two samples; a single run has no spread to report.
    spread = stdev(times) if len(times) > 1 else 0.0
    return mean(times), spread
def benchmark_all_packages(packages: List[str]) -> Tuple[Dict[str, Tuple[float, float]], List[str]]:
    """Benchmark every package and split the outcome into successes and failures.

    Names are normalized with ``parse_package_name``, de-duplicated and
    processed in alphabetical order under a tqdm progress bar.

    Args:
        packages: Dependency strings to benchmark.

    Returns:
        A ``(results, failed)`` tuple. ``results`` maps package name to
        ``(mean_seconds, stdev_seconds)`` and is ordered from slowest to
        fastest mean import time. ``failed`` lists the packages whose
        benchmark reported a failure.
    """
    results = {}
    failed = []
    # The set comprehension removes duplicates; sorting fixes the run order.
    base_packages = sorted({parse_package_name(dep) for dep in packages})
    pbar = tqdm(base_packages)
    for pkg in pbar:
        avg_time, std_dev = benchmark_import(pkg, pbar)
        if avg_time != -1:
            results[pkg] = (avg_time, std_dev)
        else:
            failed.append(pkg)
    # Slowest imports first: sort by the mean (first element of each value).
    ranked = dict(sorted(results.items(), key=lambda x: x[1][0], reverse=True))
    return ranked, failed
def main():
    """Benchmark import speeds for DEPENDENCIES and print a report.

    Prints a table of mean import time and standard deviation per package
    (when at least one package succeeded), followed by the list of packages
    that failed to import (when there are any).
    """
    results, failed = benchmark_all_packages(DEPENDENCIES)

    # pandas is imported here rather than at module level, matching the
    # original script.
    import pandas as pd

    # One row per successfully benchmarked package.
    df = pd.DataFrame(
        [{"Package": pkg, "Time (s)": avg, "1-sigma": std_dev} for pkg, (avg, std_dev) in results.items()]
    )

    if not df.empty:
        print("\nImport Speed Benchmarks:")
        # Format floats to 3 decimal places. The formatter is passed per call
        # so the process-wide pandas display options are left untouched.
        print(df.to_string(index=False, float_format="{:.3f}".format))

    if failed:
        print("\nFailed imports:")
        for pkg in failed:
            print(f"- {pkg}")
# Run the benchmark only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment