@glenn-jocher
Last active January 16, 2025 18:08
Profile Python import speeds
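
A small script that times a cold import of each listed dependency in its own subprocess and reports the mean and standard deviation over five runs, slowest first.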

import re
import subprocess
import sys
from statistics import mean, stdev
from typing import Dict, List, Tuple

from tqdm import tqdm

DEPENDENCIES = [
    "numpy",
    "matplotlib",
    "opencv-python",
    "pillow",
    "pyyaml",
    "requests",
    "scipy",
    "torch",
    "torchvision",
    "tqdm",
    "psutil",
    "py-cpuinfo",
    "pandas",
    "seaborn",
    "ultralytics-thop",
    "ultralytics-autoimport",
    "ultralytics-actions",
    "ultralytics",
    "pycocotools",
    "albumentations",
    # Standard-library modules, included for comparison (not pip packages)
    "shutil",
    "re",
    "subprocess",
    "multiprocessing",
    "threading",
]

# Mapping for packages whose import name differs from the pip package name
IMPORT_NAMES = {
    "opencv-python": "cv2",
    "pillow": "PIL",
    "pyyaml": "yaml",
    "py-cpuinfo": "cpuinfo",
    "ultralytics-thop": "thop",
    "ultralytics-autoimport": "autoimport",
    "ultralytics-actions": "actions",
}
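
# Each timing runs in a fresh interpreter: once a module is cached in
# sys.modules, re-importing it in the same process is nearly free, so
# in-process timing would only measure the first import accurately.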


def parse_package_name(dep: str) -> str:
    """Extract the base package name from a dependency string, e.g. 'numpy>=1.23' -> 'numpy'."""
    return re.split(r"[><=!~;\[ ]", dep.strip(), maxsplit=1)[0]


def measure_import_time(package: str, import_name: str) -> float:
    """Measure import time for a single package in a fresh subprocess (avoids sys.modules caching)."""
    cmd = f"""
import time
t = time.perf_counter()
import {import_name}
print(time.perf_counter() - t)
"""
    try:
        result = subprocess.run([sys.executable, "-c", cmd], capture_output=True, text=True, timeout=10)
        if result.returncode == 0:
            return float(result.stdout)
        print(f"\nFailed to import {package}: {result.stderr.strip()}")
        return -1
    except (subprocess.TimeoutExpired, ValueError) as e:
        print(f"\nError importing {package}: {e}")
        return -1


def benchmark_import(package: str, pbar: tqdm, iterations: int = 5) -> Tuple[float, float]:
    """Run multiple iterations of import timing, each in its own subprocess."""
    times = []
    import_name = IMPORT_NAMES.get(package, package)
    for i in range(iterations):
        pbar.set_description(f"Benchmarking {package:<20} {i + 1}/{iterations}")
        elapsed = measure_import_time(package, import_name)
        if elapsed == -1:
            return -1, 0
        times.append(elapsed)
    return mean(times), stdev(times)


def benchmark_all_packages(packages: List[str]) -> Tuple[Dict[str, Tuple[float, float]], List[str]]:
    """Run benchmarks for all packages; return (results sorted slowest first, failed package names)."""
    results = {}
    failed = []
    base_packages = sorted({parse_package_name(dep) for dep in packages})
    pbar = tqdm(base_packages)
    for pkg in pbar:
        avg_time, std_dev = benchmark_import(pkg, pbar)
        if avg_time != -1:
            results[pkg] = (avg_time, std_dev)
        else:
            failed.append(pkg)
    return dict(sorted(results.items(), key=lambda x: x[1][0], reverse=True)), failed


def main():
    """Benchmark import speeds for the specified dependencies."""
    results, failed = benchmark_all_packages(DEPENDENCIES)

    # Convert results to a pandas DataFrame for display (deferred import; pandas is only needed for reporting)
    import pandas as pd

    df = pd.DataFrame(
        [{"Package": pkg, "Time (s)": avg_time, "1-sigma": std_dev} for pkg, (avg_time, std_dev) in results.items()]
    )
    if not df.empty:
        # Format floats to 3 decimal places
        pd.options.display.float_format = "{:.3f}".format
        print("\nImport Speed Benchmarks:")
        print(df.to_string(index=False))
    if failed:
        print("\nFailed imports:")
        for pkg in failed:
            print(f"- {pkg}")


if __name__ == "__main__":
    main()
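
For a quick one-off check of a single module, the same isolated-subprocess technique works on its own. A minimal sketch, with numpy standing in as an example module (CPython's built-in "python -X importtime" flag is an alternative that also breaks the time down per sub-import):

import subprocess
import sys

# Time one cold import in a fresh interpreter so sys.modules caching can't skew the result
snippet = "import time; t = time.perf_counter(); import numpy; print(time.perf_counter() - t)"
out = subprocess.run([sys.executable, "-c", snippet], capture_output=True, text=True, timeout=10)
print(f"numpy cold import: {float(out.stdout):.3f} s")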