Skip to content

Instantly share code, notes, and snippets.

@ericboehs
Created February 18, 2026 17:55
Show Gist options
  • Select an option

  • Save ericboehs/9220fe2601e15eb052a889289b8e8353 to your computer and use it in GitHub Desktop.

Select an option

Save ericboehs/9220fe2601e15eb052a889289b8e8353 to your computer and use it in GitHub Desktop.
Claude Code usage stats analyzer - interactions/day, tokens, cache hit rate, estimated API costs
#!/usr/bin/env python3
"""Claude Code usage stats analyzer.
Parses local Claude Code session data to show:
- Average interactions per day
- Average input/output tokens per interaction
- Average tool call chain depth
- Token breakdown by category (base, cache write, cache read)
- Estimated API costs by model
Usage:
python3 claude-code-stats.py
"""
import json
import sys
from collections import Counter, defaultdict
from datetime import datetime
from pathlib import Path
# Local Claude Code data locations. Everything below is read from disk only;
# nothing is sent over the network.
CLAUDE_DIR = Path.home() / ".claude"
PROJECTS_DIR = CLAUDE_DIR / "projects"
HISTORY_FILE = CLAUDE_DIR / "history.jsonl"

# Pricing per million tokens (USD) - https://platform.claude.com/docs/en/about-claude/pricing
# Keys are normalized model-id prefixes (get_pricing lowercases the id,
# replaces "." with "-", and prefix-matches, so dated ids like
# "claude-opus-4-5-20250301" resolve too). Each entry gives USD per million
# tokens for base input, output, 5m/1h-TTL cache writes, and cache reads.
MODEL_PRICING = {
    "claude-opus-4-6": {
        "label": "Opus 4.6",
        "input": 5.00,
        "output": 25.00,
        "cache_write_5m": 6.25,
        "cache_write_1h": 10.00,
        "cache_read": 0.50,
    },
    "claude-opus-4-5": {
        "label": "Opus 4.5",
        "input": 5.00,
        "output": 25.00,
        "cache_write_5m": 6.25,
        "cache_write_1h": 10.00,
        "cache_read": 0.50,
    },
    "claude-sonnet-4-6": {
        "label": "Sonnet 4.6",
        "input": 3.00,
        "output": 15.00,
        "cache_write_5m": 3.75,
        "cache_write_1h": 6.00,
        "cache_read": 0.30,
    },
    "claude-sonnet-4-5": {
        "label": "Sonnet 4.5",
        "input": 3.00,
        "output": 15.00,
        "cache_write_5m": 3.75,
        "cache_write_1h": 6.00,
        "cache_read": 0.30,
    },
    "claude-sonnet-4-0": {
        "label": "Sonnet 4",
        "input": 3.00,
        "output": 15.00,
        "cache_write_5m": 3.75,
        "cache_write_1h": 6.00,
        "cache_read": 0.30,
    },
    "claude-haiku-4-5": {
        "label": "Haiku 4.5",
        "input": 1.00,
        "output": 5.00,
        "cache_write_5m": 1.25,
        "cache_write_1h": 2.00,
        "cache_read": 0.10,
    },
}

# Fallback pricing tiers for unknown models, matched by substring in order
# (opus, then sonnet, then haiku) — see get_pricing.
PRICING_FALLBACKS = [
    ("opus", {
        "label": "Opus (other)",
        "input": 5.00, "output": 25.00,
        "cache_write_5m": 6.25, "cache_write_1h": 10.00, "cache_read": 0.50,
    }),
    ("sonnet", {
        "label": "Sonnet (other)",
        "input": 3.00, "output": 15.00,
        "cache_write_5m": 3.75, "cache_write_1h": 6.00, "cache_read": 0.30,
    }),
    ("haiku", {
        "label": "Haiku (other)",
        "input": 1.00, "output": 5.00,
        "cache_write_5m": 1.25, "cache_write_1h": 2.00, "cache_read": 0.10,
    }),
]
def get_pricing(model_id):
    """Return the pricing table for *model_id*, or None if unrecognized.

    Exact (prefix) matches against MODEL_PRICING win over the family-level
    keyword fallbacks in PRICING_FALLBACKS.
    """
    if not model_id:
        return None
    # Normalize to dashed lowercase; prefix matching then also covers
    # date-suffixed ids like "claude-opus-4-5-20250301".
    normalized = model_id.replace(".", "-").lower()
    exact = next(
        (table for prefix, table in MODEL_PRICING.items()
         if normalized.startswith(prefix)),
        None,
    )
    if exact is not None:
        return exact
    # Family-level fallback: first keyword found anywhere in the id wins.
    return next(
        (table for keyword, table in PRICING_FALLBACKS
         if keyword in normalized),
        None,
    )
def find_session_files():
    """Return all session transcripts (*.jsonl) under the projects dir.

    An empty list is returned when the directory does not exist.
    """
    return list(PROJECTS_DIR.rglob("*.jsonl")) if PROJECTS_DIR.exists() else []
def parse_history_dates():
    """Count prompt-history entries per calendar day.

    Reads ~/.claude/history.jsonl and returns a defaultdict mapping
    "YYYY-MM-DD" -> number of entries recorded that day. Returns an empty
    mapping when the file is missing or unreadable; malformed lines are
    skipped silently.
    """
    dates = defaultdict(int)
    if not HISTORY_FILE.exists():
        return dates
    try:
        with open(HISTORY_FILE) as f:
            for line in f:
                try:
                    d = json.loads(line)
                    ts = d.get("timestamp")
                    if ts:
                        # Timestamps are treated as epoch milliseconds;
                        # bucket by local calendar date.
                        dt = datetime.fromtimestamp(ts / 1000).strftime("%Y-%m-%d")
                        dates[dt] += 1
                except (json.JSONDecodeError, ValueError):
                    continue
    except OSError:
        # Consistent with analyze_sessions(): an unreadable file should
        # degrade gracefully rather than abort the whole report.
        return dates
    return dates
def analyze_sessions(session_files):
    """Aggregate per-interaction token usage from session transcripts.

    Returns (interactions, token_totals, model_totals):
      - interactions: list of {"input_tokens", "output_tokens", "chain_depth"}
        dicts, one per user turn that produced assistant output;
      - token_totals: global sums per token category;
      - model_totals: model id -> per-category sums plus a "turns" count.
    """
    categories = ("base_input", "cache_write_5m", "cache_write_1h",
                  "cache_read", "output")
    interactions = []
    token_totals = dict.fromkeys(categories, 0)
    model_totals = defaultdict(lambda: dict.fromkeys(categories + ("turns",), 0))

    for path in session_files:
        try:
            with open(path) as f:
                raw_lines = f.readlines()
        except OSError:
            continue

        # Keep only well-formed user/assistant records, in transcript order.
        convo = []
        for raw in raw_lines:
            try:
                record = json.loads(raw)
            except json.JSONDecodeError:
                continue
            if record.get("type") in ("user", "assistant"):
                convo.append(record)

        pos, total = 0, len(convo)
        while pos < total:
            if convo[pos].get("type") != "user":
                # Assistant records not preceded by a user turn are skipped.
                pos += 1
                continue
            # A user turn opens an interaction; each consecutive assistant
            # record after it is one link of the tool-call chain.
            pos += 1
            inp_sum = out_sum = depth = 0
            while pos < total and convo[pos].get("type") == "assistant":
                message = convo[pos].get("message", {})
                usage = message.get("usage", {})
                model = message.get("model", "unknown")

                creation = usage.get("cache_creation", {})
                write_5m = creation.get("ephemeral_5m_input_tokens", 0)
                write_1h = creation.get("ephemeral_1h_input_tokens", 0)
                # Older records report only a combined cache-write total;
                # attribute that to the 5m bucket.
                if write_5m == 0 and write_1h == 0:
                    combined = usage.get("cache_creation_input_tokens", 0)
                    if combined > 0:
                        write_5m = combined

                per_turn = {
                    "base_input": usage.get("input_tokens", 0),
                    "cache_write_5m": write_5m,
                    "cache_write_1h": write_1h,
                    "cache_read": usage.get("cache_read_input_tokens", 0),
                    "output": usage.get("output_tokens", 0),
                }
                for key, amount in per_turn.items():
                    token_totals[key] += amount
                    model_totals[model][key] += amount
                model_totals[model]["turns"] += 1

                inp_sum += (per_turn["base_input"] + per_turn["cache_read"]
                            + per_turn["cache_write_5m"] + per_turn["cache_write_1h"])
                out_sum += per_turn["output"]
                depth += 1
                pos += 1

            if inp_sum > 0 or out_sum > 0:
                interactions.append({
                    "input_tokens": inp_sum,
                    "output_tokens": out_sum,
                    "chain_depth": depth,
                })

    return interactions, token_totals, dict(model_totals)
def calc_cost(totals, pricing):
    """Dollar cost of *totals* (token counts per category) under *pricing*
    (USD per million tokens per category)."""
    # Maps token-count categories in *totals* to rate keys in *pricing*.
    rate_for = {
        "base_input": "input",
        "cache_write_5m": "cache_write_5m",
        "cache_write_1h": "cache_write_1h",
        "cache_read": "cache_read",
        "output": "output",
    }
    return sum(
        totals[category] / 1_000_000 * pricing[rate]
        for category, rate in rate_for.items()
    )
def median(lst):
    """Median of a numeric sequence; 0 for an empty one.

    Even-length inputs return the mean of the two middle values.
    """
    if not lst:
        return 0
    ordered = sorted(lst)
    mid, odd = divmod(len(ordered), 2)
    if odd:
        return ordered[mid]
    return (ordered[mid - 1] + ordered[mid]) / 2
def percentile(lst, p):
    """Nearest-rank p-th percentile of a numeric sequence; 0 for empty."""
    if not lst:
        return 0
    ordered = sorted(lst)
    # Truncated rank, clamped to the last element for p near/at 100.
    rank = min(int(len(ordered) * p / 100), len(ordered) - 1)
    return ordered[rank]
def fmt(n):
    """Format a token count for display with a B/M/K suffix."""
    for threshold, suffix in ((1_000_000_000, "B"), (1_000_000, "M"), (1_000, "K")):
        if n >= threshold:
            return f"{n / threshold:.1f}{suffix}"
    return f"{n:.0f}"
def main():
    """Entry point: read local Claude Code data and print the usage report."""
    if not CLAUDE_DIR.exists():
        print("Error: ~/.claude directory not found.", file=sys.stderr)
        print("Are you sure Claude Code is installed?", file=sys.stderr)
        sys.exit(1)
    session_files = find_session_files()
    if not session_files:
        print("No session files found in ~/.claude/projects/", file=sys.stderr)
        sys.exit(1)
    history_dates = parse_history_dates()
    print(f"Analyzing {len(session_files)} sessions...", end="", flush=True)
    interactions, token_totals, model_totals = analyze_sessions(session_files)
    print(" done.\n")
    input_tokens = [i["input_tokens"] for i in interactions]
    output_tokens = [i["output_tokens"] for i in interactions]
    chain_depths = [i["chain_depth"] for i in interactions]
    n = len(interactions)
    # Per-day stats come from history.jsonl; when it is missing, fall back to
    # neutral values so the averages below stay defined.
    num_active_days = len(history_dates) if history_dates else 1
    total_history_msgs = sum(history_dates.values()) if history_dates else n
    date_min = min(history_dates.keys()) if history_dates else "?"
    date_max = max(history_dates.keys()) if history_dates else "?"
    avg_per_day = total_history_msgs / num_active_days
    avg_inp = sum(input_tokens) / n if n else 0
    avg_out = sum(output_tokens) / n if n else 0
    avg_depth = sum(chain_depths) / n if n else 0
    # Token category totals
    t = token_totals
    total_all_input = t["base_input"] + t["cache_write_5m"] + t["cache_write_1h"] + t["cache_read"]
    cache_hit_rate = t["cache_read"] / total_all_input * 100 if total_all_input else 0
    w = 60  # report width in characters
    print("=" * w)
    print(" CLAUDE CODE USAGE STATS")
    print("=" * w)
    print()
    print(f" Date range: {date_min} to {date_max}")
    print(f" Active days: {num_active_days}")
    print(f" Sessions: {len(session_files):,}")
    print(f" Interactions: {n:,}")
    print()
    # --- Key Metrics ---
    print("-" * w)
    print(" KEY METRICS")
    print("-" * w)
    print()
    print(f" Avg interactions/day: {avg_per_day:.1f}")
    print()
    print(f" Avg input tokens/interaction: {fmt(avg_inp)}")
    print(f" Median: {fmt(median(input_tokens))}")
    print(f" P90: {fmt(percentile(input_tokens, 90))}")
    print()
    print(f" Avg output tokens/interaction: {fmt(avg_out)}")
    print(f" Median: {fmt(median(output_tokens))}")
    print(f" P90: {fmt(percentile(output_tokens, 90))}")
    print()
    print(f" Avg tool call chain depth: {avg_depth:.1f}")
    print(f" Median: {median(chain_depths):.0f}")
    print(f" P90: {percentile(chain_depths, 90)}")
    print()
    # --- Token Breakdown ---
    print("-" * w)
    print(" TOKEN BREAKDOWN")
    print("-" * w)
    print()
    print(f" Base input tokens: {t['base_input']:>15,} ({fmt(t['base_input'])})")
    print(f" Cache write (5m): {t['cache_write_5m']:>15,} ({fmt(t['cache_write_5m'])})")
    print(f" Cache write (1h): {t['cache_write_1h']:>15,} ({fmt(t['cache_write_1h'])})")
    print(f" Cache read (hits): {t['cache_read']:>15,} ({fmt(t['cache_read'])})")
    print(f" Output tokens: {t['output']:>15,} ({fmt(t['output'])})")
    print(f" {'─' * 15}")
    print(f" Total input tokens: {total_all_input:>15,} ({fmt(total_all_input)})")
    print()
    print(f" Cache hit rate: {cache_hit_rate:.1f}%")
    print(f" (cache reads / total input tokens)")
    print()
    # --- Chain Depth ---
    print("-" * w)
    print(" CHAIN DEPTH DISTRIBUTION")
    print("-" * w)
    if chain_depths:
        depth_counts = Counter(chain_depths)
        max_count = max(depth_counts.values())
        # ASCII histogram of depths 1..10, bars scaled to 30 chars max.
        for depth in sorted(depth_counts.keys())[:10]:
            count = depth_counts[depth]
            bar_len = int(count / max_count * 30)
            bar = "#" * bar_len
            print(f" {depth:2d}: {count:6,} {bar}")
        # Anything deeper than 10 is collapsed into a single bucket.
        deep = sum(v for k, v in depth_counts.items() if k > 10)
        if deep:
            print(f" >10: {deep:6,}")
        print(f" max: {max(chain_depths)}")
    print()
    # --- Cost Estimate ---
    print("-" * w)
    print(" ESTIMATED API COST (by model)")
    print("-" * w)
    print()
    grand_total_cost = 0.0
    # Most-used models first.
    sorted_models = sorted(model_totals.items(), key=lambda x: x[1]["turns"], reverse=True)
    for model_id, totals in sorted_models:
        pricing = get_pricing(model_id)
        if not pricing:
            # Unknown models are listed but excluded from the grand total.
            label = model_id
            cost_str = "(unknown pricing)"
        else:
            label = pricing["label"]
            cost = calc_cost(totals, pricing)
            grand_total_cost += cost
            cost_str = f"${cost:,.2f}"
        model_input = totals["base_input"] + totals["cache_write_5m"] + totals["cache_write_1h"] + totals["cache_read"]
        model_cache_hits = totals["cache_read"]
        model_hit_rate = model_cache_hits / model_input * 100 if model_input else 0
        print(f" {label} ({model_id})")
        print(f" Turns: {totals['turns']:,} | Input: {fmt(model_input)} | Output: {fmt(totals['output'])}")
        print(f" Cache hit rate: {model_hit_rate:.1f}% | Cost: {cost_str}")
        print()
    print(f" {'─' * (w - 4)}")
    print(f" TOTAL ESTIMATED COST: ${grand_total_cost:,.2f}")
    print(f" Cost per active day: ${grand_total_cost / num_active_days:,.2f}")
    print(f" Cost per interaction: ${grand_total_cost / n if n else 0:,.4f}")
    print()
    # --- Hypothetical: What if no caching? ---
    print("-" * w)
    print(" WHAT IF: No prompt caching?")
    print("-" * w)
    print()
    nocache_cost = 0.0
    for model_id, totals in sorted_models:
        pricing = get_pricing(model_id)
        if not pricing:
            continue
        # Without caching: all cache_write + cache_read would be base input
        mtok = 1_000_000
        all_input = totals["base_input"] + totals["cache_write_5m"] + totals["cache_write_1h"] + totals["cache_read"]
        nocache_cost += all_input / mtok * pricing["input"] + totals["output"] / mtok * pricing["output"]
    savings = nocache_cost - grand_total_cost
    savings_pct = savings / nocache_cost * 100 if nocache_cost else 0
    print(f" Cost without caching: ${nocache_cost:,.2f}")
    print(f" Actual cost: ${grand_total_cost:,.2f}")
    print(f" Savings from caching: ${savings:,.2f} ({savings_pct:.1f}%)")
    print()
    # --- Top 5 Busiest Days ---
    if history_dates:
        print("-" * w)
        print(" TOP 5 BUSIEST DAYS")
        print("-" * w)
        sorted_days = sorted(history_dates.items(), key=lambda x: x[1], reverse=True)[:5]
        for day, count in sorted_days:
            print(f" {day}: {count} interactions")
        print()


if __name__ == "__main__":
    main()
@ericboehs
Copy link
Author

Claude Code Usage Stats

Analyzes your local Claude Code session data to give you a full picture of your usage and estimated API costs.

Requirements

  • Python 3.6+
  • Claude Code installed (reads from ~/.claude/)

Usage

curl -sL https://gist.githubusercontent.com/ericboehs/9220fe2601e15eb052a889289b8e8353/raw/claude-code-stats.py | python3

Or download and run:

wget https://gist.githubusercontent.com/ericboehs/9220fe2601e15eb052a889289b8e8353/raw/claude-code-stats.py
python3 claude-code-stats.py

What it reports

  • Interactions/day — average user messages per active day
  • Input/output tokens per interaction — avg, median, P90
  • Tool call chain depth — how many consecutive assistant turns per interaction
  • Token breakdown — base input, cache writes (5m/1h TTL), cache reads
  • Cache hit rate — percentage of input tokens served from cache
  • Estimated API cost — broken down by model, using current Anthropic pricing
  • "What if no caching?" — shows how much prompt caching is saving you
  • Busiest days — top 5 days by interaction count

Notes

  • All data is read locally from ~/.claude/projects/ and ~/.claude/history.jsonl — nothing is sent anywhere
  • Cost estimates use public Anthropic API pricing as of Feb 2026
  • Models without known pricing (e.g. third-party) show "(unknown pricing)" and are excluded from totals

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment