Skip to content

Instantly share code, notes, and snippets.

@ericboehs
Created February 18, 2026 17:55
Show Gist options
  • Select an option

  • Save ericboehs/9220fe2601e15eb052a889289b8e8353 to your computer and use it in GitHub Desktop.

Select an option

Save ericboehs/9220fe2601e15eb052a889289b8e8353 to your computer and use it in GitHub Desktop.
Claude Code usage stats analyzer - interactions/day, tokens, cache hit rate, estimated API costs
#!/usr/bin/env python3
"""Claude Code usage stats analyzer.
Parses local Claude Code session data to show:
- Average interactions per day
- Average input/output tokens per interaction
- Average tool call chain depth
- Token breakdown by category (base, cache write, cache read)
- Estimated API costs by model
Usage:
python3 claude-code-stats.py
"""
import json
import sys
from collections import Counter, defaultdict
from datetime import datetime
from pathlib import Path
# Local Claude Code data locations. Everything below is read from disk only;
# nothing is sent over the network.
CLAUDE_DIR = Path.home() / ".claude"
PROJECTS_DIR = CLAUDE_DIR / "projects"
HISTORY_FILE = CLAUDE_DIR / "history.jsonl"

# Pricing per million tokens (USD) - https://platform.claude.com/docs/en/about-claude/pricing
# Keys are normalized model-id prefixes (get_pricing lowercases the id,
# replaces "." with "-", and prefix-matches, so dated ids like
# "claude-opus-4-5-20250301" resolve too). Each entry gives USD per million
# tokens for base input, output, 5m/1h-TTL cache writes, and cache reads.
MODEL_PRICING = {
    "claude-opus-4-6": {
        "label": "Opus 4.6",
        "input": 5.00,
        "output": 25.00,
        "cache_write_5m": 6.25,
        "cache_write_1h": 10.00,
        "cache_read": 0.50,
    },
    "claude-opus-4-5": {
        "label": "Opus 4.5",
        "input": 5.00,
        "output": 25.00,
        "cache_write_5m": 6.25,
        "cache_write_1h": 10.00,
        "cache_read": 0.50,
    },
    "claude-sonnet-4-6": {
        "label": "Sonnet 4.6",
        "input": 3.00,
        "output": 15.00,
        "cache_write_5m": 3.75,
        "cache_write_1h": 6.00,
        "cache_read": 0.30,
    },
    "claude-sonnet-4-5": {
        "label": "Sonnet 4.5",
        "input": 3.00,
        "output": 15.00,
        "cache_write_5m": 3.75,
        "cache_write_1h": 6.00,
        "cache_read": 0.30,
    },
    "claude-sonnet-4-0": {
        "label": "Sonnet 4",
        "input": 3.00,
        "output": 15.00,
        "cache_write_5m": 3.75,
        "cache_write_1h": 6.00,
        "cache_read": 0.30,
    },
    "claude-haiku-4-5": {
        "label": "Haiku 4.5",
        "input": 1.00,
        "output": 5.00,
        "cache_write_5m": 1.25,
        "cache_write_1h": 2.00,
        "cache_read": 0.10,
    },
}

# Fallback pricing tiers for unknown models, matched by substring in order
# (opus, then sonnet, then haiku) — see get_pricing.
PRICING_FALLBACKS = [
    ("opus", {
        "label": "Opus (other)",
        "input": 5.00, "output": 25.00,
        "cache_write_5m": 6.25, "cache_write_1h": 10.00, "cache_read": 0.50,
    }),
    ("sonnet", {
        "label": "Sonnet (other)",
        "input": 3.00, "output": 15.00,
        "cache_write_5m": 3.75, "cache_write_1h": 6.00, "cache_read": 0.30,
    }),
    ("haiku", {
        "label": "Haiku (other)",
        "input": 1.00, "output": 5.00,
        "cache_write_5m": 1.25, "cache_write_1h": 2.00, "cache_read": 0.10,
    }),
]
def get_pricing(model_id):
    """Return the pricing table for *model_id*, or None if unrecognized.

    Exact (prefix) matches against MODEL_PRICING win over the family-level
    keyword fallbacks in PRICING_FALLBACKS.
    """
    if not model_id:
        return None
    # Normalize to dashed lowercase; prefix matching then also covers
    # date-suffixed ids like "claude-opus-4-5-20250301".
    normalized = model_id.replace(".", "-").lower()
    exact = next(
        (table for prefix, table in MODEL_PRICING.items()
         if normalized.startswith(prefix)),
        None,
    )
    if exact is not None:
        return exact
    # Family-level fallback: first keyword found anywhere in the id wins.
    return next(
        (table for keyword, table in PRICING_FALLBACKS
         if keyword in normalized),
        None,
    )
def find_session_files():
    """Return all session transcripts (*.jsonl) under the projects dir.

    An empty list is returned when the directory does not exist.
    """
    return list(PROJECTS_DIR.rglob("*.jsonl")) if PROJECTS_DIR.exists() else []
def parse_history_dates():
    """Count prompt-history entries per calendar day.

    Reads ~/.claude/history.jsonl and returns a defaultdict mapping
    "YYYY-MM-DD" -> number of entries recorded that day. Returns an empty
    mapping when the file is missing or unreadable; malformed lines are
    skipped silently.
    """
    dates = defaultdict(int)
    if not HISTORY_FILE.exists():
        return dates
    try:
        with open(HISTORY_FILE) as f:
            for line in f:
                try:
                    d = json.loads(line)
                    ts = d.get("timestamp")
                    if ts:
                        # Timestamps are treated as epoch milliseconds;
                        # bucket by local calendar date.
                        dt = datetime.fromtimestamp(ts / 1000).strftime("%Y-%m-%d")
                        dates[dt] += 1
                except (json.JSONDecodeError, ValueError):
                    continue
    except OSError:
        # Consistent with analyze_sessions(): an unreadable file should
        # degrade gracefully rather than abort the whole report.
        return dates
    return dates
def analyze_sessions(session_files):
    """Aggregate per-interaction token usage from session transcripts.

    Returns (interactions, token_totals, model_totals):
      - interactions: list of {"input_tokens", "output_tokens", "chain_depth"}
        dicts, one per user turn that produced assistant output;
      - token_totals: global sums per token category;
      - model_totals: model id -> per-category sums plus a "turns" count.
    """
    categories = ("base_input", "cache_write_5m", "cache_write_1h",
                  "cache_read", "output")
    interactions = []
    token_totals = dict.fromkeys(categories, 0)
    model_totals = defaultdict(lambda: dict.fromkeys(categories + ("turns",), 0))

    for path in session_files:
        try:
            with open(path) as f:
                raw_lines = f.readlines()
        except OSError:
            continue

        # Keep only well-formed user/assistant records, in transcript order.
        convo = []
        for raw in raw_lines:
            try:
                record = json.loads(raw)
            except json.JSONDecodeError:
                continue
            if record.get("type") in ("user", "assistant"):
                convo.append(record)

        pos, total = 0, len(convo)
        while pos < total:
            if convo[pos].get("type") != "user":
                # Assistant records not preceded by a user turn are skipped.
                pos += 1
                continue
            # A user turn opens an interaction; each consecutive assistant
            # record after it is one link of the tool-call chain.
            pos += 1
            inp_sum = out_sum = depth = 0
            while pos < total and convo[pos].get("type") == "assistant":
                message = convo[pos].get("message", {})
                usage = message.get("usage", {})
                model = message.get("model", "unknown")

                creation = usage.get("cache_creation", {})
                write_5m = creation.get("ephemeral_5m_input_tokens", 0)
                write_1h = creation.get("ephemeral_1h_input_tokens", 0)
                # Older records report only a combined cache-write total;
                # attribute that to the 5m bucket.
                if write_5m == 0 and write_1h == 0:
                    combined = usage.get("cache_creation_input_tokens", 0)
                    if combined > 0:
                        write_5m = combined

                per_turn = {
                    "base_input": usage.get("input_tokens", 0),
                    "cache_write_5m": write_5m,
                    "cache_write_1h": write_1h,
                    "cache_read": usage.get("cache_read_input_tokens", 0),
                    "output": usage.get("output_tokens", 0),
                }
                for key, amount in per_turn.items():
                    token_totals[key] += amount
                    model_totals[model][key] += amount
                model_totals[model]["turns"] += 1

                inp_sum += (per_turn["base_input"] + per_turn["cache_read"]
                            + per_turn["cache_write_5m"] + per_turn["cache_write_1h"])
                out_sum += per_turn["output"]
                depth += 1
                pos += 1

            if inp_sum > 0 or out_sum > 0:
                interactions.append({
                    "input_tokens": inp_sum,
                    "output_tokens": out_sum,
                    "chain_depth": depth,
                })

    return interactions, token_totals, dict(model_totals)
def calc_cost(totals, pricing):
    """Dollar cost of *totals* (token counts per category) under *pricing*
    (USD per million tokens per category)."""
    # Maps token-count categories in *totals* to rate keys in *pricing*.
    rate_for = {
        "base_input": "input",
        "cache_write_5m": "cache_write_5m",
        "cache_write_1h": "cache_write_1h",
        "cache_read": "cache_read",
        "output": "output",
    }
    return sum(
        totals[category] / 1_000_000 * pricing[rate]
        for category, rate in rate_for.items()
    )
def median(lst):
    """Median of a numeric sequence; 0 for an empty one.

    Even-length inputs return the mean of the two middle values.
    """
    if not lst:
        return 0
    ordered = sorted(lst)
    mid, odd = divmod(len(ordered), 2)
    if odd:
        return ordered[mid]
    return (ordered[mid - 1] + ordered[mid]) / 2
def percentile(lst, p):
    """Nearest-rank p-th percentile of a numeric sequence; 0 for empty."""
    if not lst:
        return 0
    ordered = sorted(lst)
    # Truncated rank, clamped to the last element for p near/at 100.
    rank = min(int(len(ordered) * p / 100), len(ordered) - 1)
    return ordered[rank]
def fmt(n):
    """Format a token count for display with a B/M/K suffix."""
    for threshold, suffix in ((1_000_000_000, "B"), (1_000_000, "M"), (1_000, "K")):
        if n >= threshold:
            return f"{n / threshold:.1f}{suffix}"
    return f"{n:.0f}"
def main():
    """Entry point: read local Claude Code data and print the usage report."""
    if not CLAUDE_DIR.exists():
        print("Error: ~/.claude directory not found.", file=sys.stderr)
        print("Are you sure Claude Code is installed?", file=sys.stderr)
        sys.exit(1)
    session_files = find_session_files()
    if not session_files:
        print("No session files found in ~/.claude/projects/", file=sys.stderr)
        sys.exit(1)
    history_dates = parse_history_dates()
    print(f"Analyzing {len(session_files)} sessions...", end="", flush=True)
    interactions, token_totals, model_totals = analyze_sessions(session_files)
    print(" done.\n")
    input_tokens = [i["input_tokens"] for i in interactions]
    output_tokens = [i["output_tokens"] for i in interactions]
    chain_depths = [i["chain_depth"] for i in interactions]
    n = len(interactions)
    # Per-day stats come from history.jsonl; when it is missing, fall back to
    # neutral values so the averages below stay defined.
    num_active_days = len(history_dates) if history_dates else 1
    total_history_msgs = sum(history_dates.values()) if history_dates else n
    date_min = min(history_dates.keys()) if history_dates else "?"
    date_max = max(history_dates.keys()) if history_dates else "?"
    avg_per_day = total_history_msgs / num_active_days
    avg_inp = sum(input_tokens) / n if n else 0
    avg_out = sum(output_tokens) / n if n else 0
    avg_depth = sum(chain_depths) / n if n else 0
    # Token category totals
    t = token_totals
    total_all_input = t["base_input"] + t["cache_write_5m"] + t["cache_write_1h"] + t["cache_read"]
    cache_hit_rate = t["cache_read"] / total_all_input * 100 if total_all_input else 0
    w = 60  # report width in characters
    print("=" * w)
    print(" CLAUDE CODE USAGE STATS")
    print("=" * w)
    print()
    print(f" Date range: {date_min} to {date_max}")
    print(f" Active days: {num_active_days}")
    print(f" Sessions: {len(session_files):,}")
    print(f" Interactions: {n:,}")
    print()
    # --- Key Metrics ---
    print("-" * w)
    print(" KEY METRICS")
    print("-" * w)
    print()
    print(f" Avg interactions/day: {avg_per_day:.1f}")
    print()
    print(f" Avg input tokens/interaction: {fmt(avg_inp)}")
    print(f" Median: {fmt(median(input_tokens))}")
    print(f" P90: {fmt(percentile(input_tokens, 90))}")
    print()
    print(f" Avg output tokens/interaction: {fmt(avg_out)}")
    print(f" Median: {fmt(median(output_tokens))}")
    print(f" P90: {fmt(percentile(output_tokens, 90))}")
    print()
    print(f" Avg tool call chain depth: {avg_depth:.1f}")
    print(f" Median: {median(chain_depths):.0f}")
    print(f" P90: {percentile(chain_depths, 90)}")
    print()
    # --- Token Breakdown ---
    print("-" * w)
    print(" TOKEN BREAKDOWN")
    print("-" * w)
    print()
    print(f" Base input tokens: {t['base_input']:>15,} ({fmt(t['base_input'])})")
    print(f" Cache write (5m): {t['cache_write_5m']:>15,} ({fmt(t['cache_write_5m'])})")
    print(f" Cache write (1h): {t['cache_write_1h']:>15,} ({fmt(t['cache_write_1h'])})")
    print(f" Cache read (hits): {t['cache_read']:>15,} ({fmt(t['cache_read'])})")
    print(f" Output tokens: {t['output']:>15,} ({fmt(t['output'])})")
    print(f" {'─' * 15}")
    print(f" Total input tokens: {total_all_input:>15,} ({fmt(total_all_input)})")
    print()
    print(f" Cache hit rate: {cache_hit_rate:.1f}%")
    print(f" (cache reads / total input tokens)")
    print()
    # --- Chain Depth ---
    print("-" * w)
    print(" CHAIN DEPTH DISTRIBUTION")
    print("-" * w)
    if chain_depths:
        depth_counts = Counter(chain_depths)
        max_count = max(depth_counts.values())
        # ASCII histogram of depths 1..10, bars scaled to 30 chars max.
        for depth in sorted(depth_counts.keys())[:10]:
            count = depth_counts[depth]
            bar_len = int(count / max_count * 30)
            bar = "#" * bar_len
            print(f" {depth:2d}: {count:6,} {bar}")
        # Anything deeper than 10 is collapsed into a single bucket.
        deep = sum(v for k, v in depth_counts.items() if k > 10)
        if deep:
            print(f" >10: {deep:6,}")
        print(f" max: {max(chain_depths)}")
    print()
    # --- Cost Estimate ---
    print("-" * w)
    print(" ESTIMATED API COST (by model)")
    print("-" * w)
    print()
    grand_total_cost = 0.0
    # Most-used models first.
    sorted_models = sorted(model_totals.items(), key=lambda x: x[1]["turns"], reverse=True)
    for model_id, totals in sorted_models:
        pricing = get_pricing(model_id)
        if not pricing:
            # Unknown models are listed but excluded from the grand total.
            label = model_id
            cost_str = "(unknown pricing)"
        else:
            label = pricing["label"]
            cost = calc_cost(totals, pricing)
            grand_total_cost += cost
            cost_str = f"${cost:,.2f}"
        model_input = totals["base_input"] + totals["cache_write_5m"] + totals["cache_write_1h"] + totals["cache_read"]
        model_cache_hits = totals["cache_read"]
        model_hit_rate = model_cache_hits / model_input * 100 if model_input else 0
        print(f" {label} ({model_id})")
        print(f" Turns: {totals['turns']:,} | Input: {fmt(model_input)} | Output: {fmt(totals['output'])}")
        print(f" Cache hit rate: {model_hit_rate:.1f}% | Cost: {cost_str}")
        print()
    print(f" {'─' * (w - 4)}")
    print(f" TOTAL ESTIMATED COST: ${grand_total_cost:,.2f}")
    print(f" Cost per active day: ${grand_total_cost / num_active_days:,.2f}")
    print(f" Cost per interaction: ${grand_total_cost / n if n else 0:,.4f}")
    print()
    # --- Hypothetical: What if no caching? ---
    print("-" * w)
    print(" WHAT IF: No prompt caching?")
    print("-" * w)
    print()
    nocache_cost = 0.0
    for model_id, totals in sorted_models:
        pricing = get_pricing(model_id)
        if not pricing:
            continue
        # Without caching: all cache_write + cache_read would be base input
        mtok = 1_000_000
        all_input = totals["base_input"] + totals["cache_write_5m"] + totals["cache_write_1h"] + totals["cache_read"]
        nocache_cost += all_input / mtok * pricing["input"] + totals["output"] / mtok * pricing["output"]
    savings = nocache_cost - grand_total_cost
    savings_pct = savings / nocache_cost * 100 if nocache_cost else 0
    print(f" Cost without caching: ${nocache_cost:,.2f}")
    print(f" Actual cost: ${grand_total_cost:,.2f}")
    print(f" Savings from caching: ${savings:,.2f} ({savings_pct:.1f}%)")
    print()
    # --- Top 5 Busiest Days ---
    if history_dates:
        print("-" * w)
        print(" TOP 5 BUSIEST DAYS")
        print("-" * w)
        sorted_days = sorted(history_dates.items(), key=lambda x: x[1], reverse=True)[:5]
        for day, count in sorted_days:
            print(f" {day}: {count} interactions")
        print()


if __name__ == "__main__":
    main()
@ericboehs
Copy link
Author

Claude Code Usage Stats

Analyzes your local Claude Code session data to give you a full picture of your usage and estimated API costs.

Requirements

  • Python 3.6+
  • Claude Code installed (reads from ~/.claude/)

Usage

curl -sL https://gist.githubusercontent.com/ericboehs/9220fe2601e15eb052a889289b8e8353/raw/claude-code-stats.py | python3

Or download and run:

wget https://gist.githubusercontent.com/ericboehs/9220fe2601e15eb052a889289b8e8353/raw/claude-code-stats.py
python3 claude-code-stats.py

What it reports

  • Interactions/day — average user messages per active day
  • Input/output tokens per interaction — avg, median, P90
  • Tool call chain depth — how many consecutive assistant turns per interaction
  • Token breakdown — base input, cache writes (5m/1h TTL), cache reads
  • Cache hit rate — percentage of input tokens served from cache
  • Estimated API cost — broken down by model, using current Anthropic pricing
  • "What if no caching?" — shows how much prompt caching is saving you
  • Busiest days — top 5 days by interaction count

Notes

  • All data is read locally from ~/.claude/projects/ and ~/.claude/history.jsonl — nothing is sent anywhere
  • Cost estimates use public Anthropic API pricing as of Feb 2026
  • Models without known pricing (e.g. third-party) show "(unknown pricing)" and are excluded from totals

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment