Claude Code Cost Analysis
#!/usr/bin/env -S uv run --script
# /// script
# requires-python = ">=3.9"
# dependencies = [
# "pandas>=2.0.0",
# "plotly>=6.0.0",
# "nbformat>=5.0.0",
# ]
# ///
"""
Claude Code Cost Visualizer
Run:
curl -sL https://gist.github.com/dnnspaul/bfe54f51ed91a5eac6618abe30476b2e/raw | uv run --script -
Credits to https://gist.github.com/esc5221/df0a0c3c068b8dd92282837389addb35 for making the base script.
Initially it used `costUSD`, but that field isn't present in my .jsonl files, so I adjusted the
script to calculate costs from the token counts instead.
This script analyzes Claude Code usage costs from .jsonl files in the Claude projects directory.
It calculates costs based on token usage from the 'usage' field in each message, using
model-specific pricing for different Claude variants with full prompt caching support.
Supported Anthropic Claude models and pricing (unknown models fall back to default Sonnet pricing):
Latest Models (Claude 4):
- Claude 4 Opus: $15/million input, $75/million output tokens
- Claude 4 Sonnet: $3/million input, $15/million output tokens
- Claude 4.5 Sonnet: $3/million input, $15/million output tokens
Current Models (Claude 3.5):
- Claude 3.5 Sonnet: $3/million input, $15/million output tokens
- Claude 3.5 Haiku: $0.8/million input, $4/million output tokens
Legacy Models (Claude 3, 2, Instant):
- Claude 3 Opus: $15/million input, $75/million output tokens
- Claude 3 Sonnet: $3/million input, $15/million output tokens
- Claude 3 Haiku: $0.25/million input, $1.25/million output tokens
- Claude 2.x: $8/million input, $24/million output tokens
- Claude Instant: $0.8-1.63/million input, $2.4-5.51/million output tokens
Prompt Caching (significant savings on supported models):
- Cache reads: ~90% discount (e.g., $0.30/million for Sonnet vs $3/million)
- Cache writes: slight premium for 5m cache, higher for 1h cache
- Automatically detects and applies correct pricing for cached vs non-cached tokens
- Legacy models (Claude 2, Instant) have no caching discounts
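Worked example (Sonnet, illustrative volumes): a request with 2,000 regular input tokens,
200,000 cache-read tokens, and 1,500 output tokens costs roughly
(2,000/1e6)*$3 + (200,000/1e6)*$0.30 + (1,500/1e6)*$15 = $0.006 + $0.06 + $0.0225 = $0.0885.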
It creates an interactive visualization with two charts:
- Daily costs by project (stacked area chart)
- Cumulative costs over time (line/bar chart)
The visualization is saved as an interactive HTML file that can be opened in any browser.
Terminal output shows a simple, aligned cost summary by project.
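For example, the per-project summary looks like this (illustrative project names and numbers):
myproject/api      :     $12.34
myproject/frontend :      $5.67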
Usage:
./claude_code_cost_visualizer.py
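(if running the saved file directly, make it executable first: chmod +x claude_code_cost_visualizer.py)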
(Dependencies will be automatically installed by uv)
"""
import os
import json
import glob
import pandas as pd
import plotly.graph_objects as go
from collections import defaultdict
from plotly.subplots import make_subplots  # used below to build the two stacked subplots
# Suppress pandas future warning about downcasting
pd.set_option('future.no_silent_downcasting', True)
# Claude model pricing configuration (as of January 2025)
# Source: Anthropic pricing page - costs are per million tokens
# Includes prompt caching pricing for accurate cost calculation
MODEL_PRICING = {
# Claude 4 models (latest generation)
"claude-4-opus": {
"input": 15.0, "output": 75.0,
"cache_write_5m": 18.75, "cache_write_1h": 30.0, "cache_read": 1.50
},
"claude-4-sonnet": {
"input": 3.0, "output": 15.0,
"cache_write_5m": 3.75, "cache_write_1h": 6.0, "cache_read": 0.30
},
"claude-4-5-sonnet": {
"input": 3.0, "output": 15.0,
"cache_write_5m": 3.75, "cache_write_1h": 6.0, "cache_read": 0.30
},
"claude-sonnet-4-5-20250929": { # Specific version used in your JSONL files
"input": 3.0, "output": 15.0,
"cache_write_5m": 3.75, "cache_write_1h": 6.0, "cache_read": 0.30
},
"claude-sonnet-4-20250514": { # Specific version used in your JSONL files
"input": 3.0, "output": 15.0,
"cache_write_5m": 3.75, "cache_write_1h": 6.0, "cache_read": 0.30
},
# Claude 3.5 models with caching support
"claude-3.5-sonnet": {
"input": 3.0, "output": 15.0,
"cache_write_5m": 3.75, "cache_write_1h": 6.0, "cache_read": 0.30
},
"claude-3-5-sonnet-20241022": { # Latest 3.5 Sonnet with date
"input": 3.0, "output": 15.0,
"cache_write_5m": 3.75, "cache_write_1h": 6.0, "cache_read": 0.30
},
"claude-3-5-sonnet-20240620": { # Previous 3.5 Sonnet version
"input": 3.0, "output": 15.0,
"cache_write_5m": 3.75, "cache_write_1h": 6.0, "cache_read": 0.30
},
"claude-3.5-haiku": {
"input": 0.8, "output": 4.0,
"cache_write_5m": 1.0, "cache_write_1h": 1.6, "cache_read": 0.08
},
"claude-3-5-haiku-20241022": { # Latest 3.5 Haiku with date
"input": 0.8, "output": 4.0,
"cache_write_5m": 1.0, "cache_write_1h": 1.6, "cache_read": 0.08
},
# Claude 3 models (legacy) with caching support
"claude-3-opus": {
"input": 15.0, "output": 75.0,
"cache_write_5m": 18.75, "cache_write_1h": 30.0, "cache_read": 1.50
},
"claude-3-opus-20240229": { # Specific Opus version
"input": 15.0, "output": 75.0,
"cache_write_5m": 18.75, "cache_write_1h": 30.0, "cache_read": 1.50
},
"claude-3-sonnet": {
"input": 3.0, "output": 15.0,
"cache_write_5m": 3.75, "cache_write_1h": 6.0, "cache_read": 0.30
},
"claude-3-sonnet-20240229": { # Specific Sonnet version
"input": 3.0, "output": 15.0,
"cache_write_5m": 3.75, "cache_write_1h": 6.0, "cache_read": 0.30
},
"claude-3-haiku": {
"input": 0.25, "output": 1.25,
"cache_write_5m": 0.30, "cache_write_1h": 0.50, "cache_read": 0.03
},
"claude-3-haiku-20240307": { # Specific Haiku version
"input": 0.25, "output": 1.25,
"cache_write_5m": 0.30, "cache_write_1h": 0.50, "cache_read": 0.03
},
# Claude 2 models (legacy, limited caching)
"claude-2.1": {
"input": 8.0, "output": 24.0,
"cache_write_5m": 8.0, "cache_write_1h": 8.0, "cache_read": 8.0 # No caching discount
},
"claude-2.0": {
"input": 8.0, "output": 24.0,
"cache_write_5m": 8.0, "cache_write_1h": 8.0, "cache_read": 8.0 # No caching discount
},
"claude-2": {
"input": 8.0, "output": 24.0,
"cache_write_5m": 8.0, "cache_write_1h": 8.0, "cache_read": 8.0 # No caching discount
},
# Claude Instant (legacy, deprecated)
"claude-instant-1.2": {
"input": 0.8, "output": 2.4,
"cache_write_5m": 0.8, "cache_write_1h": 0.8, "cache_read": 0.8 # No caching discount
},
"claude-instant-1": {
"input": 1.63, "output": 5.51,
"cache_write_5m": 1.63, "cache_write_1h": 1.63, "cache_read": 1.63 # No caching discount
},
# Default fallback for unknown models (using 3.5 Sonnet pricing)
"default": {
"input": 3.0, "output": 15.0,
"cache_write_5m": 3.75, "cache_write_1h": 6.0, "cache_read": 0.30
}
}
# To add support for new models, add them to the MODEL_PRICING dictionary above with their
# input, output, and cache token costs per million tokens (see the example entry below).
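# A hypothetical future model (name and prices invented purely for illustration) would be
# registered like this, including its prompt-caching rates:
#
#     "claude-example-next": {
#         "input": 5.0, "output": 25.0,
#         "cache_write_5m": 6.25, "cache_write_1h": 10.0, "cache_read": 0.50
#     },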
# Find all JSONL files in the Claude projects directory
# Use os.path.expanduser to correctly resolve the '~' to the user's home directory
project_dir = os.path.expanduser("~/.claude/projects/")
jsonl_files = glob.glob(f"{project_dir}/**/*.jsonl", recursive=True)
# Dictionary to store data: {date: {project: cost}}
data = defaultdict(lambda: defaultdict(float))
project_names = set()
total_cost = 0
model_usage = defaultdict(int) # Track model usage counts
model_costs = defaultdict(float) # Track costs per model
monthly_costs = defaultdict(float) # Track costs per month
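# For reference, the parser below expects each JSONL line to look roughly like this abridged,
# illustrative record (field names taken from the parsing code; real entries carry many more fields):
#
#     {"timestamp": "2025-01-15T10:22:33.000Z",
#      "message": {"model": "claude-sonnet-4-20250514",
#                  "usage": {"input_tokens": 12,
#                            "output_tokens": 340,
#                            "cache_creation_input_tokens": 2048,
#                            "cache_read_input_tokens": 15000,
#                            "cache_creation": {"ephemeral_5m_input_tokens": 2048,
#                                               "ephemeral_1h_input_tokens": 0}}}}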
# Process each JSONL file
for file_path in jsonl_files:
try:
# Extract project name from path
parts = file_path.split(os.sep) # Use os.sep for platform-independent path splitting
try:
# Find the '.claude' part and get the project name after 'projects'
# Adjusting index to handle both hardcoded path structure and '~' expanded paths
claude_idx = -1
for i, part in enumerate(parts):
if part == '.claude':
claude_idx = i
break
if claude_idx != -1 and claude_idx + 2 < len(parts): # Ensure 'projects' and project name exist
# If the path looks like /home/user/.claude/projects/project_name/...
project_name = parts[claude_idx + 2]
# Further cleanup: strip the encoded home-directory prefix and turn dashes back into slashes
# (note: '-Users-lullu-' is hardcoded from the original script; adjust it to your own prefix)
project_name = project_name.replace('-Users-lullu-', '').replace('-', '/')
else:
project_name = "unknown" # Fallback if structure not as expected
except ValueError:
# Defensive fallback (unreachable with the loop above): use the parent directory name
project_name = os.path.basename(os.path.dirname(file_path))
project_names.add(project_name)
# Process each line in the JSONL file
with open(file_path, 'r') as f:
for line in f:
try:
entry = json.loads(line)
# Calculate cost from token usage (if available)
cost = 0
if 'message' in entry and 'usage' in entry['message']:
usage = entry['message']['usage']
model = entry['message'].get('model', 'unknown')
# Get pricing for this model, fallback to default if unknown
pricing = MODEL_PRICING.get(model, MODEL_PRICING['default'])
# If this is an unknown model, add it to our usage tracking for visibility
if model not in MODEL_PRICING and model != 'unknown':
print(f"Warning: Unknown model '{model}' found. Using default pricing.")
# Get token counts
input_tokens = usage.get('input_tokens', 0)
cache_creation_tokens = usage.get('cache_creation_input_tokens', 0)
cache_read_tokens = usage.get('cache_read_input_tokens', 0)
output_tokens = usage.get('output_tokens', 0)
# Get cache creation info to determine cache type
cache_creation_info = usage.get('cache_creation', {})
ephemeral_5m_tokens = cache_creation_info.get('ephemeral_5m_input_tokens', 0)
ephemeral_1h_tokens = cache_creation_info.get('ephemeral_1h_input_tokens', 0)
# Calculate costs for different token types
# Regular input tokens at standard rate
regular_input_cost = (input_tokens / 1_000_000) * pricing['input']
# Cache creation tokens (assume 5m cache if not specified in cache_creation)
if ephemeral_5m_tokens > 0:
cache_write_cost = (ephemeral_5m_tokens / 1_000_000) * pricing['cache_write_5m']
elif ephemeral_1h_tokens > 0:
cache_write_cost = (ephemeral_1h_tokens / 1_000_000) * pricing['cache_write_1h']
elif cache_creation_tokens > 0:
# Fallback: assume 5m cache for unspecified cache creation
cache_write_cost = (cache_creation_tokens / 1_000_000) * pricing['cache_write_5m']
else:
cache_write_cost = 0
# Cache read tokens at heavily discounted rate
cache_read_cost = (cache_read_tokens / 1_000_000) * pricing['cache_read']
# Output tokens at standard rate
output_cost = (output_tokens / 1_000_000) * pricing['output']
# Total cost
cost = regular_input_cost + cache_write_cost + cache_read_cost + output_cost
# Track model usage statistics
model_usage[model] += 1
model_costs[model] += cost
if cost > 0:
# Extract date from timestamp
timestamp = entry.get('timestamp')
if timestamp:
date = timestamp.split('T')[0] # Get YYYY-MM-DD part
month = date[:7] # Get YYYY-MM part for monthly tracking
# Add cost to the appropriate date and project
data[date][project_name] += cost
monthly_costs[month] += cost
total_cost += cost
except (json.JSONDecodeError, KeyError):
continue # Skip invalid lines
except Exception as e:
print(f"Error processing file {file_path}: {e}")
continue
# Convert to DataFrame for easier plotting
dates = sorted(data.keys())
projects = sorted(list(project_names)) # Convert set to list for sorting
if dates:
# Create a complete date range from first to last date to fill gaps
start_date = pd.to_datetime(dates[0])
end_date = pd.to_datetime(dates[-1])
complete_date_range = pd.date_range(start=start_date, end=end_date, freq='D')
complete_dates = [date.strftime('%Y-%m-%d') for date in complete_date_range]
# Create DataFrame with complete date range
df = pd.DataFrame(index=complete_dates, columns=projects)
# Fill in the actual data
for date in complete_dates:
for project in projects:
df.loc[date, project] = data[date].get(project, 0)
# Fill NaN values with 0
df = df.fillna(0)
else:
# Fallback for empty data
df = pd.DataFrame()
# Only create visualization if we have data
if not df.empty and len(projects) > 0:
# Create a single figure with two subplots sharing the x-axis
# Create figure with 2 rows (subplots stacked vertically)
fig = make_subplots(
rows=2,
cols=1,
shared_xaxes=True, # Share x-axes between subplots
vertical_spacing=0.1, # Add some space between the subplots
subplot_titles=('Daily Claude Code Usage Cost by Project', 'Cumulative Claude Code Usage Cost')
)
# 1. Daily stacked area chart (top subplot)
for project in projects:
fig.add_trace(
go.Scatter(
x=df.index,
y=df[project],
mode='lines',
name=project,
stackgroup='one',
hoverinfo='x+y+name'
),
row=1, col=1 # First subplot (top)
)
# Add total cost line to daily chart
df['Total'] = df.sum(axis=1)
fig.add_trace(
go.Scatter(
x=df.index,
y=df['Total'],
mode='lines',
name='Total Cost',
line=dict(width=2, color='black', dash='dash'),
),
row=1, col=1 # First subplot (top)
)
# 2. Cumulative cost chart (bottom subplot)
df['Cumulative'] = df['Total'].cumsum()
fig.add_trace(
go.Scatter(
x=df.index,
y=df['Cumulative'],
mode='lines',
name='Cumulative Cost',
line=dict(width=3, color='red'),
fill='tozeroy',
),
row=2, col=1 # Second subplot (bottom)
)
# Optionally add daily cost bars to the cumulative chart
fig.add_trace(
go.Bar(
x=df.index,
y=df['Total'],
name='Daily Cost',
marker_color='rgba(55, 83, 109, 0.7)',
opacity=0.7,
showlegend=False, # Hide from legend since it's already in the top subplot
),
row=2, col=1 # Second subplot (bottom)
)
# Update layout for entire figure
fig.update_layout(
title=f'Claude Code Usage Cost Analysis (Total: ${total_cost:.2f})',
hovermode='x unified', # Unified hover mode across all subplots
legend_title='Projects/Costs',
width=1600,
height=900,
margin=dict(l=50, r=50, t=100, b=100),
)
# Update y-axis labels for each subplot
fig.update_yaxes(title_text="Daily Cost (USD)", row=1, col=1)
fig.update_yaxes(title_text="Cumulative Cost (USD)", row=2, col=1)
fig.update_xaxes(title_text="Date", row=2, col=1) # Only add x-axis title to bottom subplot
# Save HTML file
html_path = os.path.expanduser("~/claude_code_cost_analysis.html")
fig.write_html(html_path)
print(f"Saved Claude Code cost analysis chart to {html_path}")
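# Note: to also open the chart directly in a browser or notebook renderer, fig.show()
# could be called here; it is omitted because it may fail in headless environments.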
else:
print("No data found to create visualization.")
# Print summary with simple formatting
print("\n" + "="*50)
print(f"Total Claude Code usage cost: ${total_cost:.2f}")
print(f"Script supports {len(MODEL_PRICING)-1} Claude models + fallback pricing")
# Print model usage statistics
if model_usage:
print("\nModel usage statistics:")
print("-"*50)
max_model_len = max([len(model) for model in model_usage.keys()]) if model_usage else 0
for model, count in sorted(model_usage.items(), key=lambda x: model_costs[x[0]], reverse=True):
cost = model_costs[model]
cost_str = f"${cost:.2f}"
print(f"{model.ljust(max_model_len)} : {count:4d} calls, {cost_str.rjust(10)}")
# Print monthly cost breakdown
if monthly_costs:
print("\nMonthly cost breakdown:")
print("-"*50)
# Sort months chronologically
sorted_months = sorted(monthly_costs.items())
for month, cost in sorted_months:
cost_str = f"${cost:.2f}"
# Convert YYYY-MM to more readable format
year, month_num = month.split('-')
month_names = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
month_name = month_names[int(month_num) - 1]
readable_month = f"{month_name} {year}"
print(f"{readable_month.ljust(12)} : {cost_str.rjust(10)}")
# Calculate average monthly cost
if len(monthly_costs) > 1:
avg_monthly = sum(monthly_costs.values()) / len(monthly_costs)
print(f"{''.ljust(12)} : {''.rjust(10)}")
avg_str = f"${avg_monthly:.2f}"
print(f"{'Avg/month'.ljust(12)} : {avg_str.rjust(10)}")
# Get project totals and sort by cost (only if we have data)
if not df.empty and len(projects) > 0:
project_totals = df[projects].sum().sort_values(ascending=False)
# Calculate the max length of project names for alignment
max_project_len = max([len(p) for p in projects]) if projects else 0
print("\nCost by project:")
print("-"*50)
# Print sorted projects with dollar amounts aligned
for project, cost in project_totals.items():
cost_str = f"${cost:.2f}"
print(f"{project.ljust(max_project_len)} : {cost_str.rjust(10)}")
elif project_names:
print("\nNo cost data found for projects.")
else:
print("\nNo projects found.")
print("="*50)