Last active
October 6, 2025 12:18
-
-
Save dnnspaul/bfe54f51ed91a5eac6618abe30476b2e to your computer and use it in GitHub Desktop.
Claude Code Cost Analysis
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env -S uv run --script
# /// script
# requires-python = ">=3.9"
# dependencies = [
#     "pandas>=2.0.0",
#     "plotly>=6.0.0",
#     "nbformat>=5.0.0",
# ]
# ///
"""
Claude Code Cost Visualizer

Run:
    curl -sL https://gist.github.com/dnnspaul/bfe54f51ed91a5eac6618abe30476b2e/raw | uv run --script -

Credits to https://gist.github.com/esc5221/df0a0c3c068b8dd92282837389addb35 for making the base script.
Initially it used `costUSD`, but that field isn't available in my .jsonl files, so I adjusted the
script to use the token counts and calculate the costs from those.

This script analyzes Claude Code usage costs from .jsonl files in the Claude projects directory
(~/.claude/projects/). It calculates costs from the token counts in the 'usage' field of each
message, using model-specific pricing with prompt caching support.

The built-in pricing table covers the models below. Any model that is not in the table is
priced with the default (Sonnet) rates and a warning is printed.

Latest Models (Claude 4):
    - Claude 4 Opus: $15/million input, $75/million output tokens
    - Claude 4 Sonnet: $3/million input, $15/million output tokens
    - Claude 4.5 Sonnet: $3/million input, $15/million output tokens

Current Models (Claude 3.5):
    - Claude 3.5 Sonnet: $3/million input, $15/million output tokens
    - Claude 3.5 Haiku: $0.8/million input, $4/million output tokens

Legacy Models (Claude 3, 2, Instant):
    - Claude 3 Opus: $15/million input, $75/million output tokens
    - Claude 3 Sonnet: $3/million input, $15/million output tokens
    - Claude 3 Haiku: $0.25/million input, $1.25/million output tokens
    - Claude 2.x: $8/million input, $24/million output tokens
    - Claude Instant: $0.8-1.63/million input, $2.4-5.51/million output tokens

Prompt Caching (significant savings on supported models):
    - Cache reads: ~90% discount (e.g., $0.30/million for Sonnet vs $3/million)
    - Cache writes: slight premium for the 5m cache, higher for the 1h cache
    - Cached and non-cached tokens are detected automatically and priced at their own rates
    - Legacy models (Claude 2, Instant) have no caching discounts

It creates an interactive visualization with two charts:
    - Daily costs by project (stacked area chart)
    - Cumulative costs over time (line/bar chart)

The visualization is saved as an interactive HTML file (~/claude_code_cost_analysis.html).
Terminal output shows a simple, aligned cost summary by model, month and project.

Usage:
    ./claude_code_cost_visualizer.py

(Dependencies will be automatically installed by uv)
"""
import glob
import json
import os
from collections import defaultdict

import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots  # Moved import here for consistency
# Suppress pandas future warning about downcasting.
# The option is not registered on every pandas release permitted by the
# declared dependency floor (pandas>=2.0.0); on such a release set_option
# raises OptionError. The option only silences a warning, so a missing option
# is tolerated instead of aborting the script at startup.
try:
    pd.set_option('future.no_silent_downcasting', True)
except pd.errors.OptionError:
    pass
# Claude model pricing configuration.
# All rates are USD per million tokens and include the prompt-caching rates
# (5-minute cache write, 1-hour cache write, cache read).
# NOTE(review): rates were transcribed from Anthropic's pricing page; re-verify
# them against the current page before relying on the totals.


def _rates(input_rate, output_rate, cache_write_5m, cache_write_1h, cache_read):
    """Build one pricing entry with the five keys the cost calculation reads."""
    return {
        "input": input_rate,
        "output": output_rate,
        "cache_write_5m": cache_write_5m,
        "cache_write_1h": cache_write_1h,
        "cache_read": cache_read,
    }


def _flat_rates(input_rate, output_rate):
    """Build a pricing entry for a model without caching discounts.

    Every cache rate equals the plain input rate.
    """
    return _rates(input_rate, output_rate, input_rate, input_rate, input_rate)


# Each tier pairs one set of rates with every model name billed at those rates.
# Log entries carry family-first, dated model IDs (e.g. "claude-sonnet-4-20250514"),
# so each family lists those IDs next to its short alias; an alias alone would
# never match and the usage would silently be billed at the default rates.
_PRICING_TIERS = (
    # Opus tier: Claude 4 / 4.1 Opus and Claude 3 Opus
    (_rates(15.0, 75.0, 18.75, 30.0, 1.50), (
        "claude-4-opus",
        "claude-opus-4-20250514",
        "claude-opus-4-1-20250805",
        "claude-3-opus",
        "claude-3-opus-20240229",
    )),
    # Sonnet tier: Claude 4.5 / 4 / 3.7 / 3.5 / 3 Sonnet
    (_rates(3.0, 15.0, 3.75, 6.0, 0.30), (
        "claude-4-sonnet",
        "claude-4-5-sonnet",
        "claude-sonnet-4-5-20250929",
        "claude-sonnet-4-20250514",
        "claude-3-7-sonnet-20250219",
        "claude-3.5-sonnet",
        "claude-3-5-sonnet-20241022",
        "claude-3-5-sonnet-20240620",
        "claude-3-sonnet",
        "claude-3-sonnet-20240229",
    )),
    # Claude 3.5 Haiku
    (_rates(0.8, 4.0, 1.0, 1.6, 0.08), (
        "claude-3.5-haiku",
        "claude-3-5-haiku-20241022",
    )),
    # Claude 3 Haiku (legacy)
    (_rates(0.25, 1.25, 0.30, 0.50, 0.03), (
        "claude-3-haiku",
        "claude-3-haiku-20240307",
    )),
    # Claude 2 models (legacy, no caching discount)
    (_flat_rates(8.0, 24.0), (
        "claude-2.1",
        "claude-2.0",
        "claude-2",
    )),
    # Claude Instant (legacy, deprecated, no caching discount)
    (_flat_rates(0.8, 2.4), ("claude-instant-1.2",)),
    (_flat_rates(1.63, 5.51), ("claude-instant-1",)),
    # Default fallback for unknown models (using 3.5 Sonnet pricing)
    (_rates(3.0, 15.0, 3.75, 6.0, 0.30), ("default",)),
)

# Flatten the tiers into {model_name: rates}. Every model gets its own copy of
# the rates dict so editing one entry can never affect another.
MODEL_PRICING = {
    model_name: dict(tier_rates)
    for tier_rates, model_names in _PRICING_TIERS
    for model_name in model_names
}

# To add support for a new model, add its exact model ID (as it appears in the
# "model" field of the .jsonl entries) to the matching tier above, or add a new
# tier. An entry must define all five rates, because the cost calculation reads
# every one of them.
# Example: (_rates(20.0, 100.0, 25.0, 40.0, 2.0), ("claude-example-model-20990101",)),
# Find all JSONL files in the Claude projects directory | |
# Use os.path.expanduser to correctly resolve the '~' to the user's home directory | |
project_dir = os.path.expanduser("~/.claude/projects/") | |
jsonl_files = glob.glob(f"{project_dir}/**/*.jsonl", recursive=True) | |
# Dictionary to store data: {date: {project: cost}} | |
data = defaultdict(lambda: defaultdict(float)) | |
project_names = set() | |
total_cost = 0 | |
model_usage = defaultdict(int) # Track model usage counts | |
model_costs = defaultdict(float) # Track costs per model | |
monthly_costs = defaultdict(float) # Track costs per month | |
# Process each JSONL file | |
for file_path in jsonl_files: | |
try: | |
# Extract project name from path | |
parts = file_path.split(os.sep) # Use os.sep for platform-independent path splitting | |
try: | |
# Find the '.claude' part and get the project name after 'projects' | |
# Adjusting index to handle both hardcoded path structure and '~' expanded paths | |
claude_idx = -1 | |
for i, part in enumerate(parts): | |
if part == '.claude': | |
claude_idx = i | |
break | |
if claude_idx != -1 and claude_idx + 2 < len(parts): # Ensure 'projects' and project name exist | |
# If the path looks like /home/user/.claude/projects/project_name/... | |
project_name = parts[claude_idx + 2] | |
# Further cleanup for names like '-Users-lullu-' | |
project_name = project_name.replace('-Users-lullu-', '').replace('-', '/') | |
else: | |
project_name = "unknown" # Fallback if structure not as expected | |
except ValueError: | |
# Fallback if '.claude' not in path | |
project_name = os.path.basename(os.path.dirname(file_path)) | |
project_names.add(project_name) | |
# Process each line in the JSONL file | |
with open(file_path, 'r') as f: | |
for line in f: | |
try: | |
entry = json.loads(line) | |
# Calculate cost from token usage (if available) | |
cost = 0 | |
if 'message' in entry and 'usage' in entry['message']: | |
usage = entry['message']['usage'] | |
model = entry['message'].get('model', 'unknown') | |
# Get pricing for this model, fallback to default if unknown | |
pricing = MODEL_PRICING.get(model, MODEL_PRICING['default']) | |
# If this is an unknown model, add it to our usage tracking for visibility | |
if model not in MODEL_PRICING and model != 'unknown': | |
print(f"Warning: Unknown model '{model}' found. Using default pricing.") | |
# Get token counts | |
input_tokens = usage.get('input_tokens', 0) | |
cache_creation_tokens = usage.get('cache_creation_input_tokens', 0) | |
cache_read_tokens = usage.get('cache_read_input_tokens', 0) | |
output_tokens = usage.get('output_tokens', 0) | |
# Get cache creation info to determine cache type | |
cache_creation_info = usage.get('cache_creation', {}) | |
ephemeral_5m_tokens = cache_creation_info.get('ephemeral_5m_input_tokens', 0) | |
ephemeral_1h_tokens = cache_creation_info.get('ephemeral_1h_input_tokens', 0) | |
# Calculate costs for different token types | |
# Regular input tokens at standard rate | |
regular_input_cost = (input_tokens / 1_000_000) * pricing['input'] | |
# Cache creation tokens (assume 5m cache if not specified in cache_creation) | |
if ephemeral_5m_tokens > 0: | |
cache_write_cost = (ephemeral_5m_tokens / 1_000_000) * pricing['cache_write_5m'] | |
elif ephemeral_1h_tokens > 0: | |
cache_write_cost = (ephemeral_1h_tokens / 1_000_000) * pricing['cache_write_1h'] | |
elif cache_creation_tokens > 0: | |
# Fallback: assume 5m cache for unspecified cache creation | |
cache_write_cost = (cache_creation_tokens / 1_000_000) * pricing['cache_write_5m'] | |
else: | |
cache_write_cost = 0 | |
# Cache read tokens at heavily discounted rate | |
cache_read_cost = (cache_read_tokens / 1_000_000) * pricing['cache_read'] | |
# Output tokens at standard rate | |
output_cost = (output_tokens / 1_000_000) * pricing['output'] | |
# Total cost | |
cost = regular_input_cost + cache_write_cost + cache_read_cost + output_cost | |
# Track model usage statistics | |
model_usage[model] += 1 | |
model_costs[model] += cost | |
if cost > 0: | |
# Extract date from timestamp | |
timestamp = entry.get('timestamp') | |
if timestamp: | |
date = timestamp.split('T')[0] # Get YYYY-MM-DD part | |
month = date[:7] # Get YYYY-MM part for monthly tracking | |
# Add cost to the appropriate date and project | |
data[date][project_name] += cost | |
monthly_costs[month] += cost | |
total_cost += cost | |
except (json.JSONDecodeError, KeyError): | |
continue # Skip invalid lines | |
except Exception as e: | |
print(f"Error processing file {file_path}: {e}") | |
continue | |
# Convert the {date: {project: cost}} mapping into a DataFrame for plotting:
# one row per calendar day, one column per project, 0 where nothing was spent.
dates = sorted(data.keys())
projects = sorted(project_names)
if dates:
    # Complete daily range from the first to the last date so gaps show as 0
    start_date = pd.to_datetime(dates[0])
    end_date = pd.to_datetime(dates[-1])
    complete_date_range = pd.date_range(start=start_date, end=end_date, freq='D')
    complete_dates = [day.strftime('%Y-%m-%d') for day in complete_date_range]

    # Build the frame in one step from plain-dict copies of the rows. This
    # avoids per-cell .loc writes into an object-dtype frame, and never indexes
    # `data` with a gap day (which would insert an empty entry into the
    # defaultdict as a side effect).
    recorded_costs = {day: dict(per_project) for day, per_project in data.items()}
    df = (
        pd.DataFrame.from_dict(recorded_costs, orient='index')
        # Add the missing days and the projects without any recorded cost
        .reindex(index=complete_dates, columns=projects)
        .fillna(0.0)
        .astype(float)
    )
else:
    # Fallback for empty data
    df = pd.DataFrame()
# Build and save the two-panel chart; without any data only a notice is printed.
if df.empty or len(projects) == 0:
    print("No data found to create visualization.")
else:
    # One figure, two vertically stacked panels that share the date axis
    panel_titles = ('Daily Claude Code Usage Cost by Project', 'Cumulative Claude Code Usage Cost')
    fig = make_subplots(
        rows=2,
        cols=1,
        shared_xaxes=True,
        vertical_spacing=0.1,
        subplot_titles=panel_titles,
    )

    # --- Top panel: per-project daily cost as a stacked area chart ---
    for project in projects:
        project_area = go.Scatter(
            x=df.index,
            y=df[project],
            mode='lines',
            name=project,
            stackgroup='one',
            hoverinfo='x+y+name',
        )
        fig.add_trace(project_area, row=1, col=1)

    # Daily total across all projects, drawn as a dashed line over the areas.
    # Computed while the frame still holds only the project columns.
    df['Total'] = df.sum(axis=1)
    total_line = go.Scatter(
        x=df.index,
        y=df['Total'],
        mode='lines',
        name='Total Cost',
        line={'width': 2, 'color': 'black', 'dash': 'dash'},
    )
    fig.add_trace(total_line, row=1, col=1)

    # --- Bottom panel: running total as a filled line, plus daily bars ---
    df['Cumulative'] = df['Total'].cumsum()
    cumulative_line = go.Scatter(
        x=df.index,
        y=df['Cumulative'],
        mode='lines',
        name='Cumulative Cost',
        line={'width': 3, 'color': 'red'},
        fill='tozeroy',
    )
    fig.add_trace(cumulative_line, row=2, col=1)

    # The bars are kept out of the legend
    daily_bars = go.Bar(
        x=df.index,
        y=df['Total'],
        name='Daily Cost',
        marker_color='rgba(55, 83, 109, 0.7)',
        opacity=0.7,
        showlegend=False,
    )
    fig.add_trace(daily_bars, row=2, col=1)

    # Figure-wide layout: title with the grand total, unified hover, fixed size
    fig.update_layout(
        title=f'Claude Code Usage Cost Analysis (Total: ${total_cost:.2f})',
        hovermode='x unified',
        legend_title='Projects/Costs',
        width=1600,
        height=900,
        margin={'l': 50, 'r': 50, 't': 100, 'b': 100},
    )

    # Axis titles; the x-axis title goes on the bottom panel only
    fig.update_yaxes(title_text="Daily Cost (USD)", row=1, col=1)
    fig.update_yaxes(title_text="Cumulative Cost (USD)", row=2, col=1)
    fig.update_xaxes(title_text="Date", row=2, col=1)

    # Write the interactive chart into the user's home directory
    html_path = os.path.expanduser("~/claude_code_cost_analysis.html")
    fig.write_html(html_path)
    print(f"Saved Claude Code cost analysis chart to {html_path}")
# Terminal report: grand total, then per-model, per-month and per-project tables.
print("\n" + "=" * 50)
print(f"Total Claude Code usage cost: ${total_cost:.2f}")
known_model_count = len(MODEL_PRICING) - 1  # the 'default' entry is not a model
print(f"Script supports {known_model_count} Claude models + fallback pricing")

# Per-model table, most expensive model first
if model_usage:
    print("\nModel usage statistics:")
    print("-" * 50)
    name_width = max(len(name) for name in model_usage)
    for name in sorted(model_usage, key=lambda m: model_costs[m], reverse=True):
        spent = f"${model_costs[name]:.2f}"
        print(f"{name:<{name_width}} : {model_usage[name]:4d} calls, {spent:>10}")

# Per-month table in chronological order ('YYYY-MM' keys sort correctly as text)
if monthly_costs:
    print("\nMonthly cost breakdown:")
    print("-" * 50)
    month_abbreviations = ('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                           'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec')
    for month_key in sorted(monthly_costs):
        year, month_num = month_key.split('-')
        # Show 'YYYY-MM' as e.g. 'Oct 2025'
        label = f"{month_abbreviations[int(month_num) - 1]} {year}"
        spent = f"${monthly_costs[month_key]:.2f}"
        print(f"{label:<12} : {spent:>10}")

    # An average is only shown once more than one month has been recorded
    if len(monthly_costs) > 1:
        avg_monthly = sum(monthly_costs.values()) / len(monthly_costs)
        print(f"{'':<12} : {'':>10}")  # spacer row between the months and the average
        average_text = f"${avg_monthly:.2f}"
        print(f"{'Avg/month':<12} : {average_text:>10}")

# Per-project table, most expensive project first (only if there is data)
if df.empty or len(projects) == 0:
    if project_names:
        print("\nNo cost data found for projects.")
    else:
        print("\nNo projects found.")
else:
    project_totals = df[projects].sum().sort_values(ascending=False)
    # Pad every name to the longest project name so the amounts line up
    project_width = max(len(name) for name in projects)
    print("\nCost by project:")
    print("-" * 50)
    for project_label, project_cost in project_totals.items():
        spent = f"${project_cost:.2f}"
        print(f"{project_label:<{project_width}} : {spent:>10}")
print("=" * 50)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment