Claude Code Cost Analysis
#!/usr/bin/env -S uv run --script
# /// script
# requires-python = ">=3.9"
# dependencies = [
# "pandas>=2.0.0",
# "plotly>=6.0.0",
# "nbformat>=5.0.0",
# ]
# ///
"""
Claude Code Cost Visualizer
Run:
curl -sL https://gist.github.com/dnnspaul/bfe54f51ed91a5eac6618abe30476b2e/raw | uv run --script -
Credits to https://gist.github.com/esc5221/df0a0c3c068b8dd92282837389addb35 for making the base script.
Initially it used `costUSD`, but that field isn't present in my .jsonl files, so I adjusted the
script to calculate costs from the token counts instead.
This script analyzes Claude Code usage costs from .jsonl files in the Claude projects directory.
It calculates costs based on token usage from the 'usage' field in each message, using
model-specific pricing for different Claude variants with full prompt caching support.
Supported Anthropic Claude models and pricing (unknown models fall back to default Sonnet pricing):
Latest Models (Claude 4):
- Claude 4 Opus: $15/million input, $75/million output tokens
- Claude 4 Sonnet: $3/million input, $15/million output tokens
- Claude 4.5 Sonnet: $3/million input, $15/million output tokens
Current Models (Claude 3.5):
- Claude 3.5 Sonnet: $3/million input, $15/million output tokens
- Claude 3.5 Haiku: $0.8/million input, $4/million output tokens
Legacy Models (Claude 3, 2, Instant):
- Claude 3 Opus: $15/million input, $75/million output tokens
- Claude 3 Sonnet: $3/million input, $15/million output tokens
- Claude 3 Haiku: $0.25/million input, $1.25/million output tokens
- Claude 2.x: $8/million input, $24/million output tokens
- Claude Instant: $0.8-1.63/million input, $2.4-5.51/million output tokens
Prompt Caching (significant savings on supported models):
- Cache reads: ~90% discount (e.g., $0.30/million for Sonnet vs $3/million)
- Cache writes: slight premium for 5m cache, higher for 1h cache
- Automatically detects and applies correct pricing for cached vs non-cached tokens
- Legacy models (Claude 2, Instant) have no caching discounts
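Worked example (Sonnet, illustrative volumes): a request with 2,000 regular input tokens,
200,000 cache-read tokens, and 1,500 output tokens costs roughly
(2,000/1e6)*$3 + (200,000/1e6)*$0.30 + (1,500/1e6)*$15 = $0.006 + $0.06 + $0.0225 = $0.0885.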
It creates an interactive visualization with two charts:
- Daily costs by project (stacked area chart)
- Cumulative costs over time (line/bar chart)
The visualization is saved as an interactive HTML file that can be opened in any browser.
Terminal output shows a simple, aligned cost summary by project.
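For example, the per-project summary looks like this (illustrative project names and numbers):
myproject/api      :     $12.34
myproject/frontend :      $5.67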
Usage:
./claude_code_cost_visualizer.py
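(if running the saved file directly, make it executable first: chmod +x claude_code_cost_visualizer.py)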
(Dependencies will be automatically installed by uv)
"""
import os
import json
import glob
import pandas as pd
import plotly.graph_objects as go
from collections import defaultdict
from plotly.subplots import make_subplots  # used below to build the two stacked subplots
# Suppress pandas future warning about downcasting
pd.set_option('future.no_silent_downcasting', True)
# Claude model pricing configuration (as of January 2025)
# Source: Anthropic pricing page - costs are per million tokens
# Includes prompt caching pricing for accurate cost calculation
MODEL_PRICING = {
# Claude 4 models (latest generation)
"claude-4-opus": {
"input": 15.0, "output": 75.0,
"cache_write_5m": 18.75, "cache_write_1h": 30.0, "cache_read": 1.50
},
"claude-4-sonnet": {
"input": 3.0, "output": 15.0,
"cache_write_5m": 3.75, "cache_write_1h": 6.0, "cache_read": 0.30
},
"claude-4-5-sonnet": {
"input": 3.0, "output": 15.0,
"cache_write_5m": 3.75, "cache_write_1h": 6.0, "cache_read": 0.30
},
"claude-sonnet-4-5-20250929": { # Specific version used in your JSONL files
"input": 3.0, "output": 15.0,
"cache_write_5m": 3.75, "cache_write_1h": 6.0, "cache_read": 0.30
},
"claude-sonnet-4-20250514": { # Specific version used in your JSONL files
"input": 3.0, "output": 15.0,
"cache_write_5m": 3.75, "cache_write_1h": 6.0, "cache_read": 0.30
},
# Claude 3.5 models with caching support
"claude-3.5-sonnet": {
"input": 3.0, "output": 15.0,
"cache_write_5m": 3.75, "cache_write_1h": 6.0, "cache_read": 0.30
},
"claude-3-5-sonnet-20241022": { # Latest 3.5 Sonnet with date
"input": 3.0, "output": 15.0,
"cache_write_5m": 3.75, "cache_write_1h": 6.0, "cache_read": 0.30
},
"claude-3-5-sonnet-20240620": { # Previous 3.5 Sonnet version
"input": 3.0, "output": 15.0,
"cache_write_5m": 3.75, "cache_write_1h": 6.0, "cache_read": 0.30
},
"claude-3.5-haiku": {
"input": 0.8, "output": 4.0,
"cache_write_5m": 1.0, "cache_write_1h": 1.6, "cache_read": 0.08
},
"claude-3-5-haiku-20241022": { # Latest 3.5 Haiku with date
"input": 0.8, "output": 4.0,
"cache_write_5m": 1.0, "cache_write_1h": 1.6, "cache_read": 0.08
},
# Claude 3 models (legacy) with caching support
"claude-3-opus": {
"input": 15.0, "output": 75.0,
"cache_write_5m": 18.75, "cache_write_1h": 30.0, "cache_read": 1.50
},
"claude-3-opus-20240229": { # Specific Opus version
"input": 15.0, "output": 75.0,
"cache_write_5m": 18.75, "cache_write_1h": 30.0, "cache_read": 1.50
},
"claude-3-sonnet": {
"input": 3.0, "output": 15.0,
"cache_write_5m": 3.75, "cache_write_1h": 6.0, "cache_read": 0.30
},
"claude-3-sonnet-20240229": { # Specific Sonnet version
"input": 3.0, "output": 15.0,
"cache_write_5m": 3.75, "cache_write_1h": 6.0, "cache_read": 0.30
},
"claude-3-haiku": {
"input": 0.25, "output": 1.25,
"cache_write_5m": 0.30, "cache_write_1h": 0.50, "cache_read": 0.03
},
"claude-3-haiku-20240307": { # Specific Haiku version
"input": 0.25, "output": 1.25,
"cache_write_5m": 0.30, "cache_write_1h": 0.50, "cache_read": 0.03
},
# Claude 2 models (legacy, limited caching)
"claude-2.1": {
"input": 8.0, "output": 24.0,
"cache_write_5m": 8.0, "cache_write_1h": 8.0, "cache_read": 8.0 # No caching discount
},
"claude-2.0": {
"input": 8.0, "output": 24.0,
"cache_write_5m": 8.0, "cache_write_1h": 8.0, "cache_read": 8.0 # No caching discount
},
"claude-2": {
"input": 8.0, "output": 24.0,
"cache_write_5m": 8.0, "cache_write_1h": 8.0, "cache_read": 8.0 # No caching discount
},
# Claude Instant (legacy, deprecated)
"claude-instant-1.2": {
"input": 0.8, "output": 2.4,
"cache_write_5m": 0.8, "cache_write_1h": 0.8, "cache_read": 0.8 # No caching discount
},
"claude-instant-1": {
"input": 1.63, "output": 5.51,
"cache_write_5m": 1.63, "cache_write_1h": 1.63, "cache_read": 1.63 # No caching discount
},
# Default fallback for unknown models (using 3.5 Sonnet pricing)
"default": {
"input": 3.0, "output": 15.0,
"cache_write_5m": 3.75, "cache_write_1h": 6.0, "cache_read": 0.30
}
}
# To add support for new models, add them to the MODEL_PRICING dictionary above with their
# input, output, and cache token costs per million tokens (see the example entry below).
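# A hypothetical future model (name and prices invented purely for illustration) would be
# registered like this, including its prompt-caching rates:
#
#     "claude-example-next": {
#         "input": 5.0, "output": 25.0,
#         "cache_write_5m": 6.25, "cache_write_1h": 10.0, "cache_read": 0.50
#     },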
# Find all JSONL files in the Claude projects directory
# Use os.path.expanduser to correctly resolve the '~' to the user's home directory
project_dir = os.path.expanduser("~/.claude/projects/")
jsonl_files = glob.glob(f"{project_dir}/**/*.jsonl", recursive=True)
# Dictionary to store data: {date: {project: cost}}
data = defaultdict(lambda: defaultdict(float))
project_names = set()
total_cost = 0
model_usage = defaultdict(int) # Track model usage counts
model_costs = defaultdict(float) # Track costs per model
monthly_costs = defaultdict(float) # Track costs per month
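# For reference, the parser below expects each JSONL line to look roughly like this abridged,
# illustrative record (field names taken from the parsing code; real entries carry many more fields):
#
#     {"timestamp": "2025-01-15T10:22:33.000Z",
#      "message": {"model": "claude-sonnet-4-20250514",
#                  "usage": {"input_tokens": 12,
#                            "output_tokens": 340,
#                            "cache_creation_input_tokens": 2048,
#                            "cache_read_input_tokens": 15000,
#                            "cache_creation": {"ephemeral_5m_input_tokens": 2048,
#                                               "ephemeral_1h_input_tokens": 0}}}}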
# Process each JSONL file
for file_path in jsonl_files:
try:
# Extract project name from path
parts = file_path.split(os.sep) # Use os.sep for platform-independent path splitting
try:
# Find the '.claude' part and get the project name after 'projects'
# Adjusting index to handle both hardcoded path structure and '~' expanded paths
claude_idx = -1
for i, part in enumerate(parts):
if part == '.claude':
claude_idx = i
break
if claude_idx != -1 and claude_idx + 2 < len(parts): # Ensure 'projects' and project name exist
# If the path looks like /home/user/.claude/projects/project_name/...
project_name = parts[claude_idx + 2]
# Further cleanup: strip the encoded home-directory prefix and turn dashes back into slashes
# (note: '-Users-lullu-' is hardcoded from the original script; adjust it to your own prefix)
project_name = project_name.replace('-Users-lullu-', '').replace('-', '/')
else:
project_name = "unknown" # Fallback if structure not as expected
except ValueError:
# Defensive fallback (unreachable with the loop above): use the parent directory name
project_name = os.path.basename(os.path.dirname(file_path))
project_names.add(project_name)
# Process each line in the JSONL file
with open(file_path, 'r') as f:
for line in f:
try:
entry = json.loads(line)
# Calculate cost from token usage (if available)
cost = 0
if 'message' in entry and 'usage' in entry['message']:
usage = entry['message']['usage']
model = entry['message'].get('model', 'unknown')
# Get pricing for this model, fallback to default if unknown
pricing = MODEL_PRICING.get(model, MODEL_PRICING['default'])
# If this is an unknown model, add it to our usage tracking for visibility
if model not in MODEL_PRICING and model != 'unknown':
print(f"Warning: Unknown model '{model}' found. Using default pricing.")
# Get token counts
input_tokens = usage.get('input_tokens', 0)
cache_creation_tokens = usage.get('cache_creation_input_tokens', 0)
cache_read_tokens = usage.get('cache_read_input_tokens', 0)
output_tokens = usage.get('output_tokens', 0)
# Get cache creation info to determine cache type
cache_creation_info = usage.get('cache_creation', {})
ephemeral_5m_tokens = cache_creation_info.get('ephemeral_5m_input_tokens', 0)
ephemeral_1h_tokens = cache_creation_info.get('ephemeral_1h_input_tokens', 0)
# Calculate costs for different token types
# Regular input tokens at standard rate
regular_input_cost = (input_tokens / 1_000_000) * pricing['input']
# Cache creation tokens (assume 5m cache if not specified in cache_creation)
if ephemeral_5m_tokens > 0:
cache_write_cost = (ephemeral_5m_tokens / 1_000_000) * pricing['cache_write_5m']
elif ephemeral_1h_tokens > 0:
cache_write_cost = (ephemeral_1h_tokens / 1_000_000) * pricing['cache_write_1h']
elif cache_creation_tokens > 0:
# Fallback: assume 5m cache for unspecified cache creation
cache_write_cost = (cache_creation_tokens / 1_000_000) * pricing['cache_write_5m']
else:
cache_write_cost = 0
# Cache read tokens at heavily discounted rate
cache_read_cost = (cache_read_tokens / 1_000_000) * pricing['cache_read']
# Output tokens at standard rate
output_cost = (output_tokens / 1_000_000) * pricing['output']
# Total cost
cost = regular_input_cost + cache_write_cost + cache_read_cost + output_cost
# Track model usage statistics
model_usage[model] += 1
model_costs[model] += cost
if cost > 0:
# Extract date from timestamp
timestamp = entry.get('timestamp')
if timestamp:
date = timestamp.split('T')[0] # Get YYYY-MM-DD part
month = date[:7] # Get YYYY-MM part for monthly tracking
# Add cost to the appropriate date and project
data[date][project_name] += cost
monthly_costs[month] += cost
total_cost += cost
except (json.JSONDecodeError, KeyError):
continue # Skip invalid lines
except Exception as e:
print(f"Error processing file {file_path}: {e}")
continue
# Convert to DataFrame for easier plotting
dates = sorted(data.keys())
projects = sorted(list(project_names)) # Convert set to list for sorting
if dates:
# Create a complete date range from first to last date to fill gaps
start_date = pd.to_datetime(dates[0])
end_date = pd.to_datetime(dates[-1])
complete_date_range = pd.date_range(start=start_date, end=end_date, freq='D')
complete_dates = [date.strftime('%Y-%m-%d') for date in complete_date_range]
# Create DataFrame with complete date range
df = pd.DataFrame(index=complete_dates, columns=projects)
# Fill in the actual data
for date in complete_dates:
for project in projects:
df.loc[date, project] = data[date].get(project, 0)
# Fill NaN values with 0
df = df.fillna(0)
else:
# Fallback for empty data
df = pd.DataFrame()
# Only create visualization if we have data
if not df.empty and len(projects) > 0:
# Create a single figure with two subplots sharing the x-axis
# Create figure with 2 rows (subplots stacked vertically)
fig = make_subplots(
rows=2,
cols=1,
shared_xaxes=True, # Share x-axes between subplots
vertical_spacing=0.1, # Add some space between the subplots
subplot_titles=('Daily Claude Code Usage Cost by Project', 'Cumulative Claude Code Usage Cost')
)
# 1. Daily stacked area chart (top subplot)
for project in projects:
fig.add_trace(
go.Scatter(
x=df.index,
y=df[project],
mode='lines',
name=project,
stackgroup='one',
hoverinfo='x+y+name'
),
row=1, col=1 # First subplot (top)
)
# Add total cost line to daily chart
df['Total'] = df.sum(axis=1)
fig.add_trace(
go.Scatter(
x=df.index,
y=df['Total'],
mode='lines',
name='Total Cost',
line=dict(width=2, color='black', dash='dash'),
),
row=1, col=1 # First subplot (top)
)
# 2. Cumulative cost chart (bottom subplot)
df['Cumulative'] = df['Total'].cumsum()
fig.add_trace(
go.Scatter(
x=df.index,
y=df['Cumulative'],
mode='lines',
name='Cumulative Cost',
line=dict(width=3, color='red'),
fill='tozeroy',
),
row=2, col=1 # Second subplot (bottom)
)
# Optionally add daily cost bars to the cumulative chart
fig.add_trace(
go.Bar(
x=df.index,
y=df['Total'],
name='Daily Cost',
marker_color='rgba(55, 83, 109, 0.7)',
opacity=0.7,
showlegend=False, # Hide from legend since it's already in the top subplot
),
row=2, col=1 # Second subplot (bottom)
)
# Update layout for entire figure
fig.update_layout(
title=f'Claude Code Usage Cost Analysis (Total: ${total_cost:.2f})',
hovermode='x unified', # Unified hover mode across all subplots
legend_title='Projects/Costs',
width=1600,
height=900,
margin=dict(l=50, r=50, t=100, b=100),
)
# Update y-axis labels for each subplot
fig.update_yaxes(title_text="Daily Cost (USD)", row=1, col=1)
fig.update_yaxes(title_text="Cumulative Cost (USD)", row=2, col=1)
fig.update_xaxes(title_text="Date", row=2, col=1) # Only add x-axis title to bottom subplot
# Save HTML file
html_path = os.path.expanduser("~/claude_code_cost_analysis.html")
fig.write_html(html_path)
print(f"Saved Claude Code cost analysis chart to {html_path}")
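# Note: to also open the chart directly in a browser or notebook renderer, fig.show()
# could be called here; it is omitted because it may fail in headless environments.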
else:
print("No data found to create visualization.")
# Print summary with simple formatting
print("\n" + "="*50)
print(f"Total Claude Code usage cost: ${total_cost:.2f}")
print(f"Script supports {len(MODEL_PRICING)-1} Claude models + fallback pricing")
# Print model usage statistics
if model_usage:
print("\nModel usage statistics:")
print("-"*50)
max_model_len = max([len(model) for model in model_usage.keys()]) if model_usage else 0
for model, count in sorted(model_usage.items(), key=lambda x: model_costs[x[0]], reverse=True):
cost = model_costs[model]
cost_str = f"${cost:.2f}"
print(f"{model.ljust(max_model_len)} : {count:4d} calls, {cost_str.rjust(10)}")
# Print monthly cost breakdown
if monthly_costs:
print("\nMonthly cost breakdown:")
print("-"*50)
# Sort months chronologically
sorted_months = sorted(monthly_costs.items())
for month, cost in sorted_months:
cost_str = f"${cost:.2f}"
# Convert YYYY-MM to more readable format
year, month_num = month.split('-')
month_names = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
month_name = month_names[int(month_num) - 1]
readable_month = f"{month_name} {year}"
print(f"{readable_month.ljust(12)} : {cost_str.rjust(10)}")
# Calculate average monthly cost
if len(monthly_costs) > 1:
avg_monthly = sum(monthly_costs.values()) / len(monthly_costs)
print(f"{''.ljust(12)} : {''.rjust(10)}")
avg_str = f"${avg_monthly:.2f}"
print(f"{'Avg/month'.ljust(12)} : {avg_str.rjust(10)}")
# Get project totals and sort by cost (only if we have data)
if not df.empty and len(projects) > 0:
project_totals = df[projects].sum().sort_values(ascending=False)
# Calculate the max length of project names for alignment
max_project_len = max([len(p) for p in projects]) if projects else 0
print("\nCost by project:")
print("-"*50)
# Print sorted projects with dollar amounts aligned
for project, cost in project_totals.items():
cost_str = f"${cost:.2f}"
print(f"{project.ljust(max_project_len)} : {cost_str.rjust(10)}")
elif project_names:
print("\nNo cost data found for projects.")
else:
print("\nNo projects found.")
print("="*50)