Last active
October 1, 2025 16:26
-
-
Save alexfazio/274fb08ad6266eefe9d329df2be80425 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
#
# count_tokens.sh - Count tokens in developer files using Claude API
#
# Usage: ./count_tokens.sh [file1.py] [file2.js] [folder/] ...
#
# Arguments:
#   - Individual files (any text-based developer file)
#   - Directories (recursively finds code, config, markup, and docs)
#   - Mix of files and directories
#
# Supported file types:
#   Code: .py, .js, .ts, .go, .rs, .java, .c, .cpp, .sh, .rb, .php, and 40+ more
#   Config: .yaml, .json, .toml, .ini, .env, Dockerfile, Makefile, etc.
#   Markup: .md, .rst, .html, .xml, .txt
#   Styles: .css, .scss, .sass, .less
#
# Environment Variables:
#   ANTHROPIC_API_KEY - Required API key for authentication (loaded from .env)

# Strict mode: abort on command failure (-e), unset variables (-u), and
# failures anywhere in a pipeline (-o pipefail).
set -euo pipefail
# Load environment variables from a .env file sitting next to this script.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
ENV_FILE="${SCRIPT_DIR}/.env"
if [[ -f "$ENV_FILE" ]]; then
  # Auto-export every variable assigned while the env file is sourced.
  set -a
  # shellcheck disable=SC1090
  source "$ENV_FILE"
  set +a
fi
# Constants
# Token-counting endpoint of the Anthropic Messages API.
readonly API_URL="https://api.anthropic.com/v1/messages/count_tokens"
# Value sent in the required "anthropic-version" request header.
readonly API_VERSION="2023-06-01"
# Model whose tokenizer performs the count.
readonly MODEL="claude-sonnet-4-5-20250929"
# Context-window sizes used for the percentage columns in the report.
readonly CONTEXT_200K=200000
readonly CONTEXT_1M=1000000

# ANSI escape sequences for colored terminal output
readonly RED='\033[0;31m'
readonly GREEN='\033[0;32m'
readonly YELLOW='\033[1;33m'
readonly BLUE='\033[0;34m'
readonly CYAN='\033[0;36m'
readonly BOLD='\033[1m'
readonly NC='\033[0m' # No Color
# Verify that every external tool this script relies on is installed.
# Exits with status 1 and a message on stderr when something is missing.
check_dependencies() {
  local -a missing_deps=()
  local tool
  for tool in curl jq; do
    command -v "$tool" > /dev/null 2>&1 || missing_deps+=("$tool")
  done
  if [ ${#missing_deps[@]} -ne 0 ]; then
    echo -e "${RED}Error: Missing required dependencies: ${missing_deps[*]}${NC}" >&2
    echo "Please install them and try again." >&2
    exit 1
  fi
}
# Ensure ANTHROPIC_API_KEY is set and non-empty; otherwise exit 1 with
# guidance on stderr pointing at the expected .env location.
check_api_key() {
  [ -n "${ANTHROPIC_API_KEY:-}" ] && return 0
  echo -e "${RED}Error: ANTHROPIC_API_KEY environment variable is not set${NC}" >&2
  echo "Please add your API key to ${ENV_FILE}" >&2
  exit 1
}
# Find all developer files in a directory recursively
# Includes: code, config, markup, and documentation files
# Args: directory path
# Returns: list of matching file paths (one per line), sorted lexically so
#          files in the same directory are adjacent (the display code relies
#          on this ordering for grouping).
# NOTE(review): -name matching is case-sensitive, so e.g. "*.r" will not
# match "analysis.R" — confirm whether uppercase extensions are intended.
find_developer_files() {
  local dir="$1"
  if [ ! -d "$dir" ]; then
    echo -e "${RED}Error: Not a directory: $dir${NC}" >&2
    return 1
  fi
  # Find all developer files by extension
  # Organized by category for maintainability
  find "$dir" -type f \( \
    -name "*.py" -o -name "*.pyw" -o -name "*.pyi" -o \
    -name "*.js" -o -name "*.mjs" -o -name "*.cjs" -o \
    -name "*.ts" -o -name "*.tsx" -o -name "*.jsx" -o \
    -name "*.go" -o -name "*.rs" -o -name "*.java" -o \
    -name "*.c" -o -name "*.cpp" -o -name "*.cc" -o -name "*.cxx" -o \
    -name "*.h" -o -name "*.hpp" -o -name "*.hh" -o -name "*.hxx" -o \
    -name "*.cs" -o -name "*.sh" -o -name "*.bash" -o -name "*.zsh" -o \
    -name "*.rb" -o -name "*.php" -o -name "*.swift" -o -name "*.kt" -o \
    -name "*.scala" -o -name "*.clj" -o -name "*.ex" -o -name "*.exs" -o \
    -name "*.erl" -o -name "*.hrl" -o -name "*.hs" -o -name "*.elm" -o \
    -name "*.ml" -o -name "*.fs" -o -name "*.r" -o -name "*.lua" -o \
    -name "*.pl" -o -name "*.pm" -o -name "*.tcl" -o -name "*.vim" -o \
    -name "*.md" -o -name "*.markdown" -o -name "*.rst" -o -name "*.txt" -o \
    -name "*.yaml" -o -name "*.yml" -o -name "*.json" -o -name "*.jsonc" -o \
    -name "*.xml" -o -name "*.toml" -o -name "*.ini" -o -name "*.conf" -o \
    -name "*.cfg" -o -name "*.config" -o -name "*.properties" -o \
    -name "*.html" -o -name "*.htm" -o -name "*.css" -o -name "*.scss" -o \
    -name "*.sass" -o -name "*.less" -o -name "*.vue" -o -name "*.svelte" -o \
    -name "*.sql" -o -name "*.dockerfile" -o -name "Dockerfile*" -o \
    -name "Makefile*" -o -name "*.mk" -o -name ".gitignore" -o \
    -name ".dockerignore" -o -name ".editorconfig" -o -name "*.env*" \
    \) | sort
}
# Count tokens in a single file via the Anthropic count_tokens endpoint.
# Args:    $1 - path to the file to measure
# Outputs: the token count (integer) on stdout; diagnostics on stderr
# Returns: 0 on success; 1 on unreadable file, network, or API error
count_tokens() {
  local file="$1"

  # Check if file exists and is readable
  if [ ! -f "$file" ]; then
    echo -e "${RED}Error: File not found: $file${NC}" >&2
    return 1
  fi
  if [ ! -r "$file" ]; then
    echo -e "${RED}Error: File not readable: $file${NC}" >&2
    return 1
  fi

  # Read file content and escape it as a single JSON string
  local content
  content=$(jq -Rs . < "$file") || {
    echo -e "${RED}Error: Failed to JSON-encode file: $file${NC}" >&2
    return 1
  }

  # Build the request payload; $content is already valid JSON, hence --argjson
  local request
  request=$(jq -n \
    --arg model "$MODEL" \
    --argjson content "$content" \
    '{
      model: $model,
      messages: [
        {
          role: "user",
          content: $content
        }
      ]
    }')

  # Make the API request; -w appends the HTTP status code on its own line.
  # Guard explicitly: curl can fail at the transport level (DNS, TLS, ...)
  local response
  if ! response=$(curl -s -w "\n%{http_code}" \
    -X POST "$API_URL" \
    -H "x-api-key: $ANTHROPIC_API_KEY" \
    -H "anthropic-version: $API_VERSION" \
    -H "content-type: application/json" \
    -d "$request"); then
    echo -e "${RED}Error: API request failed (network error)${NC}" >&2
    return 1
  fi

  # Split the HTTP status (last line) from the response body
  local http_code
  http_code=$(printf '%s\n' "$response" | tail -n1)
  local body
  body=$(printf '%s\n' "$response" | sed '$d')

  # Check for API-level errors
  if [ "$http_code" != "200" ]; then
    echo -e "${RED}Error: API request failed with status $http_code${NC}" >&2
    printf '%s\n' "$body" | jq -r '.error.message // .' >&2
    return 1
  fi

  # Extract and validate the token count (reject missing/null field instead
  # of silently emitting the string "null")
  local token_count
  token_count=$(printf '%s\n' "$body" | jq -r '.input_tokens')
  if [ -z "$token_count" ] || [ "$token_count" = "null" ]; then
    echo -e "${RED}Error: Unexpected API response (missing input_tokens)${NC}" >&2
    return 1
  fi
  echo "$token_count"
}
# Print a horizontal rule of `width` box-drawing characters (U+2500).
# GNU tr is byte-oriented, so the previous `tr ' ' '─'` translated spaces to
# only the FIRST byte of the multi-byte character, producing mojibake on
# Linux; build the line with a loop instead, which is multibyte-safe.
# Args: $1 - number of characters
print_separator() {
  local width="$1"
  local line=""
  local i
  for ((i = 0; i < width; i++)); do
    line+='─'
  done
  printf '%s\n' "$line"
}
# Compute context-window usage percentages and a warning tag.
# Args:    $1 - token count (integer)
# Globals: CONTEXT_200K, CONTEXT_1M (read)
# Outputs: "pct_200k pct_1m warning" on stdout; warning is empty, "! 200K",
#          "! 1M", or "! Both" when usage exceeds 25% of a window
calculate_stats() {
  local tokens=$1
  local pct_200k pct_1m
  # Pass values with -v instead of interpolating shell variables into the
  # awk program text (robust against unexpected content in $tokens).
  pct_200k=$(awk -v t="$tokens" -v c="$CONTEXT_200K" 'BEGIN {printf "%.2f", (t / c) * 100}')
  pct_1m=$(awk -v t="$tokens" -v c="$CONTEXT_1M" 'BEGIN {printf "%.2f", (t / c) * 100}')
  local warning=""
  if awk -v p="$pct_200k" 'BEGIN {exit !(p > 25)}'; then
    warning="! 200K"
  fi
  if awk -v p="$pct_1m" 'BEGIN {exit !(p > 25)}'; then
    if [ -n "$warning" ]; then
      warning="! Both"
    else
      warning="! 1M"
    fi
  fi
  echo "$pct_200k $pct_1m $warning"
}
# Compute the path of `target` relative to `base`.
# Args:    $1 - base directory, $2 - target path
# Outputs: target with the base prefix stripped when target lives under
#          base; otherwise the unchanged target path
get_relative_path() {
  local base="$1" target="$2"
  local abs_base
  # Resolve base to an absolute path; fall back to the raw value when the
  # directory cannot be entered.
  if ! abs_base=$(cd "$base" 2>/dev/null && pwd); then
    abs_base="$base"
  fi
  abs_base="${abs_base%/}"
  case "$target" in
    "$abs_base"*)
      # Subpath: drop the base prefix and any leading slash
      local trimmed="${target#"$abs_base"}"
      echo "${trimmed#/}"
      ;;
    *)
      # Not a subpath: return the target untouched
      echo "$target"
      ;;
  esac
}
# Return the last N segments of a path, joined with "/".
# Full UUIDs inside a segment are abbreviated to their first 8 hex digits.
# Args: $1 - path, $2 - number of segments to keep (default 3)
get_last_n_segments() {
  local path="$1"
  local keep="${2:-3}"
  # Empty path shows as the current directory
  if [ -z "$path" ]; then
    echo "."
    return
  fi
  local IFS='/'
  local -a parts=()
  read -r -a parts <<< "$path"
  local count=${#parts[@]}
  if [ "$count" -eq 0 ]; then
    echo "$path"
    return
  fi
  # First index to keep; clamp at the start of the array
  local first=$((count - keep))
  if [ "$first" -lt 0 ]; then
    first=0
  fi
  local out="" seg idx
  for ((idx = first; idx < count; idx++)); do
    # Abbreviate any full UUID to its leading 8-hex-digit group
    seg=$(printf '%s\n' "${parts[$idx]}" | sed -E 's/([0-9a-f]{8})-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}/\1/g')
    if [ -z "$out" ]; then
      out="$seg"
    else
      out="$out/$seg"
    fi
  done
  echo "$out"
}
# Shorten generated "*_extraction.<ext>" filenames to "extraction.<ext>"
# for display; every other filename is returned untouched.
# Args: $1 - basename to simplify
simplify_filename() {
  local name="$1"
  case "$name" in
    *_extraction.*)
      # Keep only the extension after the last dot
      echo "extraction.${name##*.}"
      ;;
    *)
      echo "$name"
      ;;
  esac
}
# Return success (0) if the named array contains at least one directory.
# Args: $1 - NAME of an array variable (passed by reference)
# NOTE(review): `local -n` needs bash 4.3+, while the display code claims
# bash 3.2 compatibility — confirm the minimum supported bash version.
has_directories() {
  # Use an unlikely nameref name so it cannot collide with the caller's
  # array name; the loop variable is now `local` so it no longer leaks
  # into the global scope.
  local -n _hd_items=$1
  local entry
  for entry in "${_hd_items[@]}"; do
    if [ -d "$entry" ]; then
      return 0
    fi
  done
  return 1
}
# Display results in a tree-style hierarchical table on stderr.
# Reads globals: files, token_counts, scan_base_path (populated by main).
# Args: $1 - grand-total token count across all files
# Compatible with bash 3.2+ (no associative arrays).
display_tree_table() {
  local total_tokens=$1
  local file_count=${#files[@]}

  # Print table header
  echo "" >&2
  if [ -n "$scan_base_path" ]; then
    echo -e "${BOLD}${CYAN}Token Count Summary${NC} ${CYAN}(relative to: $scan_base_path)${NC}" >&2
  else
    echo -e "${BOLD}${CYAN}Token Count Summary${NC}" >&2
  fi
  print_separator 80 >&2
  printf "${BOLD}%-23s %10s %12s %12s %s${NC}\n" \
    "File" "Tokens" "200K %" "1M %" "Warning" >&2
  print_separator 80 >&2

  # Process files grouped by directory. Files are pre-sorted by path, so
  # entries of the same directory are adjacent.
  local current_dir=""
  local dir_total=0
  local dir_file_count=0
  local -a dir_file_data=()
  local dir_count=0
  local i
  for ((i = 0; i < file_count; i++)); do
    local file="${files[$i]}"
    local tokens="${token_counts[$i]}"
    local file_dir
    file_dir=$(dirname "$file")

    # Flush the previous directory's section when the directory changes
    if [ "$file_dir" != "$current_dir" ] && [ -n "$current_dir" ]; then
      print_directory_section "$current_dir" "$dir_total" "$dir_file_count" "${dir_file_data[@]}"
      # BUGFIX: ((dir_count++)) returns status 1 when the value is 0 and
      # aborts the whole script under `set -e`; use plain assignment.
      dir_count=$((dir_count + 1))
      # Blank line between directory sections
      if [ "$i" -lt $((file_count - 1)) ]; then
        echo "" >&2
      fi
      # Reset accumulators for the new directory
      dir_total=0
      dir_file_count=0
      dir_file_data=()
    fi

    current_dir="$file_dir"
    # Accumulate file data as "basename|tokens"
    local base
    base=$(basename "$file")
    dir_file_data+=("$base|$tokens")
    dir_total=$((dir_total + tokens))
    dir_file_count=$((dir_file_count + 1))
  done

  # Flush the final directory
  if [ -n "$current_dir" ]; then
    print_directory_section "$current_dir" "$dir_total" "$dir_file_count" "${dir_file_data[@]}"
    dir_count=$((dir_count + 1))
  fi

  # Grand total when more than one directory or file was processed
  if [ "$dir_count" -gt 1 ] || [ "$file_count" -gt 1 ]; then
    print_separator 80 >&2
    local total_stats
    total_stats=$(calculate_stats "$total_tokens")
    local total_pct_200k total_pct_1m total_warning
    read -r total_pct_200k total_pct_1m total_warning <<<"$total_stats"
    if [ -n "$total_warning" ]; then
      printf "${BOLD}%-23s ${YELLOW}%10s${NC} %11s%% %11s%% ${YELLOW}%s${NC}\n" \
        "GRAND TOTAL" "$total_tokens" "$total_pct_200k" "$total_pct_1m" "$total_warning" >&2
    else
      printf "${BOLD}%-23s ${YELLOW}%10s${NC} %11s%% %11s%% %s${NC}\n" \
        "GRAND TOTAL" "$total_tokens" "$total_pct_200k" "$total_pct_1m" "-" >&2
    fi
  fi
  print_separator 80 >&2
  echo "" >&2
}
# Print one directory section: header, per-file rows, optional subtotal.
# Reads global: scan_base_path.
# Args: $1 - dir path, $2 - dir token total, $3 - file count,
#       $4.. - "basename|tokens" entries
print_directory_section() {
  local dir_path=$1
  local dir_total=$2
  local num_files=$3
  shift 3
  local -a file_data=("$@")

  # Directory-level stats
  local dir_stats
  dir_stats=$(calculate_stats "$dir_total")
  local dir_pct_200k dir_pct_1m dir_warning
  read -r dir_pct_200k dir_pct_1m dir_warning <<<"$dir_stats"

  # Show at most the last 3 path segments, relative to the scan base
  local display_path="$dir_path"
  if [ -n "$scan_base_path" ]; then
    display_path=$(get_relative_path "$scan_base_path" "$dir_path")
  fi
  display_path=$(get_last_n_segments "$display_path" 3)

  # Print directory header
  echo -e "${BOLD}${BLUE}${display_path}/${NC}" >&2

  # Print each file row
  local file_num=0
  local file_entry
  for file_entry in "${file_data[@]}"; do
    # BUGFIX: ((file_num++)) returns status 1 on the first iteration
    # (value 0) and aborts the script under `set -e`; use assignment.
    file_num=$((file_num + 1))
    # Parse "basename|tokens" (renamed local so it no longer shadows the
    # basename command)
    local name="${file_entry%|*}"
    local tokens="${file_entry#*|}"
    local display_filename
    display_filename=$(simplify_filename "$name")
    local file_stats
    file_stats=$(calculate_stats "$tokens")
    local pct_200k pct_1m warning
    read -r pct_200k pct_1m warning <<<"$file_stats"
    # Last file in the directory gets the closing tree branch
    local tree_char="├─"
    if [ "$file_num" -eq "$num_files" ]; then
      tree_char="└─"
    fi
    if [ -n "$warning" ]; then
      printf "${GREEN}%s %-20s${NC} ${YELLOW}%10s${NC} %11s%% %11s%% ${YELLOW}%s${NC}\n" \
        "$tree_char" "$display_filename" "$tokens" "$pct_200k" "$pct_1m" "$warning" >&2
    else
      printf "${GREEN}%s %-20s${NC} ${YELLOW}%10s${NC} %11s%% %11s%% %s\n" \
        "$tree_char" "$display_filename" "$tokens" "$pct_200k" "$pct_1m" "-" >&2
    fi
  done

  # Subtotal only when the directory contains more than one file
  if [ "$num_files" -gt 1 ]; then
    if [ -n "$dir_warning" ]; then
      printf "${BOLD} └─ Subtotal%9s ${YELLOW}%10s${NC} %11s%% %11s%% ${YELLOW}%s${NC}\n" \
        "" "$dir_total" "$dir_pct_200k" "$dir_pct_1m" "$dir_warning" >&2
    else
      printf "${BOLD} └─ Subtotal%9s ${YELLOW}%10s${NC} %11s%% %11s%% %s${NC}\n" \
        "" "$dir_total" "$dir_pct_200k" "$dir_pct_1m" "-" >&2
    fi
  fi
}
# Detect GNU parallel specifically (the moreutils tool of the same name
# would not work here, so the version banner is checked too).
has_parallel() {
  command -v parallel > /dev/null 2>&1 \
    && parallel --version 2>&1 | grep -q "GNU parallel"
}
# Count tokens for many files concurrently with GNU parallel.
# Appends to the global files/token_counts arrays and updates the caller's
# total_tokens and successful_files (dynamic scoping).
# Args: file paths to process
process_files_parallel() {
  local -a file_list=("$@")
  local temp_dir
  temp_dir=$(mktemp -d)
  local results_file="$temp_dir/results.txt"

  # The parallel workers run in fresh shells, so everything count_tokens
  # needs must be exported.
  export API_URL API_VERSION MODEL ANTHROPIC_API_KEY
  export -f count_tokens

  # Wrapper: emit "file|tokens" only on success so failed files simply
  # drop out of the results.
  count_tokens_wrapper() {
    local file="$1"
    local tokens
    if tokens=$(count_tokens "$file" 2>/dev/null); then
      echo "$file|$tokens"
    fi
  }
  export -f count_tokens_wrapper

  # Max 6 concurrent jobs to respect API rate limits; stderr suppressed to
  # avoid progress/ANSI noise. `|| true` keeps `set -e` from killing the
  # whole run if parallel itself exits non-zero.
  printf "%s\n" "${file_list[@]}" | \
    parallel --will-cite --jobs 6 count_tokens_wrapper {} 2>/dev/null > "$results_file" || true

  # Collect results into the global arrays
  while IFS='|' read -r file tokens; do
    if [ -n "$file" ] && [ -n "$tokens" ]; then
      files+=("$file")
      token_counts+=("$tokens")
      total_tokens=$((total_tokens + tokens))
      # BUGFIX: ((successful_files++)) returns status 1 when the value is
      # 0 and aborts the script under `set -e`; use plain assignment.
      successful_files=$((successful_files + 1))
    fi
  done < "$results_file"

  # Parallel finishes out of order: re-sort by path while keeping
  # files/token_counts aligned, so directory grouping works in the display.
  if [ "${#files[@]}" -gt 0 ]; then
    local -a combined=()
    local idx
    for ((idx = 0; idx < ${#files[@]}; idx++)); do
      combined+=("${files[$idx]}|${token_counts[$idx]}")
    done
    local -a sorted_entries=()
    local line
    while IFS= read -r line; do
      sorted_entries+=("$line")
    done < <(printf "%s\n" "${combined[@]}" | sort)
    # Rebuild the parallel arrays from the sorted "path|tokens" entries
    files=()
    token_counts=()
    local entry
    for entry in "${sorted_entries[@]}"; do
      files+=("${entry%|*}")
      token_counts+=("${entry#*|}")
    done
  fi

  # Cleanup
  rm -rf "$temp_dir"
}
# Main entry point: validate environment, expand arguments into a file
# list, count tokens (parallel when possible), and print the report.
main() {
  if [ $# -eq 0 ]; then
    echo "Usage: $0 [file1.py] [file2.js] [folder/] ..." >&2
    echo "" >&2
    echo "Count tokens in developer files using Claude API" >&2
    echo "Accepts individual files, directories, or a mix of both" >&2
    echo "Supports: code, config, markup, style files (50+ extensions)" >&2
    exit 1
  fi

  check_dependencies
  check_api_key

  # Globals shared with the display and parallel-processing functions
  files=()
  token_counts=()
  scan_base_path=""
  local total_tokens=0
  local successful_files=0

  # Base path for relative display: first directory argument, else PWD
  local arg
  for arg in "$@"; do
    if [ -d "$arg" ]; then
      scan_base_path=$(cd "$arg" && pwd)
      break
    fi
  done
  if [ -z "$scan_base_path" ]; then
    scan_base_path=$(pwd)
  fi

  # Expand directory arguments into individual files
  local -a all_files=()
  local file
  for arg in "$@"; do
    if [ -d "$arg" ]; then
      echo -e "${CYAN}Scanning directory${NC}: $arg" >&2
      while IFS= read -r file; do
        # Use paths as-is from find (avoids path length issues)
        all_files+=("$file")
      done < <(find_developer_files "$arg")
    elif [ -f "$arg" ]; then
      all_files+=("$arg")
    else
      echo -e "${RED}Warning: Skipping non-existent path: $arg${NC}" >&2
    fi
  done

  # Check if we found any files
  if [ ${#all_files[@]} -eq 0 ]; then
    echo -e "${RED}Error: No supported files found${NC}" >&2
    exit 1
  fi
  echo -e "${CYAN}Found ${#all_files[@]} file(s) to process${NC}" >&2

  # Parallel when GNU parallel is available and the batch is big enough
  if has_parallel && [ ${#all_files[@]} -gt 3 ]; then
    echo -e "${CYAN}Using parallel processing (6 concurrent jobs)${NC}" >&2
    echo "" >&2
    process_files_parallel "${all_files[@]}"
  else
    if [ ${#all_files[@]} -gt 3 ] && ! has_parallel; then
      echo -e "${YELLOW}Note: Install GNU parallel for faster processing${NC}" >&2
    fi
    echo "" >&2
    # Sequential processing
    local file_tokens
    for file in "${all_files[@]}"; do
      echo -e "${BLUE}Processing${NC}: $file" >&2
      if file_tokens=$(count_tokens "$file"); then
        files+=("$file")
        token_counts+=("$file_tokens")
        total_tokens=$((total_tokens + file_tokens))
        # BUGFIX: ((successful_files++)) returns status 1 when the value
        # is 0 and aborts the script under `set -e`; use assignment.
        successful_files=$((successful_files + 1))
      fi
    done
  fi

  # Display results in tree format
  if [ "$successful_files" -gt 0 ]; then
    display_tree_table "$total_tokens"
  fi
}

main "$@"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment