Created
October 7, 2025 17:34
-
-
Save nilsreichardt/d46ab5161dcb768e95b82af3e96fdd39 to your computer and use it in GitHub Desktop.
This Python script scans all CSV files in a folder, sums up `input_tokens` and `output_tokens` from each file, reports per-file totals, and prints the grand total across all files.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
| """ | |
| Sum OpenAI usage tokens from CSV exports. | |
| For each *.csv file in the provided folder, this script computes: | |
| - SUM(input_tokens) | |
| - SUM(output_tokens) | |
| - SUM(input_tokens + output_tokens) | |
| Then it prints a per-file summary and a final total across all files. | |
| Usage: | |
| python sum_openai_usage_tokens.py /path/to/folder | |
| # Optional flags: | |
| # --pattern "*.csv" (glob pattern for files, default: "*.csv") | |
| # --encoding "utf-8" (file encoding, default: "utf-8") | |
| # --quiet (only print totals, suppress per-row warnings) | |
| """ | |
| import argparse | |
| import csv | |
| import glob | |
| import os | |
| import sys | |
| from typing import Tuple | |
def parse_int(value: str) -> int:
    """Coerce *value* to an int, returning 0 when it cannot be parsed.

    Tolerates None, blank/whitespace-only strings, thousands separators
    ("1,234"), stray surrounding quotes ('"42"'), and float-formatted
    numbers ("12.0").
    """
    if value is None:
        return 0
    # Normalize: drop whitespace, thousands separators, and wrapping quotes.
    cleaned = str(value).strip().replace(",", "").strip('"').strip("'")
    if not cleaned:
        return 0
    try:
        # Round-trip through float so values like "12.0" still parse.
        return int(float(cleaned))
    except Exception:
        return 0
def sum_file(path: str, quiet: bool = False, encoding: str = "utf-8") -> Tuple[int, int, int]:
    """Return (sum_input, sum_output, sum_total) for a CSV file.

    Column lookup is case-insensitive; unparsable cell values count as 0
    (with a per-row warning on stderr unless *quiet* is set).

    Args:
        path: CSV file to read.
        quiet: Suppress per-row parse warnings.
        encoding: Text encoding used to open the file.

    Raises:
        ValueError: if 'input_tokens' or 'output_tokens' is missing from
            the header.
    """
    # NOTE(fix): the original wrapped the additions in try/except and warned
    # there, but the parser swallowed every exception itself, so the warning
    # path (and the `quiet` flag) could never fire. Warn at the point of
    # parse failure instead; the sums are unchanged (failures count as 0).
    def _to_int(raw, line_no: int, column: str) -> int:
        # Tolerant int parse: empty -> 0, strip commas and stray quotes,
        # accept float-formatted numbers like "12.0".
        if raw is None:
            return 0
        s = str(raw).strip().replace(",", "").strip('"').strip("'")
        if not s:
            return 0
        try:
            return int(float(s))
        except Exception:
            if not quiet:
                print(
                    f"Warning: {os.path.basename(path)} line {line_no}: "
                    f"cannot parse {column}={raw!r}; counting as 0",
                    file=sys.stderr,
                )
            return 0

    sum_in = 0
    sum_out = 0
    with open(path, "r", encoding=encoding, newline="") as f:
        reader = csv.DictReader(f)
        # Map lowercased header names back to their original spelling so the
        # required columns are found regardless of case.
        header_lower = {h.lower(): h for h in reader.fieldnames or []}
        if "input_tokens" not in header_lower or "output_tokens" not in header_lower:
            raise ValueError(
                f"{os.path.basename(path)} is missing required columns. "
                f"Found columns: {reader.fieldnames}. Needs 'input_tokens' and 'output_tokens'."
            )
        col_in = header_lower["input_tokens"]
        col_out = header_lower["output_tokens"]
        # start=2: line 1 is the header, so the first data row is line 2.
        for line_no, row in enumerate(reader, start=2):
            sum_in += _to_int(row.get(col_in), line_no, col_in)
            sum_out += _to_int(row.get(col_out), line_no, col_out)
    return sum_in, sum_out, sum_in + sum_out
def main():
    """CLI entry point: validate the folder, sum every matching CSV, print totals."""
    parser = argparse.ArgumentParser(description="Sum OpenAI token usage from CSV files.")
    parser.add_argument("folder", help="Folder containing CSV export files")
    parser.add_argument("--pattern", default="*.csv", help='Glob to match files (default: "*.csv")')
    parser.add_argument("--encoding", default="utf-8", help="File encoding (default: utf-8)")
    parser.add_argument("--quiet", action="store_true", help="Suppress non-critical warnings")
    args = parser.parse_args()

    folder = os.path.abspath(os.path.expanduser(args.folder))
    if not os.path.isdir(folder):
        print(f"Error: '{folder}' is not a directory or cannot be accessed.", file=sys.stderr)
        sys.exit(1)

    matched = sorted(glob.glob(os.path.join(folder, args.pattern)))
    if not matched:
        print(f"No files matched {args.pattern!r} in {folder}", file=sys.stderr)
        sys.exit(2)

    # Running totals across all files: input, output, combined.
    grand_in = 0
    grand_out = 0
    grand_total = 0
    print(f"Scanning {len(matched)} file(s) in {folder}\n")
    for path in matched:
        try:
            file_in, file_out, file_total = sum_file(path, quiet=args.quiet, encoding=args.encoding)
        except Exception as exc:
            # A file with a bad header (or unreadable content) is skipped,
            # not fatal; the remaining files are still summed.
            print(f"[SKIP] {os.path.basename(path)} - {exc}", file=sys.stderr)
            continue
        grand_in += file_in
        grand_out += file_out
        grand_total += file_total
        print(f"{os.path.basename(path)}")
        print(f" input_tokens: {file_in:,}")
        print(f" output_tokens: {file_out:,}")
        print(f" total: {file_total:,}")
        print()

    print("== ALL FILES ==")
    print(f"SUM input_tokens: {grand_in:,}")
    print(f"SUM output_tokens: {grand_out:,}")
    print(f"SUM total tokens: {grand_total:,}")


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment