Created
January 3, 2025 10:49
-
-
Save rollwagen/1143ad73cf0f939a0927a8016cb10328 to your computer and use it in GitHub Desktop.
Token counting w/ Anthropic
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# /// script | |
# requires-python = ">=3.12" | |
# dependencies = [ | |
# "click>=8.1.7", | |
# "requests>=2.31.0", | |
# ] | |
# /// | |
import click | |
import sys | |
import requests | |
from typing import List, Optional | |
API_URL = "https://api.anthropic.com/v1/messages/count_tokens" | |
def count_tokens(
    text: str,
    api_key: str,
    model: str = "claude-3-sonnet-20240229",
    timeout: float = 30.0,
) -> int:
    """
    Count tokens for the given text using Anthropic's API.

    Sends ``text`` as a single user message to the endpoint in ``API_URL``
    and returns the ``input_tokens`` field of the JSON response.

    Args:
        text: Text to count. Empty text returns 0 without any request.
        api_key: Value sent in the ``x-api-key`` header.
        model: Model identifier placed in the request body.
        timeout: Seconds passed to ``requests.post`` as its ``timeout``;
            without one a stalled connection would block forever.

    Returns:
        The token count reported by the API, or 0 for empty text.

    Raises:
        click.ClickException: The API answered with HTTP 400; the message
            carries the API's own error text when it can be extracted.
        requests.exceptions.RequestException: Any other HTTP error status
            or transport failure (including a timeout).
    """
    # Basic validation: nothing to count, so skip the network round trip.
    if not text:
        return 0

    headers = {
        "x-api-key": api_key,
        "anthropic-version": "2023-06-01",
        "content-type": "application/json",
    }
    payload = {"model": model, "messages": [{"role": "user", "content": text}]}

    try:
        response = requests.post(
            API_URL, headers=headers, json=payload, timeout=timeout
        )
        response.raise_for_status()
        return response.json()["input_tokens"]
    except requests.exceptions.HTTPError as e:
        if e.response.status_code != 400:
            raise

        # A 400 body is normally {"error": {"message": ...}}, but it may be
        # non-JSON or shaped differently; fall back to the exception text
        # instead of failing while building the error message.
        try:
            body = e.response.json()
        except ValueError:
            body = None
        error = body.get("error") if isinstance(body, dict) else None
        if isinstance(error, dict):
            error_detail = error.get("message", str(e))
        else:
            error_detail = str(e)
        raise click.ClickException(f"API error: {error_detail}") from e
def read_from_stdin() -> str:
    """Return all text piped into stdin, or "" when stdin is a terminal."""
    stream = sys.stdin
    # An interactive terminal means nothing was piped in; do not block on it.
    return "" if stream.isatty() else stream.read()
def read_from_files(files: List[str]) -> str:
    """Read and concatenate content from multiple files.

    Files are decoded as UTF-8 explicitly rather than with the platform's
    locale encoding, so the same file yields the same text on every system.

    Args:
        files: Paths of the files to read, in the order they should appear.

    Returns:
        The contents of all files joined by a single newline; "" when
        ``files`` is empty.

    Raises:
        OSError: A file cannot be opened or read.
        UnicodeDecodeError: A file is not valid UTF-8.
    """
    contents = []
    for path in files:
        with open(path, "r", encoding="utf-8") as handle:
            contents.append(handle.read())
    return "\n".join(contents)
@click.command()
@click.argument(
    "files",
    nargs=-1,
    # dir_okay=False: a directory path would otherwise pass validation and
    # then fail inside open() with an unhandled IsADirectoryError.
    type=click.Path(exists=True, dir_okay=False),
    required=False,
)
@click.option(
    "--api-key",
    envvar="ANTHROPIC_API_KEY",
    help="Anthropic API key. Can also be set via ANTHROPIC_API_KEY environment variable.",
)
@click.option(
    "--model",
    default="claude-3-sonnet-20240229",
    help="Model to use for token counting.",
)
@click.option("--verbose", "-v", is_flag=True, help="Show detailed output.")
def cli(files: tuple, api_key: Optional[str], model: str, verbose: bool):
    """
    Count tokens in text using Anthropic's API.

    Accepts input from files or stdin (pipe).

    For running with uv `uv run token-counter.py`

    Example usage:

        echo "Hello, world!" | token-counter

        token-counter file1.txt file2.txt

        cat file.txt | token-counter
    """
    # NOTE: the docstring above is rendered by click as the --help text, so
    # implementation notes live in comments instead.
    #
    # Flow: require an API key, take the text from stdin (falling back to
    # the given files when stdin yields nothing), ask the API for the token
    # count and print it. Every failure path prints "Error: ..." to stderr
    # and exits with status 1.
    if not api_key:
        click.echo(
            "Error: API key required. Set ANTHROPIC_API_KEY environment variable or use --api-key option.",
            err=True,
        )
        sys.exit(1)

    # Read input from stdin if available, otherwise from files
    text = read_from_stdin()
    if not text and files:
        try:
            text = read_from_files(list(files))
        except (OSError, UnicodeDecodeError) as e:
            # Unreadable or undecodable file: report it like the other
            # errors instead of surfacing a traceback.
            click.echo(f"Error: Failed to read input files: {e}", err=True)
            sys.exit(1)

    if not text:
        click.echo(
            "Error: No input provided. Pipe text via stdin or provide file paths.",
            err=True,
        )
        sys.exit(1)

    try:
        token_count = count_tokens(text, api_key, model)
        if verbose:
            click.echo(f"Model: {model}")
            click.echo(f"Input length: {len(text)} characters")
        click.echo(f"Token count: {token_count}")
    except requests.exceptions.RequestException as e:
        # HTTP 400 is raised by count_tokens as click.ClickException, which
        # click reports itself; other request failures are handled here.
        click.echo(f"Error: Failed to count tokens: {e}", err=True)
        sys.exit(1)
# Script entry point: run the click command only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    cli()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment