Skip to content

Instantly share code, notes, and snippets.

@TurtleShip
Last active August 11, 2025 05:25
Show Gist options
  • Save TurtleShip/8af4d9ccd6b6d4c1c59f663cb877cf31 to your computer and use it in GitHub Desktop.
Save TurtleShip/8af4d9ccd6b6d4c1c59f663cb877cf31 to your computer and use it in GitHub Desktop.
Testing code_execution_20250522 with token counting
#!/usr/bin/env python3
"""
Test script to verify if token counting works with code_execution_20250522 tool.
This answers Brad's question: "Does token counting not work with code_execution_20250522?"
According to the docs:
- Token counting: https://docs.anthropic.com/en/docs/build-with-claude/token-counting
- Code execution tool: https://docs.anthropic.com/en/docs/agents-and-tools/tool-use/code-execution-tool
"""
import os
import anthropic
from dotenv import load_dotenv
# Load ANTHROPIC_API_KEY (and any other variables) from a local .env file.
load_dotenv()
# Module-level client shared by every test function below.
client = anthropic.Anthropic(
api_key=os.getenv("ANTHROPIC_API_KEY"),
# Beta opt-in is passed per-request via `betas=[...]` instead of a default header.
# default_headers={"anthropic-beta": "code-execution-20250522"},
)
def test_without_code_execution():
    """Baseline: send a plain message (no tools) and report token usage.

    Returns:
        The response's ``usage`` object (input/output token counts).
    """
    print("\n=== Test 1: WITHOUT code_execution_20250522 ===")

    response = client.messages.create(
        model="claude-opus-4-1-20250805",
        max_tokens=256,
        messages=[
            {"role": "user", "content": "What is 2 + 2? Just tell me the answer."}
        ],
    )

    print(f"Response: {response.content[0].text}")

    # Bind once; total is simply input + output for a tool-free request.
    usage = response.usage
    print(f"\nToken usage:")
    print(f" Input tokens: {usage.input_tokens}")
    print(f" Output tokens: {usage.output_tokens}")
    print(f" Total tokens: {usage.input_tokens + usage.output_tokens}")
    return usage
def test_with_code_execution():
    """Simple request with the code-execution tool attached; report token usage.

    Uses the beta Messages endpoint with the code-execution and files-API
    beta flags enabled, then prints each content block and the usage counts.

    Returns:
        The response's ``usage`` object (input/output token counts).
    """
    print("\n=== Test 2: WITH code_execution_20250522 ===")

    response = client.beta.messages.create(
        model="claude-opus-4-1-20250805",
        betas=["code-execution-2025-05-22", "files-api-2025-04-14"],
        max_tokens=4096,
        tools=[{"type": "code_execution_20250522", "name": "code_execution"}],
        messages=[
            {
                "role": "user",
                "content": "Use Python to calculate 2 + 2 and show me the result.",
            }
        ],
    )

    # Walk the content blocks: text replies and tool invocations interleave.
    for block in response.content:
        if block.type == "text":
            print(f"Text response: {block.text}")
        elif block.type == "tool_use":
            print(f"Tool use: {block.name}")
            print(f"Tool input: {block.input}")

    print(f"raw response: {response.json()}")

    usage = response.usage
    print(f"\nToken usage:")
    print(f" Input tokens: {usage.input_tokens}")
    print(f" Output tokens: {usage.output_tokens}")
    print(f" Total tokens: {usage.input_tokens + usage.output_tokens}")
    return usage
def test_with_code_execution_complex():
    """Multi-step code-execution request; report token usage (incl. cache fields).

    Sends a prompt that asks the model to write and run Python, prints a
    truncated view of each content block, then prints the usage breakdown.

    Returns:
        The response's ``usage`` object (input/output token counts).
    """
    print("\n=== Test 3: WITH code_execution_20250522 (Complex Example) ===")
    response = client.beta.messages.create(
        model="claude-opus-4-1-20250805",
        betas=["code-execution-2025-05-22", "files-api-2025-04-14"],
        max_tokens=4096,
        messages=[
            {
                "role": "user",
                "content": """Write and execute Python code to:
1. Create a list of the first 10 Fibonacci numbers
2. Calculate their sum
3. Find the average
4. Print all results""",
            }
        ],
        tools=[{"type": "code_execution_20250522", "name": "code_execution"}],
    )
    # Print the response content (truncate long text to 200 chars).
    for content in response.content:
        if content.type == "text":
            print(
                f"Text response: {content.text[:200]}..."
                if len(content.text) > 200
                else f"Text response: {content.text}"
            )
        elif content.type == "tool_use":
            print(f"Tool use: {content.name}")
            # BUG FIX: `content.input` is a dict, so the original
            # `hasattr(content.input, "code")` was always False and the
            # executed code was never printed. Use a membership test instead.
            if isinstance(content.input, dict) and "code" in content.input:
                print(
                    f"Code executed:\n{content.input['code'][:200]}..."
                    if len(content.input["code"]) > 200
                    else f"Code executed:\n{content.input['code']}"
                )
    print(f"raw response: {response.json()}")
    print(f"\nToken usage:")
    print(f" Input tokens: {response.usage.input_tokens}")
    print(f" Output tokens: {response.usage.output_tokens}")
    print(
        f" Total tokens: {response.usage.input_tokens + response.usage.output_tokens}"
    )
    # Check if usage includes cache tokens (if using prompt caching).
    # hasattr is appropriate here: usage is an object, and the fields are
    # only present on some API versions/accounts.
    if hasattr(response.usage, "cache_creation_input_tokens"):
        print(
            f" Cache creation input tokens: {response.usage.cache_creation_input_tokens}"
        )
    if hasattr(response.usage, "cache_read_input_tokens"):
        print(f" Cache read input tokens: {response.usage.cache_read_input_tokens}")
    return response.usage
def main():
    """Run all three token-counting tests and print a summary answer."""
    banner = "=" * 60
    print(banner)
    print("Testing Token Counting with code_execution_20250522")
    print(banner)
    try:
        # Run each scenario in order: baseline, simple tool, complex tool.
        usage1 = test_without_code_execution()
        usage2 = test_with_code_execution()
        usage3 = test_with_code_execution_complex()

        # Summary
        print("\n" + banner)
        print("ANSWER TO BRAD'S QUESTION:")
        print(banner)
        print("\n✅ YES, token counting DOES work with code_execution_20250522!")
        print("\nThe API response includes the 'usage' field with:")
        print(
            "- input_tokens: Number of tokens in the input (prompt + tools definition)"
        )
        print("- output_tokens: Number of tokens in the output (response + tool calls)")
        print("\nKey findings:")
        for idx, (label, usage) in enumerate(
            [
                ("Without code execution", usage1),
                ("With simple code execution", usage2),
                ("With complex code execution", usage3),
            ],
            start=1,
        ):
            total = usage.input_tokens + usage.output_tokens
            print(f"{idx}. {label}: {total} total tokens")
        print("\nNote: When using code_execution_20250522, the token count includes:")
        print("- The tool definition in the input")
        print("- The code generated and tool_use blocks in the output")
        print("- Any text explanations provided by Claude")
    except Exception as e:
        # Top-level boundary for a demo script: report the failure and hint
        # at the usual causes rather than crashing with a traceback.
        print(f"\n❌ Error during testing: {e}")
        print("\nThis might mean:")
        print("1. The API key is not set properly")
        print(
            "2. The code_execution_20250522 tool might not be available on your account"
        )
        print("3. Network or API issues")


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment