Skip to content

Instantly share code, notes, and snippets.

@atemate
Last active November 10, 2025 17:33
Show Gist options
  • Save atemate/f365224beec263a7b8a634da48dc0885 to your computer and use it in GitHub Desktop.
Save atemate/f365224beec263a7b8a634da48dc0885 to your computer and use it in GitHub Desktop.
Rough Cost comparison: Clouds vs Self-hosted
tokens cost_aws_managed cost_gcp_managed cost_L4_self_hosted cost_A10G_self_hosted latency_L4_seconds latency_L4_hours latency_A10G_seconds latency_A10G_hours latency_T4_seconds latency_T4_hours latency_H100_seconds latency_H100_hours latency_GCP_Vertex_seconds latency_GCP_Vertex_hours
1000000 4.0 11.249999999999998 0.27777777777777773 6.733333333333333 2000.0 0.5555555555555556 20000.0 5.555555555555555 33333.333333333336 9.25925925925926 1600.0 0.4444444444444444 1600.0 0.4444444444444444
2000000 8.0 22.499999999999996 0.5555555555555555 13.466666666666667 4000.0 1.1111111111111112 40000.0 11.11111111111111 66666.66666666667 18.51851851851852 3200.0 0.8888888888888888 3200.0 0.8888888888888888
3000000 12.0 33.75 0.8333333333333333 20.2 6000.0 1.6666666666666667 60000.0 16.666666666666668 100000.0 27.77777777777778 4800.0 1.3333333333333333 4800.0 1.3333333333333333
4000000 16.0 44.99999999999999 1.111111111111111 26.933333333333334 8000.0 2.2222222222222223 80000.0 22.22222222222222 133333.33333333334 37.03703703703704 6400.0 1.7777777777777777 6400.0 1.7777777777777777
5000000 20.0 56.24999999999999 1.3888888888888888 33.666666666666664 10000.0 2.7777777777777777 100000.0 27.77777777777778 166666.66666666666 46.29629629629629 8000.0 2.2222222222222223 8000.0 2.2222222222222223
6000000 24.0 67.5 1.6666666666666665 40.4 12000.0 3.333333333333333 120000.0 33.333333333333336 200000.0 55.55555555555556 9600.0 2.6666666666666665 9600.0 2.6666666666666665
7000000 28.0 78.75 1.9444444444444444 47.13333333333333 14000.0 3.888888888888889 140000.0 38.888888888888886 233333.3333333333 64.81481481481482 11200.0 3.111111111111111 11200.0 3.111111111111111
8000000 32.0 89.99999999999999 2.222222222222222 53.86666666666667 16000.0 4.444444444444445 160000.0 44.44444444444444 266666.6666666667 74.07407407407408 12800.0 3.555555555555556 12800.0 3.5555555555555554
9000000 36.0 101.24999999999999 2.5 60.599999999999994 18000.0 5.0 180000.0 50.0 300000.0 83.33333333333333 14400.0 4.0 14400.0 4.0
10000000 40.0 112.49999999999999 2.7777777777777777 67.33333333333333 20000.0 5.555555555555555 200000.0 55.55555555555556 333333.3333333333 92.59259259259258 16000.0 4.444444444444445 16000.0 4.444444444444445
#!/usr/bin/env python3
import pandas as pd
import matplotlib.pyplot as plt
import os
# Default locations of the input table and of the two rendered charts.
CSV_PATH = os.path.expanduser('~/Downloads/cost_comparison.csv')
COST_PLOT_PATH = '/tmp/cost_to_tokens.png'
LATENCY_PLOT_PATH = '/tmp/latency_to_tokens.png'

# X-axis conversion used by both charts: 1M tokens = 3 volumes.
# NOTE(review): "Piece & War" presumably refers to Tolstoy's "War and Peace";
# the column name and axis label are kept exactly as the script spelled them.
VOLUMES_PER_MILLION_TOKENS = 3
X_COLUMN = 'piece_and_wars_volumes'
X_LABEL = 'Piece & War volumes'

# Each series is (CSV column, line color, legend label).
COST_SERIES = (
    ('cost_gcp_managed', 'blue', 'GCP Vertex AI'),
    ('cost_aws_managed', 'orange', 'AWS Bedrock'),
    ('cost_L4_self_hosted', 'green', 'Self-hosted L4 (500 tps)'),
)
# A single managed-cloud latency series is drawn and labelled 'GCP/AWS';
# the script never plotted a separate AWS latency line.
LATENCY_SERIES = (
    ('latency_GCP_Vertex_hours', 'chocolate', 'GCP/AWS'),
    ('latency_L4_hours', 'green', 'Self-hosted L4'),
)


def add_volume_columns(df, volumes_per_million=VOLUMES_PER_MILLION_TOKENS):
    """Return a copy of *df* with the derived x-axis columns added.

    Args:
        df: DataFrame with a numeric ``tokens`` column.
        volumes_per_million: Number of volumes that one million tokens
            corresponds to.

    Returns:
        A new DataFrame with two extra columns: ``tokens_millions``
        (``tokens / 1_000_000``) and ``piece_and_wars_volumes``
        (``tokens_millions * volumes_per_million``). The input frame is
        not modified.
    """
    tokens_millions = df['tokens'] / 1_000_000
    return df.assign(
        tokens_millions=tokens_millions,
        piece_and_wars_volumes=tokens_millions * volumes_per_million,
    )


def save_line_plot(df, series, ylabel, title, output_path):
    """Draw one line per entry of *series* against the volumes column and save it.

    Args:
        df: DataFrame containing ``piece_and_wars_volumes`` and every column
            named in *series*.
        series: Iterable of ``(column, color, legend_label)`` tuples.
        ylabel: Text for the y-axis.
        title: Chart title.
        output_path: File path the PNG is written to.
    """
    fig, ax = plt.subplots(figsize=(10, 6))
    try:
        for column, color, label in series:
            ax.plot(
                df[X_COLUMN],
                df[column],
                color=color,
                marker='o',
                label=label,
                linewidth=2,
                markersize=6,
            )
        ax.set_xlabel(X_LABEL, fontsize=12)
        ax.set_ylabel(ylabel, fontsize=12)
        ax.set_title(title, fontsize=14, fontweight='bold')
        ax.legend(fontsize=10)
        ax.grid(True, alpha=0.3)
        fig.tight_layout()
        fig.savefig(output_path, dpi=300, bbox_inches='tight')
    finally:
        # Close the figure even when saving fails so it does not stay
        # registered with pyplot.
        plt.close(fig)


def main(
    csv_path=CSV_PATH,
    cost_plot_path=COST_PLOT_PATH,
    latency_plot_path=LATENCY_PLOT_PATH,
):
    """Read the cost-comparison CSV and write the cost and latency charts.

    Args:
        csv_path: CSV file to read with ``pd.read_csv``.
        cost_plot_path: Destination of the cost chart.
        latency_plot_path: Destination of the latency chart.
    """
    df = add_volume_columns(pd.read_csv(csv_path))

    save_line_plot(
        df,
        COST_SERIES,
        ylabel='Cost ($)',
        title='Rough Cost Comparison: Clouds vs Self-hosted L4',
        output_path=cost_plot_path,
    )
    print(f"Cost plot saved to {cost_plot_path}")

    # Latency is plotted from the *_hours columns rather than the *_seconds ones.
    save_line_plot(
        df,
        LATENCY_SERIES,
        ylabel='Latency (Hours)',
        title='Rough Latency Comparison: Clouds vs Self-hosted L4',
        output_path=latency_plot_path,
    )
    print(f"Latency plot saved to {latency_plot_path}")


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment