Created
October 30, 2024 16:18
-
-
Save jakehemmerle/3203efa01ac455c2a8a26669eb465af2 to your computer and use it in GitHub Desktop.
Apple Silicon: Token Generation as a function of GPU-core count and Memory Bandwidth
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Corrected code based on column label error in previous attempt
import matplotlib.pyplot as plt
import pandas as pd
# Define the data.
# One list per column; position i in every list describes the same chip
# configuration. BW_GB_s is memory bandwidth in GB/s and the *_TG_t_s columns
# are token-generation rates in tokens/s, one column per quantization.
# Each list is wrapped so that one source line holds one chip generation
# (M1, M2, M3), eight configurations each.
# NOTE(review): the 0 entries look like placeholders for configurations with no
# benchmark (both 'M3 Ultra' rows are 0 in all three rate columns) - confirm.
data = {
    'Model': [
        'M1', 'M1', 'M1 Pro', 'M1 Pro', 'M1 Max', 'M1 Max', 'M1 Ultra', 'M1 Ultra',
        'M2', 'M2', 'M2 Pro', 'M2 Pro', 'M2 Max', 'M2 Max', 'M2 Ultra', 'M2 Ultra',
        'M3', 'M3', 'M3 Pro', 'M3 Pro', 'M3 Max', 'M3 Max', 'M3 Ultra', 'M3 Ultra',
    ],
    'BW_GB_s': [
        68, 68, 200, 200, 400, 400, 800, 800,
        100, 100, 200, 200, 400, 400, 800, 800,
        100, 100, 150, 150, 300, 400, 800, 800,
    ],
    'GPU_Cores': [
        7, 8, 14, 16, 24, 32, 48, 64,
        8, 10, 16, 19, 30, 38, 60, 76,
        8, 10, 14, 18, 30, 40, 60, 80,
    ],
    'F16_TG_t_s': [
        0, 0, 12.75, 12.75, 22.55, 23.03, 33.92, 37.01,
        0, 6.72, 12.47, 13.06, 24.16, 24.65, 39.86, 41.02,
        0, 0, 0, 9.89, 19.54, 25.09, 0, 0,
    ],
    'Q8_0_TG_t_s': [
        7.92, 7.91, 21.95, 22.34, 37.81, 40.2, 55.69, 59.87,
        12.18, 12.21, 22.7, 23.01, 39.97, 41.83, 62.14, 66.64,
        0, 12.27, 17.44, 17.53, 34.3, 42.75, 0, 0,
    ],
    'Q4_0_TG_t_s': [
        14.19, 14.15, 35.52, 36.41, 54.61, 61.19, 74.93, 83.73,
        21.7, 21.91, 37.87, 38.86, 60.99, 65.95, 88.64, 94.27,
        0, 21.34, 30.65, 30.74, 56.58, 66.31, 0, 0,
    ],
}

# Create a dataframe: one row per chip configuration, one column per dict key.
df = pd.DataFrame(data)
# Series drawn on every panel: (dataframe column, marker colour, legend label).
_TG_SERIES = (
    ('F16_TG_t_s', 'blue', 'F16 TG'),
    ('Q8_0_TG_t_s', 'orange', 'Q8 TG'),
    ('Q4_0_TG_t_s', 'green', 'Q4 TG'),
)


def _plot_tg_panel(frame, index, x_column, title, x_label, x_max):
    """Draw one scatter panel of token-generation rate against ``x_column``.

    Args:
        frame: DataFrame holding ``x_column`` and every column in ``_TG_SERIES``.
        index: 1-based position of the panel in the 1x2 subplot grid.
        x_column: Name of the column plotted on the x-axis.
        title: Panel title.
        x_label: Label for the x-axis.
        x_max: Upper limit of the x-axis (the lower limit is 0).
    """
    plt.subplot(1, 2, index)
    for y_column, colour, label in _TG_SERIES:
        # A rate of 0 appears to mark a configuration with no benchmark, so
        # skip those rows instead of drawing them as real 0 t/s points.
        measured = frame[frame[y_column] > 0]
        plt.scatter(measured[x_column], measured[y_column],
                    color=colour, label=label, alpha=0.5)
    plt.title(title)
    plt.xlabel(x_label)
    plt.ylabel('Token Generation (t/s)')
    plt.xlim(0, x_max)
    plt.ylim(0, 100)
    plt.legend()
    plt.grid()


# Create the figure: two side-by-side panels sharing the same three series.
plt.figure(figsize=(12, 8))

# Token generation (F16, Q8, Q4) relative to memory bandwidth
_plot_tg_panel(df, 1, 'BW_GB_s', 'TG vs Memory Bandwidth', 'Memory Bandwidth (GB/s)', 1000)

# Token generation (F16, Q8, Q4) relative to GPU core count
_plot_tg_panel(df, 2, 'GPU_Cores', 'TG vs GPU Cores', 'GPU Cores', 100)

plt.tight_layout()
plt.show()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment