|
#!/usr/bin/env python3 |
|
"""Generate benchmark plots from Qwen3-TTS batch generation results.""" |
|
|
|
import matplotlib.pyplot as plt |
|
import numpy as np |
|
import os |
|
|
|
# Results data
# Model variants that were benchmarked; every plot iterates them in this order.
MODELS = ["4-bit", "6-bit", "8-bit", "bf16"]

# --- Sequential (single generate()) baseline — short prompt ---
# Tokens per second.
SEQ_TPS = {
    "4-bit": 21.2,
    "6-bit": 21.0,
    "8-bit": 20.5,
    "bf16": 20.2,
}
# Throughput, plotted as "audio duration / wall time".
SEQ_RTF = {
    "4-bit": 1.69,
    "6-bit": 1.68,
    "8-bit": 1.64,
    "bf16": 1.62,
}
# Time to first byte, plotted in milliseconds.
SEQ_TTFB = {
    "4-bit": 57.2,
    "6-bit": 64.0,
    "8-bit": 60.9,
    "bf16": 62.0,
}
# Peak memory, plotted in GB.
SEQ_MEMORY = {
    "4-bit": 3.51,
    "6-bit": 3.89,
    "8-bit": 4.28,
    "bf16": 5.72,
}

# --- Batch generate() results — short prompt ---
# Every BATCH_* list below holds one value per entry of BATCH_SIZES, in order.
BATCH_SIZES = [1, 2, 4]

# Tokens per second.
BATCH_TPS = {
    "4-bit": [19.3, 30.9, 46.5],
    "6-bit": [20.4, 33.1, 46.7],
    "8-bit": [19.9, 32.5, 46.4],
    "bf16": [19.3, 28.9, 41.6],
}

# Throughput, plotted as "audio duration / wall time".
BATCH_THROUGHPUT = {
    "4-bit": [1.55, 2.47, 3.72],
    "6-bit": [1.63, 2.65, 3.74],
    "8-bit": [1.59, 2.60, 3.71],
    "bf16": [1.54, 2.31, 3.33],
}

# Time to first byte, plotted in milliseconds.
BATCH_TTFB = {
    "4-bit": [68.4, 85.0, 110.9],
    "6-bit": [69.2, 86.2, 112.6],
    "8-bit": [67.2, 86.7, 110.6],
    "bf16": [68.6, 95.7, 119.7],
}

# Peak memory, plotted in GB.
BATCH_MEMORY = {
    "4-bit": [3.51, 3.54, 3.60],
    "6-bit": [3.89, 3.92, 3.98],
    "8-bit": [4.28, 4.31, 4.37],
    "bf16": [5.72, 5.75, 5.81],
}

# Total time per run. Not referenced by the plotting code visible in this
# file; units are not stated here (presumably milliseconds — TODO confirm).
BATCH_TOTAL_TIME = {
    "4-bit": [1241.2, 1757.2, 2108.1],
    "6-bit": [1223.4, 1344.0, 1926.6],
    "8-bit": [1156.7, 1462.3, 2047.7],
    "bf16": [1218.1, 1576.5, 2162.7],
}
|
|
|
# Line/bar colour used for each model in every figure.
COLORS = {
    "4-bit": "#2196F3",
    "6-bit": "#4CAF50",
    "8-bit": "#FF9800",
    "bf16": "#F44336",
}

# Matplotlib marker code used for each model on the line charts.
MARKERS = {
    "4-bit": "o",
    "6-bit": "s",
    "8-bit": "D",
    "bf16": "^",
}
|
|
|
# Directory that receives every generated PNG. The path is relative, so it is
# resolved against the current working directory; it is created as soon as
# this module is executed or imported.
OUT_DIR = "benchmarks/plots"
os.makedirs(OUT_DIR, exist_ok=True)

# Matplotlib defaults shared by all figures produced below.
plt.rcParams.update(
    {
        "font.size": 11,
        "figure.facecolor": "white",
        "axes.grid": True,
        "grid.alpha": 0.3,
    }
)
|
|
|
# X-axis labels: Sequential, then batch sizes
X_LABELS = ["Seq (1)", "Batch 1", "Batch 2", "Batch 4"]

# One integer slot per label, left to right.
X_POS = list(range(len(X_LABELS)))
|
|
|
|
|
def _full_tps(model):
    """Return the tokens-per-second series for *model*.

    The sequential value comes first, followed by the batched values.
    """
    sequential = SEQ_TPS[model]
    return [sequential, *BATCH_TPS[model]]
|
|
|
|
|
def _full_throughput(model):
    """Return the throughput series for *model*.

    The sequential value comes first, followed by the batched values.
    """
    sequential = SEQ_RTF[model]
    return [sequential, *BATCH_THROUGHPUT[model]]
|
|
|
|
|
def _full_ttfb(model):
    """Return the time-to-first-byte series for *model*.

    The sequential value comes first, followed by the batched values.
    """
    sequential = SEQ_TTFB[model]
    return [sequential, *BATCH_TTFB[model]]
|
|
|
|
|
def _full_memory(model):
    """Return the peak-memory series for *model*.

    The sequential value comes first, followed by the batched values.
    """
    sequential = SEQ_MEMORY[model]
    return [sequential, *BATCH_MEMORY[model]]
|
|
|
|
|
def plot_tps():
    """Plot tokens per second for every model, sequential vs batched.

    Draws one line per entry of ``MODELS``, saves the figure as
    ``tps_vs_batch.png`` inside ``OUT_DIR`` at 150 dpi, closes it and prints
    the saved path.
    """
    figure, axis = plt.subplots(figsize=(9, 5.5))

    # One line per model, styled from the shared colour/marker tables.
    for name in MODELS:
        axis.plot(
            X_POS,
            _full_tps(name),
            label=name,
            color=COLORS[name],
            marker=MARKERS[name],
            markersize=8,
            linewidth=2,
        )

    # Vertical separator between sequential and batch
    axis.axvline(x=0.5, color="gray", linestyle=":", alpha=0.4)

    # Region captions are placed just below the current upper y-limit.
    for x_center, caption in ((0.0, "Sequential"), (2.0, "Batched")):
        axis.text(
            x_center,
            axis.get_ylim()[1] * 0.97,
            caption,
            ha="center",
            fontsize=8,
            color="gray",
        )

    axis.set(
        xlabel="Generation Mode",
        ylabel="Tokens per Second",
        title="Tokens per Second — Sequential vs Batched",
    )
    axis.set_xticks(X_POS)
    axis.set_xticklabels(X_LABELS)
    axis.legend()

    figure.tight_layout()
    out_path = os.path.join(OUT_DIR, "tps_vs_batch.png")
    figure.savefig(out_path, dpi=150)
    plt.close(figure)
    print(f"Saved: {out_path}")
|
|
|
|
|
def plot_throughput():
    """Plot throughput for every model, sequential vs batched.

    Draws one line per entry of ``MODELS`` plus a dashed "Ideal linear"
    reference over the batched columns, saves the figure as
    ``throughput_vs_batch.png`` inside ``OUT_DIR`` at 150 dpi, closes it and
    prints the saved path.
    """
    figure, axis = plt.subplots(figsize=(9, 5.5))

    # One line per model, styled from the shared colour/marker tables.
    for name in MODELS:
        axis.plot(
            X_POS,
            _full_throughput(name),
            label=name,
            color=COLORS[name],
            marker=MARKERS[name],
            markersize=8,
            linewidth=2,
        )

    # Ideal linear scaling reference (from batch 1 onward)
    # NOTE(review): the reference uses the raw batch sizes as y-values, so it
    # starts at 1.0 rather than at the measured batch-1 throughput — confirm
    # this is the intended baseline.
    batched_positions = X_POS[1:]
    axis.plot(
        batched_positions,
        BATCH_SIZES,
        label="Ideal linear",
        color="gray",
        linestyle="--",
        alpha=0.5,
    )

    # Dotted divider between the sequential column and the batched columns.
    axis.axvline(x=0.5, color="gray", linestyle=":", alpha=0.4)

    # Region captions are placed just below the current upper y-limit.
    for x_center, caption in ((0.0, "Sequential"), (2.0, "Batched")):
        axis.text(
            x_center,
            axis.get_ylim()[1] * 0.97,
            caption,
            ha="center",
            fontsize=8,
            color="gray",
        )

    axis.set(
        xlabel="Generation Mode",
        ylabel="Throughput (audio duration / wall time)",
        title="Throughput Scaling — Sequential vs Batched",
    )
    axis.set_xticks(X_POS)
    axis.set_xticklabels(X_LABELS)
    axis.legend()

    figure.tight_layout()
    out_path = os.path.join(OUT_DIR, "throughput_vs_batch.png")
    figure.savefig(out_path, dpi=150)
    plt.close(figure)
    print(f"Saved: {out_path}")
|
|
|
|
|
def plot_ttfb():
    """Plot average time to first byte for every model, sequential vs batched.

    Draws one line per entry of ``MODELS``, saves the figure as
    ``ttfb_vs_batch.png`` inside ``OUT_DIR`` at 150 dpi, closes it and prints
    the saved path.
    """
    figure, axis = plt.subplots(figsize=(9, 5.5))

    # One line per model, styled from the shared colour/marker tables.
    for name in MODELS:
        axis.plot(
            X_POS,
            _full_ttfb(name),
            label=name,
            color=COLORS[name],
            marker=MARKERS[name],
            markersize=8,
            linewidth=2,
        )

    # Dotted divider between the sequential column and the batched columns.
    axis.axvline(x=0.5, color="gray", linestyle=":", alpha=0.4)

    # Region captions are placed just below the current upper y-limit.
    for x_center, caption in ((0.0, "Sequential"), (2.0, "Batched")):
        axis.text(
            x_center,
            axis.get_ylim()[1] * 0.97,
            caption,
            ha="center",
            fontsize=8,
            color="gray",
        )

    axis.set(
        xlabel="Generation Mode",
        ylabel="Average TTFB (ms)",
        title="Time to First Byte — Sequential vs Batched",
    )
    axis.set_xticks(X_POS)
    axis.set_xticklabels(X_LABELS)
    axis.legend()

    figure.tight_layout()
    out_path = os.path.join(OUT_DIR, "ttfb_vs_batch.png")
    figure.savefig(out_path, dpi=150)
    plt.close(figure)
    print(f"Saved: {out_path}")
|
|
|
|
|
def plot_memory():
    """Plot peak memory per model as grouped bars, sequential vs batched.

    Each x tick (one per entry of ``X_LABELS``) carries a cluster of bars, one
    per entry of ``MODELS``, and every bar is annotated with its value to one
    decimal place. The figure is saved as ``memory_vs_batch.png`` inside
    ``OUT_DIR`` at 150 dpi, then closed, and the saved path is printed.
    """
    fig, ax = plt.subplots(figsize=(9, 5.5))

    x = np.arange(len(X_LABELS))
    width = 0.18
    # Fractional index of the middle of the bar cluster. Deriving it from the
    # model count keeps the cluster centred on its tick if MODELS changes
    # (for four models this is 1.5).
    centre = (len(MODELS) - 1) / 2

    for i, model in enumerate(MODELS):
        offset = (i - centre) * width
        vals = _full_memory(model)
        bars = ax.bar(
            x + offset, vals, width,
            label=model, color=COLORS[model], alpha=0.85,
        )
        # Value label slightly above the top of each bar.
        for bar, val in zip(bars, vals):
            ax.text(
                bar.get_x() + bar.get_width() / 2, bar.get_height() + 0.05,
                f"{val:.1f}", ha="center", va="bottom", fontsize=7,
            )

    # Dotted divider between the sequential column and the batched columns.
    ax.axvline(x=0.5, color="gray", linestyle=":", alpha=0.4)
    ax.set_xlabel("Generation Mode")
    ax.set_ylabel("Peak Memory (GB)")
    ax.set_title("Peak Memory Usage — Sequential vs Batched")
    ax.set_xticks(x)
    ax.set_xticklabels(X_LABELS)
    ax.legend()
    # Fixed y-range leaves headroom above the bars for the value labels.
    ax.set_ylim(0, 7.5)

    fig.tight_layout()
    path = os.path.join(OUT_DIR, "memory_vs_batch.png")
    fig.savefig(path, dpi=150)
    plt.close(fig)
    print(f"Saved: {path}")
|
|
|
|
|
def plot_efficiency():
    """Throughput per GB of memory: sequential vs batch=4.

    For each entry of ``MODELS`` this draws a pair of bars comparing
    ``SEQ_RTF / SEQ_MEMORY`` with ``BATCH_THROUGHPUT / BATCH_MEMORY`` at batch
    size 4, and annotates every bar with its value to two decimal places. The
    figure is saved as ``efficiency_batch4.png`` inside ``OUT_DIR`` at 150 dpi,
    then closed, and the saved path is printed.

    Raises:
        ValueError: if ``BATCH_SIZES`` contains no entry equal to 4.
    """
    # Find the batch-4 column by value instead of hard-coding its position, so
    # a reordered or extended BATCH_SIZES cannot silently plot another batch
    # size under the "Batch 4" label. Done before the figure is created so a
    # failure here does not leave an open figure behind.
    batch4 = BATCH_SIZES.index(4)

    seq_eff = [SEQ_RTF[m] / SEQ_MEMORY[m] for m in MODELS]
    batch4_eff = [
        BATCH_THROUGHPUT[m][batch4] / BATCH_MEMORY[m][batch4] for m in MODELS
    ]
    model_colors = [COLORS[m] for m in MODELS]

    fig, ax = plt.subplots(figsize=(9, 5.5))

    x = np.arange(len(MODELS))
    width = 0.3

    # Sequential bars are drawn lighter with a coloured outline; batch-4 bars
    # use the same per-model colours at higher opacity.
    bars1 = ax.bar(
        x - width / 2, seq_eff, width,
        label="Sequential", color=model_colors, alpha=0.45,
        edgecolor=model_colors, linewidth=1.5,
    )
    bars2 = ax.bar(
        x + width / 2, batch4_eff, width,
        label="Batch 4", color=model_colors, alpha=0.85,
    )

    # Value label slightly above the top of each bar, sequential bars first.
    for bars, values in ((bars1, seq_eff), (bars2, batch4_eff)):
        for bar, val in zip(bars, values):
            ax.text(
                bar.get_x() + bar.get_width() / 2, bar.get_height() + 0.01,
                f"{val:.2f}", ha="center", va="bottom", fontsize=9,
            )

    ax.set_xticks(x)
    ax.set_xticklabels(MODELS)
    ax.set_ylabel("Throughput per GB (x / GB)")
    ax.set_title("Memory Efficiency — Sequential vs Batch 4")
    ax.legend()

    fig.tight_layout()
    path = os.path.join(OUT_DIR, "efficiency_batch4.png")
    fig.savefig(path, dpi=150)
    plt.close(fig)
    print(f"Saved: {path}")
|
|
|
|
|
def _finish_summary_panel(ax, ticks, ylabel, title, ylim=None):
    """Apply the divider, labels, ticks, legend and grid shared by all panels."""
    # Dotted divider between the sequential column and the batched columns.
    ax.axvline(x=0.5, color="gray", linestyle=":", alpha=0.4)
    ax.set_xlabel("Generation Mode")
    ax.set_ylabel(ylabel)
    ax.set_title(title)
    ax.set_xticks(ticks)
    ax.set_xticklabels(X_LABELS, fontsize=9)
    ax.legend(fontsize=9)
    if ylim is not None:
        ax.set_ylim(*ylim)
    ax.grid(True, alpha=0.3)


def _summary_line_panel(ax, series_for, ylabel, title, show_ideal=False):
    """Draw one line-chart panel: one line per model from ``series_for(model)``."""
    for model in MODELS:
        ax.plot(
            X_POS, series_for(model),
            marker=MARKERS[model], color=COLORS[model],
            linewidth=2, markersize=7, label=model,
        )
    if show_ideal:
        # Dashed reference over the batched columns; its y-values are the raw
        # batch sizes.
        ax.plot(X_POS[1:], BATCH_SIZES, "--", color="gray", alpha=0.5, label="Ideal")
    _finish_summary_panel(ax, X_POS, ylabel, title)


def _summary_memory_panel(ax):
    """Draw the grouped-bar peak-memory panel, one bar per model and mode."""
    x = np.arange(len(X_LABELS))
    width = 0.18
    # Fractional index of the middle of the bar cluster. Deriving it from the
    # model count keeps the cluster centred on its tick if MODELS changes
    # (for four models this is 1.5).
    centre = (len(MODELS) - 1) / 2
    for i, model in enumerate(MODELS):
        offset = (i - centre) * width
        ax.bar(
            x + offset, _full_memory(model), width,
            label=model, color=COLORS[model], alpha=0.85,
        )
    _finish_summary_panel(ax, x, "Peak Memory (GB)", "Memory Usage", ylim=(0, 7.5))


def plot_combined_summary():
    """2x2 summary figure.

    Panels: tokens per second (top left), throughput with a dashed "Ideal"
    reference (top right), time to first byte (bottom left) and peak memory
    as grouped bars (bottom right). The figure is saved as ``summary.png``
    inside ``OUT_DIR`` at 150 dpi, then closed, and the saved path is printed.
    """
    fig, axes = plt.subplots(2, 2, figsize=(15, 11))

    # TPS
    _summary_line_panel(axes[0, 0], _full_tps, "Tokens / sec", "Tokens per Second")

    # Throughput
    _summary_line_panel(
        axes[0, 1], _full_throughput,
        "Audio duration / wall time", "Throughput Scaling",
        show_ideal=True,
    )

    # TTFB
    _summary_line_panel(axes[1, 0], _full_ttfb, "Avg TTFB (ms)", "Time to First Byte")

    # Memory
    _summary_memory_panel(axes[1, 1])

    fig.suptitle(
        "Qwen3-TTS Batch Generation — CustomVoice 1.7B (short prompt)",
        fontsize=14, fontweight="bold", y=0.98,
    )
    # Reserve the top 4% of the canvas so the suptitle does not overlap the
    # upper panels.
    fig.tight_layout(rect=[0, 0, 1, 0.96])
    path = os.path.join(OUT_DIR, "summary.png")
    fig.savefig(path, dpi=150)
    plt.close(fig)
    print(f"Saved: {path}")
|
|
|
|
|
if __name__ == "__main__":
    # Render every figure in a fixed order, then report the output directory.
    for render in (
        plot_tps,
        plot_throughput,
        plot_ttfb,
        plot_memory,
        plot_efficiency,
        plot_combined_summary,
    ):
        render()
    print(f"\nAll plots saved to {OUT_DIR}/")