# Source: GitHub Gist ibehnam/7341eee8494772249db2e349022a4b68 by @ibehnam,
# created October 27, 2025 00:57.
# (GitHub page chrome from the web view is not part of the script.)
#!/usr/bin/env bash
# Benchmark chat-completion requests against an OpenAI-compatible endpoint,
# using model instances loaded through the `lms` CLI.
set -euo pipefail

# --------------------------
# Defaults
# --------------------------

# Model key handed to `lms load`, and the identifier of the first instance.
BASE_MODEL_KEY="openai/gpt-oss-20b"
BASE_IDENTIFIER="openai/gpt-oss-20b"

# Scenario toggles: 1 = run the scenario, 0 = skip it.
DO_SINGLE=0
DO_SEQ=1
DO_PAR=1

# Number of model instances, and repetitions per scenario.
N=4
RUNS=5

# Default to user's earlier port; override with --endpoint if needed
ENDPOINT="http://localhost:8080/v1"

# Optional flags to pass to lms load (filled in by argument parsing).
LMS_FLAGS=()

# Per-scenario result logs are written here; create it up front.
OUTDIR="./benchmark_logs"
mkdir -p "$OUTDIR"
# Print the command-line help text to stdout.
# Reads the globals N, RUNS, ENDPOINT and OUTDIR to show the current defaults.
usage() {
  local -a help_lines=(
    "Usage: $0 [options]"
    "Benchmarking:"
    "--n N Number of LLMs to use (inclusive): BASE plus :2..:N. Default: $N"
    "--runs R Number of runs per scenario. Default: $RUNS"
    "--single Include single-model scenario (BASE only)"
    "--no-seq Skip sequential scenario"
    "--no-par Skip parallel scenario"
    "--endpoint URL OpenAI-compatible endpoint (default: $ENDPOINT)"
    "Model load (passed to 'lms load'):"
    "--model-key KEY"
    "--gpu <ratio|max|off>"
    "--context-length <num>"
    "--ttl <seconds>"
    "--host <host>"
    "--port <port>"
    "--yes"
    "--exact"
    "Other:"
    "-h, --help Show this help"
    "Logs are saved in: $OUTDIR/"
  )
  # One line per array element; the format string keeps leading dashes literal.
  printf '%s\n' "${help_lines[@]}"
}
# --------------------------
# Arg parsing
# --------------------------
# Abort with a clear message unless VALUE is a positive decimal integer.
# Arguments: $1 - option name (for the message), $2 - value to check.
require_positive_int() {
  local flag="$1" value="$2"
  # Leading zeros are rejected too: bash arithmetic would read them as octal.
  if [[ ! "$value" =~ ^[1-9][0-9]*$ ]]; then
    echo "Invalid value for $flag: '$value' (expected a positive integer)" >&2
    exit 1
  fi
}

# Parse command-line options into the global settings.
# Globals written: N, RUNS, DO_SINGLE, DO_SEQ, DO_PAR, ENDPOINT,
#                  BASE_MODEL_KEY, LMS_FLAGS (appended).
# Exits 0 after printing help; exits 1 on an unknown option, a missing
# option value, or a non-numeric --n/--runs.
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --n)
        require_positive_int "$1" "${2:?--n requires a value}"
        N="$2"
        shift 2
        ;;
      --runs)
        require_positive_int "$1" "${2:?--runs requires a value}"
        RUNS="$2"
        shift 2
        ;;
      --single) DO_SINGLE=1; shift ;;
      --no-seq) DO_SEQ=0; shift ;;
      --no-par) DO_PAR=0; shift ;;
      --endpoint)
        ENDPOINT="${2:?--endpoint requires a value}"
        shift 2
        ;;
      --model-key)
        BASE_MODEL_KEY="${2:?--model-key requires a value}"
        shift 2
        ;;
      # Value-taking options forwarded verbatim to 'lms load'.
      --gpu|--context-length|--ttl|--host|--port)
        LMS_FLAGS+=("$1" "${2:?$1 requires a value}")
        shift 2
        ;;
      # Boolean options forwarded verbatim to 'lms load'.
      --yes|--exact)
        LMS_FLAGS+=("$1")
        shift
        ;;
      -h|--help)
        usage
        exit 0
        ;;
      *)
        # Diagnostics belong on stderr so stdout stays clean for results.
        echo "Unknown arg: $1" >&2
        usage >&2
        exit 1
        ;;
    esac
  done
}

parse_args "$@"
# --------------------------
# Build model identifiers (inclusive N)
# N=1 -> [BASE]
# N>=2 -> [BASE, :2 .. :N]
# --------------------------
# First instance uses the bare identifier; instances 2..N get a ":<k>" suffix.
IDENTS=("$BASE_IDENTIFIER")
replica=2
# The loop bound alone covers N < 2 (zero iterations).
while (( replica <= N )); do
  IDENTS+=("${BASE_IDENTIFIER}:${replica}")
  replica=$(( replica + 1 ))
done
# --------------------------
# Helpers
# --------------------------
# Read one number per line from stdin and print their mean and population
# standard deviation as "avg = X s, std = Y s" (three decimals).
# With no input lines it prints "avg = n/a, std = n/a".
stats() {
  awk '
    {
      sample[NR] = $1
      total += $1
    }
    END {
      if (NR == 0) {
        print "avg = n/a, std = n/a"
        exit
      }
      mean = total / NR
      sq_sum = 0
      for (k = 1; k <= NR; k++) {
        dev = sample[k] - mean
        sq_sum += dev * dev
      }
      # Divides by NR (not NR-1): population, not sample, deviation.
      printf "avg = %.3f s, std = %.3f s\n", mean, sqrt(sq_sum / NR)
    }'
}
# Print the current Unix time in seconds, with a fractional part when the
# platform can provide one.
# Order of preference:
#   1. $EPOCHREALTIME (bash 5+), no fork needed;
#   2. `date +%s.%N` when it yields digits (GNU date);
#   3. `date +%s` — whole seconds only. BSD/macOS date does not expand %N
#      and would otherwise emit a literal "N" that breaks later arithmetic.
now() {
  local stamp
  local numeric_re='^[0-9]+\.[0-9]+$'

  if [[ -n "${EPOCHREALTIME:-}" ]]; then
    # Some locales use ',' as the decimal separator; normalise to '.'.
    printf '%s\n' "${EPOCHREALTIME/,/.}"
    return 0
  fi

  stamp="$(date +%s.%N 2>/dev/null || true)"
  if [[ "$stamp" =~ $numeric_re ]]; then
    printf '%s\n' "$stamp"
  else
    date +%s
  fi
}
# send <model> <meta_outfile> <resp_outfile>
# One HTTP call to "$ENDPOINT/chat/completions":
#   - Response body -> <resp_outfile>
#   - Meta line "HTTP=<code> BYTES=<size> TIME=<time_total>" -> <meta_outfile>
# Returns curl's exit status (non-zero on transport errors and, because of
# --fail-with-body, on HTTP error statuses).
# The caller's errexit setting is left untouched: the status is captured with
# `|| rc=$?` instead of toggling `set +e` / `set -e`.
send() {
  local model="$1"
  local meta_out="$2"
  local resp_out="$3"

  # Escape backslashes and double quotes so the identifier stays a valid
  # JSON string.
  local model_json
  model_json=${model//\\/\\\\}
  model_json=${model_json//\"/\\\"}

  local meta
  local exit_code=0
  meta="$(curl "$ENDPOINT/chat/completions" \
    -H "Content-Type: application/json" \
    --fail-with-body \
    -sS -o "$resp_out" \
    -w 'HTTP=%{http_code} BYTES=%{size_download} TIME=%{time_total}\n' \
    --data @- <<JSON
{
  "model": "$model_json",
  "messages": [
    { "role": "system", "content": "Always answer in rhymes. Today is Thursday" },
    { "role": "user", "content": "tell me a story" }
  ],
  "temperature": 0.0,
  "seed": 42,
  "max_tokens": -1,
  "stream": false
}
JSON
  )" || exit_code=$?

  # Always record the meta line, even on failure, so the caller can log it.
  printf '%s\n' "$meta" > "$meta_out"
  return "$exit_code"
}
# Print the value of every KEY=value token for KEY found in a meta file.
# Prints nothing when the file is unreadable or the key is absent.
# Arguments: $1 - key name (e.g. HTTP), $2 - meta file path.
_meta_field() {
  local key="$1" file="$2"
  awk -v key="$key" '
    {
      for (i = 1; i <= NF; i++) {
        if (index($i, key "=") == 1) {
          split($i, kv, "=")
          print kv[2]
        }
      }
    }' "$file" 2>/dev/null || true
}

# Pretty log the assistant message + usage + meta
# Arguments: $1 - model identifier, $2 - response body file, $3 - meta file.
# Output (stdout): a "[Model ...]" header followed by either an ERROR section
# (HTTP code other than 200) or Assistant/Usage/Meta sections.
# Always returns 0 on the error path, including when the response file was
# never written (e.g. connection refused), so a failed request cannot abort
# the caller under `set -e` / `pipefail`.
format_block() {
  local model="$1" resp_file="$2" meta_file="$3"
  local http bytes time_total
  http="$(_meta_field HTTP "$meta_file")"
  bytes="$(_meta_field BYTES "$meta_file")"
  time_total="$(_meta_field TIME "$meta_file")"

  echo "[Model $model]"

  if [[ "$http" != "200" ]]; then
    echo "ERROR:"
    echo " http_code: ${http:-n/a}"
    echo " bytes: ${bytes:-n/a}"
    echo " curl_time: ${time_total:-n/a} s"
    echo " body (first 200 chars):"
    if [[ -s "$resp_file" ]]; then
      head -c 200 "$resp_file" | sed 's/^/ /'
      echo
    else
      # curl writes no body file when the request never got a response.
      echo " (no response body)"
    fi
    return 0
  fi

  if ! command -v jq >/dev/null 2>&1; then
    echo "Assistant:"
    echo " (jq not installed; raw body preview)"
    if [[ -r "$resp_file" ]]; then
      sed -n '1,20p' "$resp_file" | sed 's/^/ /'
    fi
    echo "Usage:"
    echo " (jq not installed)"
    echo "Meta:"
    echo " http_code: $http"
    echo " bytes: $bytes"
    echo " curl_time: ${time_total:-n/a} s"
    return 0
  fi

  # jq failures (malformed JSON, missing file) degrade to empty fields.
  local content prompt_tokens completion_tokens total_tokens
  content="$(jq -r '(.choices|last|.message|.content) // .choices[0].message.content // empty' "$resp_file" 2>/dev/null || true)"
  prompt_tokens="$(jq -r '.usage.prompt_tokens // empty' "$resp_file" 2>/dev/null || true)"
  completion_tokens="$(jq -r '.usage.completion_tokens // empty' "$resp_file" 2>/dev/null || true)"
  total_tokens="$(jq -r '.usage.total_tokens // empty' "$resp_file" 2>/dev/null || true)"

  echo "Assistant:"
  if [[ -n "$content" ]]; then
    # Strip trailing CRs, then indent every line by one space.
    printf "%s\n" "$content" | sed $'s/\r$//' | sed 's/^/ /'
  else
    echo " (no content in JSON)"
  fi
  echo "Usage:"
  echo " prompt_tokens: ${prompt_tokens:-n/a}"
  echo " completion_tokens: ${completion_tokens:-n/a}"
  echo " total_tokens: ${total_tokens:-n/a}"
  echo "Meta:"
  echo " http_code: $http"
  echo " bytes: $bytes"
  echo " curl_time: ${time_total:-n/a} s"
}
# Succeed when `lms ps` lists the given identifier as a whole
# whitespace-delimited token.
# Arguments: $1 - model identifier.
# Returns:   0 if found, 1 otherwise (including when `lms ps` fails).
# A plain substring match would report "foo" as loaded when only "foo:2" is,
# so tokens are compared for exact equality instead.
# NOTE(review): assumes the identifier appears as its own whitespace-separated
# field in `lms ps` output — confirm against the installed lms version.
is_loaded() {
  local ident="$1"
  # awk reads the whole stream (no early exit), so `lms` never sees SIGPIPE
  # and `pipefail` cannot turn a positive match into a failure.
  lms ps 2>/dev/null | awk -v want="$ident" '
    {
      for (i = 1; i <= NF; i++) {
        # Concatenate with "" to force string (not numeric) comparison.
        if (($i "") == (want "")) hit = 1
      }
    }
    END { exit (hit ? 0 : 1) }' || return 1
}
# Load one model instance through `lms load` unless is_loaded already
# reports it.
# Arguments: $1 - identifier to register the instance under.
# Globals:   BASE_MODEL_KEY (read), LMS_FLAGS (read).
# Output:    a one-line status message on stdout; `lms load` stdout is
#            discarded.
ensure_loaded() {
  local ident="$1"

  if is_loaded "$ident"; then
    echo "Already loaded: $ident"
    return 0
  fi

  echo "Loading: $ident"

  # Assemble the command as an array; extra flags are appended only when
  # present so an empty LMS_FLAGS is never expanded.
  local -a load_cmd=(lms load "$BASE_MODEL_KEY" --identifier "$ident")
  if ((${#LMS_FLAGS[@]})); then
    load_cmd+=("${LMS_FLAGS[@]}")
  fi
  "${load_cmd[@]}" >/dev/null
}
# --------------------------
# Progress bar for sequential scenario
# Fills only when a model returns HTTP 200 (i.e., send() exit code 0)
# --------------------------
# Redraw a single-line text progress bar on stdout.
# Arguments: $1 - completed count, $2 - total count,
#            $3 - bar width in characters (default 40).
# Output starts with a carriage return and has no trailing newline, so
# repeated calls overwrite the same terminal line.
draw_progress() {
  local completed="$1" total="$2" bar_width="${3:-40}"
  local n_filled=$(( (completed * bar_width) / total ))
  local hashes dashes

  # Pad with spaces to the wanted length, then swap the spaces for the
  # bar characters.
  printf -v hashes '%*s' "$n_filled" ''
  printf -v dashes '%*s' "$(( bar_width - n_filled ))" ''

  # print to stdout (live), carriage-returned
  printf "\r[%s%s] %d/%d complete" "${hashes// /#}" "${dashes// /-}" "$completed" "$total"
}
# --------------------------
# Load models once
# --------------------------
# Make sure every identifier in IDENTS is loaded before any timing starts.
printf '%s\n' "== Ensuring models are loaded via lms =="
for model_ident in "${IDENTS[@]}"; do
  ensure_loaded "$model_ident"
done
# "OK." followed by one blank separator line.
printf '%s\n\n' "OK."
# --------------------------
# Scenario runner with logging
# --------------------------
# Run one benchmark scenario RUNS times, log every response, and print the
# wall-time statistics.
# Arguments: $1 - scenario name: "single", "sequential" or "parallel".
# Globals:   OUTDIR, RUNS, IDENTS (read).
# Files:     $OUTDIR/<scenario>_results.log  - formatted responses per run
#            ./<scenario>_times.tmp          - one elapsed time per line
#                                              (read again by the summary)
# Raw response/meta files live in a private scratch directory that is removed
# before returning, instead of accumulating as tmp_* files in the cwd.
# Returns the exit status of `stats`.
run_scenario() {
  local scenario="$1"
  local logfile="$OUTDIR/${scenario}_results.log"
  local timefile="${scenario}_times.tmp"
  local scratch
  scratch="$(mktemp -d)"

  rm -f -- "$logfile" "$timefile"
  # Create the timings file up front so it exists even if no run completes.
  : > "$timefile"

  # Everything below is local so the loops do not leak globals.
  local r idx i m pid resp meta
  local start end elapsed
  local done_count total_models
  local -a pids models resps metas

  for (( r = 1; r <= RUNS; r++ )); do
    echo "----- RUN #$r -----" >> "$logfile"
    echo "Run $r..."
    start="$(now)"

    case "$scenario" in
      single)
        resp="$scratch/${scenario}_resp_${r}.json"
        meta="$scratch/${scenario}_meta_${r}.txt"
        # A failed request is still logged; format_block reports the error.
        send "${IDENTS[0]}" "$meta" "$resp" || true
        format_block "${IDENTS[0]}" "$resp" "$meta" >> "$logfile"
        ;;

      sequential)
        idx=1
        done_count=0
        total_models="${#IDENTS[@]}"
        # initial bar
        draw_progress "$done_count" "$total_models"
        for m in "${IDENTS[@]}"; do
          resp="$scratch/${scenario}_resp_${r}_${idx}.json"
          meta="$scratch/${scenario}_meta_${r}_${idx}.txt"
          # Only count full/OK responses toward the bar.
          # NB: plain assignment, not ((done_count++)) — the latter returns
          # status 1 when the old value is 0 and would trip `set -e`.
          if send "$m" "$meta" "$resp"; then
            done_count=$(( done_count + 1 ))
          fi
          # Always log the formatted block
          format_block "$m" "$resp" "$meta" >> "$logfile"
          draw_progress "$done_count" "$total_models"
          idx=$(( idx + 1 ))
        done
        # finish the line
        echo
        ;;

      parallel)
        idx=1
        pids=()
        models=()
        resps=()
        metas=()
        # Fan out: one background request per model.
        for m in "${IDENTS[@]}"; do
          resp="$scratch/${scenario}_resp_${r}_${idx}.json"
          meta="$scratch/${scenario}_meta_${r}_${idx}.txt"
          models+=("$m")
          resps+=("$resp")
          metas+=("$meta")
          ( send "$m" "$meta" "$resp" ) &
          pids+=("$!")
          idx=$(( idx + 1 ))
        done
        # Barrier: a failed request must not abort the run.
        for pid in "${pids[@]}"; do
          wait "$pid" || true
        done
        # Log in launch order, not completion order.
        for i in "${!models[@]}"; do
          format_block "${models[$i]}" "${resps[$i]}" "${metas[$i]}" >> "$logfile"
        done
        ;;
    esac

    end="$(now)"
    elapsed="$(bc <<<"$end - $start")"
    {
      echo "ELAPSED: $elapsed s"
      echo
    } >> "$logfile"
    echo "$elapsed" >> "$timefile"
  done

  rm -rf -- "$scratch"

  echo "$scenario total wall time:"
  stats < "$timefile"
}
# --------------------------
# Run scenarios
# --------------------------
# Print a "=== <banner> ===" heading, run the named scenario, then emit a
# blank separator line.
announce_and_run() {
  local banner="$1" scenario="$2"
  printf '=== %s ===\n' "$banner"
  run_scenario "$scenario"
  printf '\n'
}

# Each scenario runs only when its toggle is non-zero.
if (( DO_SINGLE )); then
  announce_and_run "SINGLE (${IDENTS[0]})" "single"
fi
if (( DO_SEQ )); then
  announce_and_run "SEQUENTIAL (${#IDENTS[@]} models)" "sequential"
fi
if (( DO_PAR )); then
  announce_and_run "PARALLEL (${#IDENTS[@]} models)" "parallel"
fi
# --------------------------
# Summary
# --------------------------
# Print one summary row for a scenario, then delete its timings file.
# Arguments: $1 - row label (printed verbatim, no newline),
#            $2 - scenario toggle (non-zero = enabled),
#            $3 - timings file with one elapsed time per line.
# A missing/unreadable timings file yields "n/a" instead of aborting the
# script under `set -e`.
_summary_row() {
  local label="$1" enabled="$2" timefile="$3"
  if (( ! enabled )); then
    return 0
  fi
  printf '%s' "$label"
  if [[ -r "$timefile" ]]; then
    stats < "$timefile"
    # The timings file is scratch data; nothing reads it after this point.
    rm -f -- "$timefile"
  else
    echo "avg = n/a, std = n/a"
  fi
}

# Print the final per-scenario statistics and where the logs were written.
# Globals: DO_SINGLE, DO_SEQ, DO_PAR, OUTDIR (read).
print_summary() {
  echo "=== SUMMARY ==="
  _summary_row "SINGLE: " "${DO_SINGLE:-0}" "single_times.tmp"
  _summary_row "SEQUENTIAL: " "${DO_SEQ:-0}" "sequential_times.tmp"
  _summary_row "PARALLEL: " "${DO_PAR:-0}" "parallel_times.tmp"
  echo
  echo "Logs saved to: ${OUTDIR:-}/"
  echo "Hint: if your server is on a different port, run with: --endpoint http://localhost:1234/v1"
}

print_summary
# (GitHub page footer — sign-up / sign-in prompt — is not part of the script.)