Created
October 27, 2025 00:57
-
-
Save ibehnam/7341eee8494772249db2e349022a4b68 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
set -euo pipefail

# ==========================
# Default settings
# ==========================

# Model key handed to 'lms load', and the identifier the first instance is
# loaded under (further instances get ":2", ":3", ... appended).
BASE_MODEL_KEY="openai/gpt-oss-20b"
BASE_IDENTIFIER="openai/gpt-oss-20b"

# Benchmark size: number of model instances and number of runs per scenario.
N=4
RUNS=5

# Scenario switches (1 = run, 0 = skip).
DO_SINGLE=0
DO_SEQ=1
DO_PAR=1

# OpenAI-compatible endpoint; override with --endpoint if the server listens elsewhere.
ENDPOINT="http://localhost:8080/v1"

# Directory that receives the per-scenario logs; created up front.
OUTDIR="./benchmark_logs"
mkdir -p "$OUTDIR"

# Optional extra flags forwarded verbatim to 'lms load'.
LMS_FLAGS=()
# usage
# Print the command-line help text to stdout.
# Reads globals: N, RUNS, ENDPOINT, OUTDIR (shown as the current values).
usage() {
  printf '%s\n' \
    "Usage: $0 [options]" \
    "Benchmarking:" \
    "--n N Number of LLMs to use (inclusive): BASE plus :2..:N. Default: $N" \
    "--runs R Number of runs per scenario. Default: $RUNS" \
    "--single Include single-model scenario (BASE only)" \
    "--no-seq Skip sequential scenario" \
    "--no-par Skip parallel scenario" \
    "--endpoint URL OpenAI-compatible endpoint (default: $ENDPOINT)" \
    "Model load (passed to 'lms load'):" \
    "--model-key KEY" \
    "--gpu <ratio|max|off>" \
    "--context-length <num>" \
    "--ttl <seconds>" \
    "--host <host>" \
    "--port <port>" \
    "--yes" \
    "--exact" \
    "Other:" \
    "-h, --help Show this help" \
    "Logs are saved in: $OUTDIR/"
}
# --------------------------
# Arg parsing
# --------------------------

# parse_args <args...>
# Applies command-line options to the global settings:
#   N, RUNS, DO_SINGLE, DO_SEQ, DO_PAR, ENDPOINT, BASE_MODEL_KEY, LMS_FLAGS.
# --n must be a non-negative integer and --runs a positive integer; they are
# checked here so a typo fails with a clear message instead of a later
# arithmetic error. Any problem is reported on stderr and exits with status 1.
# -h/--help prints the usage text and exits 0.
parse_args() {
  local uint_re='^(0|[1-9][0-9]*)$'
  local posint_re='^[1-9][0-9]*$'

  while [[ $# -gt 0 ]]; do
    case "$1" in
      --n)
        if ! [[ "${2:?--n requires a value}" =~ $uint_re ]]; then
          printf 'Error: --n expects a non-negative integer, got: %s\n' "$2" >&2
          exit 1
        fi
        N="$2"
        shift 2
        ;;
      --runs)
        if ! [[ "${2:?--runs requires a value}" =~ $posint_re ]]; then
          printf 'Error: --runs expects a positive integer, got: %s\n' "$2" >&2
          exit 1
        fi
        RUNS="$2"
        shift 2
        ;;
      --single) DO_SINGLE=1; shift ;;
      --no-seq) DO_SEQ=0; shift ;;
      --no-par) DO_PAR=0; shift ;;
      --endpoint) ENDPOINT="${2:?--endpoint requires a value}"; shift 2 ;;
      --model-key) BASE_MODEL_KEY="${2:?--model-key requires a value}"; shift 2 ;;
      --yes|--exact)
        # Value-less flags are forwarded to 'lms load' as-is.
        LMS_FLAGS+=("$1")
        shift
        ;;
      --gpu|--context-length|--ttl|--host|--port)
        # Flags with one value are forwarded to 'lms load' together with it.
        LMS_FLAGS+=("$1" "${2:?$1 requires a value}")
        shift 2
        ;;
      -h|--help) usage; exit 0 ;;
      *)
        echo "Unknown arg: $1" >&2
        usage >&2
        exit 1
        ;;
    esac
  done
}

parse_args "$@"
# --------------------------
# Build model identifiers (inclusive N)
#   N=1  -> [BASE]
#   N>=2 -> [BASE, BASE:2 .. BASE:N]
# --------------------------
IDENTS=("$BASE_IDENTIFIER")
if (( N > 1 )); then
  i=2
  while (( i <= N )); do
    # Append at the next free index; same effect as IDENTS+=(...).
    IDENTS[${#IDENTS[@]}]="${BASE_IDENTIFIER}:${i}"
    i=$(( i + 1 ))
  done
fi
| # -------------------------- | |
| # Helpers | |
| # -------------------------- | |
# stats
# Reads one number per line from stdin and prints
#   "avg = <mean> s, std = <population standard deviation> s"
# with three decimals, or "avg = n/a, std = n/a" when stdin is empty.
stats() {
  awk '
    {
      sample[NR] = $1
      total += $1
    }
    END {
      if (NR == 0) {
        print "avg = n/a, std = n/a"
        exit
      }
      mean = total / NR
      # Second pass: accumulate squared deviations from the mean.
      for (k = 1; k <= NR; k++) {
        delta = sample[k] - mean
        sq_sum += delta * delta
      }
      printf "avg = %.3f s, std = %.3f s\n", mean, sqrt(sq_sum / NR)
    }'
}
# now
# Prints the current Unix time as "<seconds>.<fraction>" on stdout.
# 'date +%s.%N' is a GNU extension; BSD/macOS date prints a literal "N", which
# breaks the elapsed-time arithmetic. Prefer bash's EPOCHREALTIME (bash >= 5),
# then GNU date, and finally fall back to whole seconds.
now() {
  if [[ -n "${EPOCHREALTIME:-}" ]]; then
    # The decimal separator follows the locale; normalise a comma to a dot.
    printf '%s\n' "${EPOCHREALTIME/,/.}"
    return 0
  fi
  local stamp
  stamp="$(date +%s.%N)"
  if [[ "$stamp" =~ ^[0-9]+\.[0-9]+$ ]]; then
    printf '%s\n' "$stamp"
  else
    # %N unsupported: second resolution only.
    printf '%s.000000000\n' "$(date +%s)"
  fi
}
# send <model> <meta_outfile> <resp_outfile>
# One HTTP call (POST $ENDPOINT/chat/completions):
#   - Response body -> <resp_outfile>
#   - Meta line "HTTP=<code> BYTES=<size> TIME=<time_total>" -> <meta_outfile>
# Reads global: ENDPOINT.
# Returns curl's exit status (non-zero on transport errors and, because of
# --fail-with-body, on HTTP error responses).
send() {
  local model="$1"
  local meta_out="$2"
  local resp_out="$3"
  local meta=""
  local exit_code=0

  # Escape backslashes and double quotes so the identifier cannot break out of
  # the JSON string it is embedded in.
  local model_json
  model_json=${model//\\/\\\\}
  model_json=${model_json//\"/\\\"}

  # '|| exit_code=$?' records a failure without toggling 'set -e', so the
  # caller's errexit state is left exactly as it was.
  meta="$(
    curl "$ENDPOINT/chat/completions" \
      -H "Content-Type: application/json" \
      --fail-with-body \
      -sS -o "$resp_out" \
      -w 'HTTP=%{http_code} BYTES=%{size_download} TIME=%{time_total}\n' \
      --data @- <<JSON
{
  "model": "$model_json",
  "messages": [
    { "role": "system", "content": "Always answer in rhymes. Today is Thursday" },
    { "role": "user", "content": "tell me a story" }
  ],
  "temperature": 0.0,
  "seed": 42,
  "max_tokens": -1,
  "stream": false
}
JSON
  )" || exit_code=$?

  printf '%s\n' "$meta" > "$meta_out"
  return "$exit_code"
}
# format_block <model> <resp_file> <meta_file>
# Pretty log the assistant message + usage + meta for one request, on stdout.
#   <meta_file> holds the "HTTP=<code> BYTES=<size> TIME=<time_total>" line
#   written by send(); <resp_file> holds the raw response body.
# For any status other than 200 an ERROR block with a short body preview is
# printed instead. Always returns 0: a missing meta or response file (e.g. the
# server was unreachable, so curl never created the body file) is reported in
# the log rather than aborting the script under 'set -e' / 'pipefail'.
format_block() {
  local model="$1" resp_file="$2" meta_file="$3"
  local http="" bytes="" time_total="" token
  local -a meta_tokens=()

  # Parse the KEY=value tokens of the meta line once.
  if [[ -r "$meta_file" ]]; then
    read -r -a meta_tokens < "$meta_file" || true
  fi
  # The ${arr[@]+...} form keeps 'set -u' quiet on an empty array in older bash.
  for token in ${meta_tokens[@]+"${meta_tokens[@]}"}; do
    case "$token" in
      HTTP=*) http="${token#HTTP=}" ;;
      BYTES=*) bytes="${token#BYTES=}" ;;
      TIME=*) time_total="${token#TIME=}" ;;
    esac
  done

  echo "[Model $model]"

  if [[ "$http" != "200" ]]; then
    echo "ERROR:"
    echo " http_code: ${http:-n/a}"
    echo " bytes: ${bytes:-n/a}"
    echo " curl_time: ${time_total:-n/a} s"
    echo " body (first 200 chars):"
    if [[ -r "$resp_file" ]]; then
      head -c 200 "$resp_file" | sed 's/^/ /'
      echo
    else
      echo " (no response body)"
    fi
    return 0
  fi

  if ! command -v jq >/dev/null 2>&1; then
    echo "Assistant:"
    echo " (jq not installed; raw body preview)"
    if [[ -r "$resp_file" ]]; then
      sed -n '1,20p' "$resp_file" | sed 's/^/ /'
    fi
    echo "Usage:"
    echo " (jq not installed)"
    echo "Meta:"
    echo " http_code: $http"
    echo " bytes: $bytes"
    echo " curl_time: ${time_total:-n/a} s"
    return 0
  fi

  # jq failures (e.g. a non-JSON body) are deliberately tolerated: the fields
  # simply stay empty and are shown as "n/a" / "(no content in JSON)".
  local content prompt_tokens completion_tokens total_tokens
  content="$(jq -r '(.choices|last|.message|.content) // .choices[0].message.content // empty' "$resp_file" 2>/dev/null || true)"
  prompt_tokens="$(jq -r '.usage.prompt_tokens // empty' "$resp_file" 2>/dev/null || true)"
  completion_tokens="$(jq -r '.usage.completion_tokens // empty' "$resp_file" 2>/dev/null || true)"
  total_tokens="$(jq -r '.usage.total_tokens // empty' "$resp_file" 2>/dev/null || true)"

  echo "Assistant:"
  if [[ -n "$content" ]]; then
    # Strip trailing carriage returns, then indent every line by one space.
    printf "%s\n" "$content" | sed -e $'s/\r$//' -e 's/^/ /'
  else
    echo " (no content in JSON)"
  fi
  echo "Usage:"
  echo " prompt_tokens: ${prompt_tokens:-n/a}"
  echo " completion_tokens: ${completion_tokens:-n/a}"
  echo " total_tokens: ${total_tokens:-n/a}"
  echo "Meta:"
  echo " http_code: $http"
  echo " bytes: $bytes"
  echo " curl_time: ${time_total:-n/a} s"
}
# is_loaded <identifier>
# Returns 0 when 'lms ps' lists <identifier>, 1 otherwise (including when
# 'lms ps' itself fails).
# The identifier must match a whole whitespace-delimited token (surrounding
# double quotes / commas are ignored). A plain substring match would report
# "BASE" as loaded when only "BASE:2" is, or ":2" when only ":20" is.
# NOTE(review): assumes 'lms ps' prints the identifier as its own token —
# confirm against the output of the installed lms version.
is_loaded() {
  local ident="$1"
  # awk reads all input (no early exit) so 'lms ps' never gets SIGPIPE, which
  # would otherwise turn into a failure under 'pipefail'.
  lms ps 2>/dev/null | LMS_WANTED_IDENT="$ident" awk '
    {
      for (f = 1; f <= NF; f++) {
        tok = $f
        gsub(/^[",]+|[",]+$/, "", tok)
        if (tok == ENVIRON["LMS_WANTED_IDENT"]) found = 1
      }
    }
    END { exit(found ? 0 : 1) }
  ' || return 1
}
# ensure_loaded <identifier>
# Loads BASE_MODEL_KEY under <identifier> via 'lms load' unless is_loaded
# already reports it. Any flags collected in LMS_FLAGS are appended to the
# load command. Prints a one-line status; the output of 'lms load' is discarded.
# Returns the status of 'lms load' when a load was attempted, 0 otherwise.
ensure_loaded() {
  local ident="$1"

  if ! is_loaded "$ident"; then
    echo "Loading: $ident"
    local -a load_cmd=(lms load "$BASE_MODEL_KEY" --identifier "$ident")
    # Only expand LMS_FLAGS when it has elements (safe under 'set -u').
    if (( ${#LMS_FLAGS[@]} > 0 )); then
      load_cmd+=("${LMS_FLAGS[@]}")
    fi
    "${load_cmd[@]}" >/dev/null
    return
  fi

  echo "Already loaded: $ident"
  return 0
}
# --------------------------
# Progress bar for sequential scenario
# draw_progress <done> <total> [width=40]
# Redraws "[####----] done/total complete" on the current terminal line:
# output starts with a carriage return and has no trailing newline.
# The caller only advances <done> for requests where send() returned 0.
# --------------------------
draw_progress() {
  local done="$1" total="$2" width="${3:-40}"
  local n_hash=$(( done * width / total ))
  local n_dash=$(( width - n_hash ))
  local hashes dashes

  # Produce runs of spaces of the wanted length, then swap in the bar glyphs.
  printf -v hashes '%*s' "$n_hash" ''
  printf -v dashes '%*s' "$n_dash" ''

  printf "\r[%s%s] %d/%d complete" "${hashes// /#}" "${dashes// /-}" "$done" "$total"
}
# --------------------------
# Load models once
# Every identifier in IDENTS is passed through ensure_loaded before any
# scenario runs.
# --------------------------
printf '%s\n' "== Ensuring models are loaded via lms =="
ident_index=0
while (( ident_index < ${#IDENTS[@]} )); do
  ident="${IDENTS[ident_index]}"
  ensure_loaded "$ident"
  ident_index=$(( ident_index + 1 ))
done
printf '%s\n' "OK."
printf '\n'
# --------------------------
# Scenario runner with logging
# --------------------------
# run_scenario <single|sequential|parallel>
# Runs the scenario RUNS times against the models in IDENTS.
#   single     - one request to IDENTS[0]
#   sequential - one request per model, one after another, with a progress bar
#   parallel   - one request per model, all started in the background
# Output:
#   - formatted responses and an "ELAPSED" line per run ->
#     $OUTDIR/<scenario>_results.log
#   - one wall-clock duration per run -> ./<scenario>_times.tmp (kept in the
#     current directory under this name; the summary at the end of the script
#     reads it from there)
#   - "Run <r>..." progress lines and the final avg/std line -> stdout
# Reads globals: OUTDIR, RUNS, IDENTS.
# Calls: now, send, format_block, draw_progress, stats.
run_scenario() {
  local scenario="$1"
  local logfile="$OUTDIR/${scenario}_results.log"
  local timefile="${scenario}_times.tmp"
  local r idx i m pid resp meta start end elapsed done_count total_models
  local -a pids models resps metas
  local workdir

  # Per-request body/meta files live in a private scratch directory, so a
  # failed request can never pick up a stale body left behind by an earlier
  # invocation, and nothing is left littering the working directory.
  workdir="$(mktemp -d "${TMPDIR:-/tmp}/lms_bench.XXXXXX")"

  # Start from empty files (also guarantees 'stats' has a file to read).
  : > "$logfile"
  : > "$timefile"

  for (( r = 1; r <= RUNS; r++ )); do
    echo "----- RUN #$r -----" >> "$logfile"
    echo "Run $r..."
    start="$(now)"

    case "$scenario" in
      single)
        resp="$workdir/resp_${r}.json"
        meta="$workdir/meta_${r}.txt"
        # A failed request is still logged; format_block reports the error.
        send "${IDENTS[0]}" "$meta" "$resp" || true
        format_block "${IDENTS[0]}" "$resp" "$meta" >> "$logfile"
        ;;
      sequential)
        idx=1
        done_count=0
        total_models="${#IDENTS[@]}"
        # initial bar
        draw_progress "$done_count" "$total_models"
        for m in "${IDENTS[@]}"; do
          resp="$workdir/resp_${r}_${idx}.json"
          meta="$workdir/meta_${r}_${idx}.txt"
          # Only successful responses count toward the bar.
          # Plain assignment on purpose: '((done_count++))' returns status 1
          # when the old value is 0, which kills the script under 'set -e'.
          if send "$m" "$meta" "$resp"; then
            done_count=$(( done_count + 1 ))
          fi
          # Always log the formatted block
          format_block "$m" "$resp" "$meta" >> "$logfile"
          draw_progress "$done_count" "$total_models"
          idx=$(( idx + 1 ))
        done
        # finish the progress line
        echo
        ;;
      parallel)
        idx=1
        pids=()
        models=()
        resps=()
        metas=()
        for m in "${IDENTS[@]}"; do
          resp="$workdir/resp_${r}_${idx}.json"
          meta="$workdir/meta_${r}_${idx}.txt"
          models+=("$m")
          resps+=("$resp")
          metas+=("$meta")
          ( send "$m" "$meta" "$resp" ) &
          pids+=("$!")
          idx=$(( idx + 1 ))
        done
        # Barrier: individual failures are reported by format_block below.
        for pid in "${pids[@]}"; do
          wait "$pid" || true
        done
        for i in "${!models[@]}"; do
          format_block "${models[$i]}" "${resps[$i]}" "${metas[$i]}" >> "$logfile"
        done
        ;;
    esac

    end="$(now)"
    # awk is already required by stats(); using it here avoids a separate
    # dependency on bc. LC_ALL=C keeps "." as the decimal separator.
    elapsed="$(LC_ALL=C awk -v s="$start" -v e="$end" 'BEGIN { printf "%.6f", e - s }')"
    {
      echo "ELAPSED: $elapsed s"
      echo
    } >> "$logfile"
    echo "$elapsed" >> "$timefile"
  done

  rm -rf -- "$workdir"

  echo "$scenario total wall time:"
  stats < "$timefile"
}
# --------------------------
# Run scenarios
# Each enabled scenario prints a banner, runs via run_scenario, and is
# followed by a blank line.
# --------------------------
if (( DO_SINGLE != 0 )); then
  printf '%s\n' "=== SINGLE (${IDENTS[0]}) ==="
  run_scenario "single"
  printf '\n'
fi

if (( DO_SEQ != 0 )); then
  printf '%s\n' "=== SEQUENTIAL (${#IDENTS[@]} models) ==="
  run_scenario "sequential"
  printf '\n'
fi

if (( DO_PAR != 0 )); then
  printf '%s\n' "=== PARALLEL (${#IDENTS[@]} models) ==="
  run_scenario "parallel"
  printf '\n'
fi
# --------------------------
# Summary
# For every enabled scenario, print its label followed by the avg/std line
# computed by stats from <scenario>_times.tmp in the current directory.
# --------------------------
printf '%s\n' "=== SUMMARY ==="
for summary_row in \
  "SINGLE:${DO_SINGLE}:single_times.tmp" \
  "SEQUENTIAL:${DO_SEQ}:sequential_times.tmp" \
  "PARALLEL:${DO_PAR}:parallel_times.tmp"; do
  # Row layout: <label>:<enabled flag>:<times file>
  IFS=: read -r summary_label summary_enabled summary_file <<< "$summary_row"
  if (( summary_enabled )); then
    printf '%s' "${summary_label}: "
    stats < "$summary_file"
  fi
done
printf '\n'
printf '%s\n' "Logs saved to: $OUTDIR/"
printf '%s\n' "Hint: if your server is on a different port, run with: --endpoint http://localhost:1234/v1"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment