ibehnam · October 27, 2025 00:57
diff --git a/parallel_agents_macstudio.sh b/parallel_agents_macstudio.sh
 #!/usr/bin/env bash
 set -o errexit -o nounset -o pipefail

 # --------------------------
 # Defaults (several are replaced by command-line options further down)
 # --------------------------
 # Model key handed to `lms load`, and the identifier of the first instance.
 BASE_MODEL_KEY='openai/gpt-oss-20b'
 BASE_IDENTIFIER='openai/gpt-oss-20b'

 # Benchmark size: number of model instances and runs per scenario.
 N=4
 RUNS=5

 # Scenario switches (1 = run, 0 = skip).
 DO_SINGLE=0
 DO_SEQ=1
 DO_PAR=1

 # OpenAI-compatible endpoint. Defaults to the user's earlier port; override
 # with --endpoint if needed.
 ENDPOINT='http://localhost:8080/v1'

 # Directory for result logs, created up front.
 OUTDIR='./benchmark_logs'
 mkdir -p "$OUTDIR"

 # Optional flags to pass to lms load; filled in by argument parsing.
 LMS_FLAGS=()

 # usage
 # Print the command-line help to stdout.
 # The heredoc delimiter is unquoted, so $0, $N, $RUNS, $ENDPOINT and $OUTDIR
 # are expanded with their values at the moment usage is called.
 usage() {
  cat <<USAGE
 Usage: $0 [options]

 Benchmarking:
  --n N              Number of LLMs to use (inclusive): BASE plus :2..:N. Default: $N
  --runs R           Number of runs per scenario. Default: $RUNS
  --single           Include single-model scenario (BASE only)
  --no-seq           Skip sequential scenario
  --no-par           Skip parallel scenario
  --endpoint URL     OpenAI-compatible endpoint (default: $ENDPOINT)

 Model load (passed to 'lms load'):
  --model-key KEY
  --gpu <ratio|max|off>
  --context-length <num>
  --ttl <seconds>
  --host <host>
  --port <port>
  --yes
  --exact

 Other:
  -h, --help         Show this help

 Logs are saved in: $OUTDIR/
 USAGE
 }

 # --------------------------
 # Arg parsing
 # --------------------------
 # _is_positive_int <value>
 # Succeeds only when <value> is a decimal integer >= 1 with no leading zeros,
 # which makes it safe to use in the arithmetic contexts further down
 # (a value such as "abc" or "08" would otherwise raise an arithmetic error).
 _is_positive_int() {
   [[ "$1" =~ ^[1-9][0-9]*$ ]]
 }

 # Walk the command line once; every arm consumes its own flag (and value).
 while [[ $# -gt 0 ]]; do
   case "$1" in
     --n)
       N="${2:?}"
       if ! _is_positive_int "$N"; then
         echo "Invalid value for --n: '$N' (expected a positive integer)" >&2
         exit 1
       fi
       shift 2
       ;;
     --runs)
       RUNS="${2:?}"
       if ! _is_positive_int "$RUNS"; then
         echo "Invalid value for --runs: '$RUNS' (expected a positive integer)" >&2
         exit 1
       fi
       shift 2
       ;;
     --single) DO_SINGLE=1; shift ;;
     --no-seq) DO_SEQ=0; shift ;;
     --no-par) DO_PAR=0; shift ;;
     --endpoint) ENDPOINT="${2:?}"; shift 2 ;;
     --model-key) BASE_MODEL_KEY="${2:?}"; shift 2 ;;
     --gpu|--context-length|--ttl|--host|--port)
       # Value-taking flags are forwarded verbatim to `lms load`.
       LMS_FLAGS+=("$1" "${2:?}"); shift 2
       ;;
     --yes|--exact)
       # Boolean flags are forwarded without a value.
       LMS_FLAGS+=("$1"); shift
       ;;
     -h|--help) usage; exit 0 ;;
     # Diagnostics for a bad invocation go to stderr, not stdout.
     *) echo "Unknown arg: $1" >&2; usage >&2; exit 1 ;;
   esac
 done

 # --------------------------
 # Build model identifiers (inclusive N)
 #   N=1   -> [BASE]
 #   N>=2  -> [BASE, :2 .. :N]
 # --------------------------
 IDENTS=("$BASE_IDENTIFIER")
 if (( N >= 2 )); then
   for (( i=2; i<=N; i++ )); do
     IDENTS+=("${BASE_IDENTIFIER}:$i")
   done
 fi

 # --------------------------
 # Helpers
 # --------------------------
 # stats
 # Read one number per line from stdin and print the mean and the population
 # standard deviation (divisor NR) as "avg = X s, std = Y s".
 # Prints "avg = n/a, std = n/a" when stdin has no lines.
 stats() {
   awk '
     { sample[NR] = $1; total += $1 }
     END {
       if (NR == 0) {
         print "avg = n/a, std = n/a"
         exit
       }
       mean = total / NR
       for (k = 1; k <= NR; k++) {
         sq_dev += (sample[k] - mean) ^ 2
       }
       printf "avg = %.3f s, std = %.3f s\n", mean, sqrt(sq_dev / NR)
     }'
 }

 # now
 # Print the current time as epoch seconds, with a fractional part when one is
 # available, in a form that can be fed to bc/awk.
 # `date +%s.%N` is not portable: a date(1) without %N support emits a
 # non-numeric suffix, which would corrupt the elapsed-time subtraction.
 # Order of preference: bash's EPOCHREALTIME, a validated `date +%s.%N`,
 # perl's Time::HiRes, and finally whole seconds.
 now() {
   if [[ -n "${EPOCHREALTIME:-}" ]]; then
     # The decimal separator follows the locale; normalise a comma to a dot.
     printf '%s\n' "${EPOCHREALTIME/,/.}"
     return 0
   fi
   local stamp
   stamp="$(date +%s.%N)"
   if [[ "$stamp" =~ ^[0-9]+\.[0-9]+$ ]]; then
     printf '%s\n' "$stamp"
   elif command -v perl >/dev/null 2>&1; then
     perl -MTime::HiRes=time -e 'printf "%.6f\n", time'
   else
     date +%s
   fi
 }

 # send <model> <meta_outfile> <resp_outfile>
 # One HTTP call to "$ENDPOINT/chat/completions":
 #   - Response body -> <resp_outfile>
 #   - Meta line "HTTP=<code> BYTES=<size> TIME=<time_total>" -> <meta_outfile>
 # Returns curl's exit status (non-zero on transport errors and, because of
 # --fail-with-body, on HTTP error responses).
 # The function does not touch the caller's errexit setting: the curl status is
 # captured with `||` instead of toggling `set +e` / `set -e`.
 send() {
   local model="$1"
   local meta_out="$2"
   local resp_out="$3"

   # Escape backslashes and double quotes so the model name stays a valid
   # JSON string.
   local model_json=${model//\\/\\\\}
   model_json=${model_json//\"/\\\"}

   local -a body=(
     '{'
     "  \"model\": \"${model_json}\","
     '  "messages": ['
     '    { "role": "system", "content": "Always answer in rhymes. Today is Thursday" },'
     '    { "role": "user", "content": "tell me a story" }'
     '  ],'
     '  "temperature": 0.0,'
     '  "seed": 42,'
     '  "max_tokens": -1,'
     '  "stream": false'
     '}'
   )
   local payload
   printf -v payload '%s\n' "${body[@]}"

   local meta exit_code=0
   meta="$(curl "$ENDPOINT/chat/completions" \
     -H "Content-Type: application/json" \
     --fail-with-body \
     -sS -o "$resp_out" \
     -w 'HTTP=%{http_code} BYTES=%{size_download} TIME=%{time_total}\n' \
     --data @- <<<"$payload")" || exit_code=$?

   # The meta line is written even when curl failed so the caller can log it.
   printf '%s\n' "$meta" > "$meta_out"
   return "$exit_code"
 }

 # Pretty log the assistant message + usage + meta
 format_block() {
  local model="$1" resp_file="$2" meta_file="$3"

  local http bytes time_total
  http="$(awk '{for(i=1;i<=NF;i++){if($i~/^HTTP=/){split($i,a,"=");print a[2]}}}' "$meta_file" 2>/dev/null || echo "")"
  bytes="$(awk '{for(i=1;i<=NF;i++){if($i~/^BYTES=/){split($i,a,"=");print a[2]}}}' "$meta_file" 2>/dev/null || echo "")"
  time_total="$(awk '{for(i=1;i<=NF;i++){if($i~/^TIME=/){split($i,a,"=");print a[2]}}}' "$meta_file" 2>/dev/null || echo "")"

  echo "[Model $model]"
  if [[ "$http" != "200" ]]; then
    echo "ERROR:"
    echo "  http_code: ${http:-n/a}"
    echo "  bytes: ${bytes:-n/a}"
    echo "  curl_time: ${time_total:-n/a} s"
    echo "  body (first 200 chars):"
    if command -v head >/dev/null 2>&1; then
      head -c 200 "$resp_file" | sed 's/^/    /'
      echo
    else
      sed -n '1,10p' "$resp_file" | sed 's/^/    /'
    fi
    return 0
  fi

  if ! command -v jq >/dev/null 2>&1; then
    echo "Assistant:"
    echo "  (jq not installed; raw body preview)"
    sed -n '1,20p' "$resp_file" | sed 's/^/  /'
    echo "Usage:"
    echo "  (jq not installed)"
    echo "Meta:"
    echo "  http_code: $http"
    echo "  bytes: $bytes"
    echo "  curl_time: ${time_total:-n/a} s"
    return 0
  fi

  local content prompt_tokens completion_tokens total_tokens
  content="$(jq -r '(.choices|last|.message|.content) // .choices[0].message.content // empty' "$resp_file" 2>/dev/null || true)"
  prompt_tokens="$(jq -r '.usage.prompt_tokens // empty' "$resp_file" 2>/dev/null || true)"
  completion_tokens="$(jq -r '.usage.completion_tokens // empty' "$resp_file" 2>/dev/null || true)"
  total_tokens="$(jq -r '.usage.total_tokens // empty' "$resp_file" 2>/dev/null || true)"

  echo "Assistant:"
  if [[ -n "$content" ]]; then
    printf "%s\n" "$content" | sed $'s/\r$//' | sed 's/^/  /'
  else
    echo "  (no content in JSON)"
  fi
  echo "Usage:"
  echo "  prompt_tokens: ${prompt_tokens:-n/a}"
  echo "  completion_tokens: ${completion_tokens:-n/a}"
  echo "  total_tokens: ${total_tokens:-n/a}"
  echo "Meta:"
  echo "  http_code: $http"
  echo "  bytes: $bytes"
  echo "  curl_time: ${time_total:-n/a} s"
 }

 # is_loaded <identifier>
 # Succeeds when `lms ps` prints <identifier> as a complete
 # whitespace-separated token. A plain substring search would treat
 # "model:2" as loaded whenever "model:20" is, and the base identifier as
 # loaded whenever any ":N" instance is.
 # NOTE(review): assumes the identifier appears as its own token in the
 # `lms ps` output - confirm against the installed lms version.
 is_loaded() {
   lms ps 2>/dev/null | awk -v want="$1" '
     { for (f = 1; f <= NF; f++) if (($f "") == (want "")) hit = 1 }
     END { exit(hit ? 0 : 1) }'
 }

 # ensure_loaded <identifier>
 # Load "$BASE_MODEL_KEY" under <identifier> via `lms load` unless is_loaded
 # already reports it. Extra options from LMS_FLAGS are appended when present.
 # stdout of `lms load` is discarded; its exit status is the function's status.
 ensure_loaded() {
   local ident="$1"
   if ! is_loaded "$ident"; then
     echo "Loading: $ident"
     local -a load_cmd=(lms load "$BASE_MODEL_KEY" --identifier "$ident")
     # Only splice in the flags when there are some, so an empty array is
     # never expanded.
     if (( ${#LMS_FLAGS[@]} > 0 )); then
       load_cmd+=("${LMS_FLAGS[@]}")
     fi
     "${load_cmd[@]}" >/dev/null
   else
     echo "Already loaded: $ident"
     return 0
   fi
 }

 # --------------------------
 # Progress bar for sequential scenario
 # Fills only when a model returns HTTP 200 (i.e., send() exit code 0)
 # --------------------------
 # draw_progress <done> <total> [width]
 # Redraw a one-line text progress bar on stdout. The line starts with a
 # carriage return and has no trailing newline, so repeated calls overwrite it.
 # width is the number of bar cells (default 40).
 draw_progress() {
   local completed="$1" total="$2" width="${3:-40}"
   local n_hash=$(( completed * width / total ))
   local n_dash=$(( width - n_hash ))
   local bar_done bar_todo
   # Pad an empty string to the wanted length, then turn the padding into
   # bar characters.
   printf -v bar_done '%*s' "$n_hash" ''
   printf -v bar_todo '%*s' "$n_dash" ''
   printf "\r[%s%s] %d/%d complete" "${bar_done// /#}" "${bar_todo// /-}" "$completed" "$total"
 }

 # --------------------------
 # Load models once
 # --------------------------
 # Make sure every identifier in IDENTS is loaded before any timing starts.
 printf '%s\n' "== Ensuring models are loaded via lms =="
 for ident in "${IDENTS[@]}"; do ensure_loaded "$ident"; done
 printf 'OK.\n\n'

 # --------------------------
 # Scenario runner with logging
 # --------------------------
 # run_scenario <single|sequential|parallel>
 # Run the given scenario RUNS times and record the results.
 #   - Formatted responses and "ELAPSED" lines -> $OUTDIR/<scenario>_results.log
 #   - One wall-clock duration per run         -> ./<scenario>_times.tmp
 #   - Per-request bodies and meta lines       -> ./tmp_<scenario>_* files
 # After the last run the timing statistics are printed via stats.
 # Returns 2 for an unknown scenario name.
 run_scenario() {
   local scenario="$1"
   local logfile="$OUTDIR/${scenario}_results.log"
   local timefile="${scenario}_times.tmp"
   local r idx m i pid resp meta start end elapsed done_count total_models
   local -a pids models resps metas

   rm -f "$logfile" "$timefile"
   # Create the timings file up front so stats still has input with zero runs.
   : > "$timefile"

   # C-style loop instead of `seq`: some seq implementations count downwards
   # when the end is below the start.
   for (( r = 1; r <= RUNS; r++ )); do
     echo "----- RUN #$r -----" >> "$logfile"
     echo "Run $r..."

     start="$(now)"

     case "$scenario" in
       single)
         resp="tmp_${scenario}_resp_${r}.json"
         meta="tmp_${scenario}_meta_${r}.txt"
         # Drop leftovers of an earlier invocation so a failed request cannot
         # be logged with a stale body.
         rm -f "$resp" "$meta"
         # A failed request is still logged, so its status is ignored here.
         send "${IDENTS[0]}" "$meta" "$resp" || true
         format_block "${IDENTS[0]}" "$resp" "$meta" >> "$logfile"
         ;;
       sequential)
         idx=1
         done_count=0
         total_models="${#IDENTS[@]}"

         # initial bar
         draw_progress "$done_count" "$total_models"

         for m in "${IDENTS[@]}"; do
           resp="tmp_${scenario}_resp_${r}_${idx}.json"
           meta="tmp_${scenario}_meta_${r}_${idx}.txt"
           rm -f "$resp" "$meta"

           # Only successful responses advance the bar. Plain assignments are
           # used because `((x++))` returns status 1 when x is 0, which would
           # terminate the script under `set -e`.
           if send "$m" "$meta" "$resp"; then
             done_count=$(( done_count + 1 ))
           fi

           # Always log the formatted block
           format_block "$m" "$resp" "$meta" >> "$logfile"

           draw_progress "$done_count" "$total_models"
           idx=$(( idx + 1 ))
         done
         # finish the line
         echo
         ;;
       parallel)
         idx=1
         pids=()
         models=()
         resps=()
         metas=()
         for m in "${IDENTS[@]}"; do
           resp="tmp_${scenario}_resp_${r}_${idx}.json"
           meta="tmp_${scenario}_meta_${r}_${idx}.txt"
           rm -f "$resp" "$meta"
           models+=("$m")
           resps+=("$resp")
           metas+=("$meta")
           ( send "$m" "$meta" "$resp" ) &
           pids+=("$!")
           idx=$(( idx + 1 ))
         done
         # Barrier: wait for every request; failures are logged below.
         for pid in "${pids[@]}"; do
           wait "$pid" || true
         done
         for i in "${!models[@]}"; do
           format_block "${models[$i]}" "${resps[$i]}" "${metas[$i]}" >> "$logfile"
         done
         ;;
       *)
         echo "Unknown scenario: $scenario" >&2
         return 2
         ;;
     esac

     end="$(now)"
     elapsed="$(printf '%s - %s\n' "$end" "$start" | bc)"
     {
       echo "ELAPSED: $elapsed s"
       echo
     } >> "$logfile"
     echo "$elapsed" >> "$timefile"
   done

   echo "$scenario total wall time:"
   stats < "$timefile"
 }

 # --------------------------
 # Run scenarios
 # --------------------------
 # Each enabled scenario prints a banner, runs, and is followed by a blank line.
 if (( DO_SINGLE )); then
   printf '=== SINGLE (%s) ===\n' "${IDENTS[0]}"
   run_scenario "single"
   printf '\n'
 fi

 if (( DO_SEQ )); then
   printf '=== SEQUENTIAL (%s models) ===\n' "${#IDENTS[@]}"
   run_scenario "sequential"
   printf '\n'
 fi

 if (( DO_PAR )); then
   printf '=== PARALLEL (%s models) ===\n' "${#IDENTS[@]}"
   run_scenario "parallel"
   printf '\n'
 fi

 # --------------------------
 # Summary
 # --------------------------
 # Re-read the per-scenario timing files and print one statistics line each.
 printf '%s\n' "=== SUMMARY ==="
 if (( DO_SINGLE )); then
   printf "SINGLE:     "
   stats < single_times.tmp
 fi
 if (( DO_SEQ )); then
   printf "SEQUENTIAL: "
   stats < sequential_times.tmp
 fi
 if (( DO_PAR )); then
   printf "PARALLEL:   "
   stats < parallel_times.tmp
 fi

 printf '\n'
 printf 'Logs saved to: %s/\n' "$OUTDIR"
 printf '%s\n' "Hint: if your server is on a different port, run with: --endpoint http://localhost:1234/v1"
	#!/usr/bin/env bash
	set -o errexit -o nounset -o pipefail

	# --------------------------
	# Defaults (several are replaced by command-line options further down)
	# --------------------------
	# Model key handed to `lms load`, and the identifier of the first instance.
	BASE_MODEL_KEY='openai/gpt-oss-20b'
	BASE_IDENTIFIER='openai/gpt-oss-20b'

	# Benchmark size: number of model instances and runs per scenario.
	N=4
	RUNS=5

	# Scenario switches (1 = run, 0 = skip).
	DO_SINGLE=0
	DO_SEQ=1
	DO_PAR=1

	# OpenAI-compatible endpoint. Defaults to the user's earlier port; override
	# with --endpoint if needed.
	ENDPOINT='http://localhost:8080/v1'

	# Directory for result logs, created up front.
	OUTDIR='./benchmark_logs'
	mkdir -p "$OUTDIR"

	# Optional flags to pass to lms load; filled in by argument parsing.
	LMS_FLAGS=()

	# usage
	# Print the command-line help to stdout.
	# The heredoc uses `<<-` so the tab-indented body and terminator are
	# accepted (leading tabs are stripped); with a plain `<<` the indented
	# terminator never matches. The delimiter is unquoted, so $0, $N, $RUNS,
	# $ENDPOINT and $OUTDIR are expanded when usage is called.
	usage() {
	  cat <<-USAGE
	Usage: $0 [options]

	Benchmarking:
	  --n N              Number of LLMs to use (inclusive): BASE plus :2..:N. Default: $N
	  --runs R           Number of runs per scenario. Default: $RUNS
	  --single           Include single-model scenario (BASE only)
	  --no-seq           Skip sequential scenario
	  --no-par           Skip parallel scenario
	  --endpoint URL     OpenAI-compatible endpoint (default: $ENDPOINT)

	Model load (passed to 'lms load'):
	  --model-key KEY
	  --gpu <ratio|max|off>
	  --context-length <num>
	  --ttl <seconds>
	  --host <host>
	  --port <port>
	  --yes
	  --exact

	Other:
	  -h, --help         Show this help

	Logs are saved in: $OUTDIR/
	USAGE
	}

	# --------------------------
	# Arg parsing
	# --------------------------
	# _is_positive_int <value>
	# Succeeds only when <value> is a decimal integer >= 1 with no leading zeros,
	# which makes it safe to use in the arithmetic contexts further down
	# (a value such as "abc" or "08" would otherwise raise an arithmetic error).
	_is_positive_int() {
	  [[ "$1" =~ ^[1-9][0-9]*$ ]]
	}

	# Walk the command line once; every arm consumes its own flag (and value).
	# Alternatives in case patterns use a bare `|`; an escaped `\|` would be
	# matched as a literal character.
	while [[ $# -gt 0 ]]; do
	  case "$1" in
	    --n)
	      N="${2:?}"
	      if ! _is_positive_int "$N"; then
	        echo "Invalid value for --n: '$N' (expected a positive integer)" >&2
	        exit 1
	      fi
	      shift 2
	      ;;
	    --runs)
	      RUNS="${2:?}"
	      if ! _is_positive_int "$RUNS"; then
	        echo "Invalid value for --runs: '$RUNS' (expected a positive integer)" >&2
	        exit 1
	      fi
	      shift 2
	      ;;
	    --single) DO_SINGLE=1; shift ;;
	    --no-seq) DO_SEQ=0; shift ;;
	    --no-par) DO_PAR=0; shift ;;
	    --endpoint) ENDPOINT="${2:?}"; shift 2 ;;
	    --model-key) BASE_MODEL_KEY="${2:?}"; shift 2 ;;
	    --gpu|--context-length|--ttl|--host|--port)
	      # Value-taking flags are forwarded verbatim to `lms load`.
	      LMS_FLAGS+=("$1" "${2:?}"); shift 2
	      ;;
	    --yes|--exact)
	      # Boolean flags are forwarded without a value.
	      LMS_FLAGS+=("$1"); shift
	      ;;
	    -h|--help) usage; exit 0 ;;
	    # Diagnostics for a bad invocation go to stderr, not stdout.
	    *) echo "Unknown arg: $1" >&2; usage >&2; exit 1 ;;
	  esac
	done

	# --------------------------
	# Build model identifiers (inclusive N)
	# N=1 -> [BASE]
	# N>=2 -> [BASE, :2 .. :N]
	# --------------------------
	IDENTS=("$BASE_IDENTIFIER")
	if (( N >= 2 )); then
	  for (( i=2; i<=N; i++ )); do
	    IDENTS+=("${BASE_IDENTIFIER}:$i")
	  done
	fi

	# --------------------------
	# Helpers
	# --------------------------
	# stats
	# Read one number per line from stdin and print the mean and the population
	# standard deviation (divisor NR) as "avg = X s, std = Y s".
	# Prints "avg = n/a, std = n/a" when stdin has no lines.
	stats() {
	  awk '
	    { sample[NR] = $1; total += $1 }
	    END {
	      if (NR == 0) {
	        print "avg = n/a, std = n/a"
	        exit
	      }
	      mean = total / NR
	      for (k = 1; k <= NR; k++) {
	        sq_dev += (sample[k] - mean) ^ 2
	      }
	      printf "avg = %.3f s, std = %.3f s\n", mean, sqrt(sq_dev / NR)
	    }'
	}

	# now
	# Print the current time as epoch seconds, with a fractional part when one is
	# available, in a form that can be fed to bc/awk.
	# `date +%s.%N` is not portable: a date(1) without %N support emits a
	# non-numeric suffix, which would corrupt the elapsed-time subtraction.
	# Order of preference: bash's EPOCHREALTIME, a validated `date +%s.%N`,
	# perl's Time::HiRes, and finally whole seconds.
	now() {
	  if [[ -n "${EPOCHREALTIME:-}" ]]; then
	    # The decimal separator follows the locale; normalise a comma to a dot.
	    printf '%s\n' "${EPOCHREALTIME/,/.}"
	    return 0
	  fi
	  local stamp
	  stamp="$(date +%s.%N)"
	  if [[ "$stamp" =~ ^[0-9]+\.[0-9]+$ ]]; then
	    printf '%s\n' "$stamp"
	  elif command -v perl >/dev/null 2>&1; then
	    perl -MTime::HiRes=time -e 'printf "%.6f\n", time'
	  else
	    date +%s
	  fi
	}

	# send <model> <meta_outfile> <resp_outfile>
	# One HTTP call to "$ENDPOINT/chat/completions":
	# - Response body -> <resp_outfile>
	# - Meta line "HTTP=<code> BYTES=<size> TIME=<time_total>" -> <meta_outfile>
	# Returns curl's exit status (non-zero on transport errors and, because of
	# --fail-with-body, on HTTP error responses).
	# The request body is assembled in a variable rather than an indented
	# heredoc (whose terminator would never match), and the curl status is
	# captured with `||` so the caller's errexit setting is left untouched.
	send() {
	  local model="$1"
	  local meta_out="$2"
	  local resp_out="$3"

	  # Escape backslashes and double quotes so the model name stays a valid
	  # JSON string.
	  local model_json=${model//\\/\\\\}
	  model_json=${model_json//\"/\\\"}

	  local -a body=(
	    '{'
	    "  \"model\": \"${model_json}\","
	    '  "messages": ['
	    '    { "role": "system", "content": "Always answer in rhymes. Today is Thursday" },'
	    '    { "role": "user", "content": "tell me a story" }'
	    '  ],'
	    '  "temperature": 0.0,'
	    '  "seed": 42,'
	    '  "max_tokens": -1,'
	    '  "stream": false'
	    '}'
	  )
	  local payload
	  printf -v payload '%s\n' "${body[@]}"

	  local meta exit_code=0
	  meta="$(curl "$ENDPOINT/chat/completions" \
	    -H "Content-Type: application/json" \
	    --fail-with-body \
	    -sS -o "$resp_out" \
	    -w 'HTTP=%{http_code} BYTES=%{size_download} TIME=%{time_total}\n' \
	    --data @- <<<"$payload")" || exit_code=$?

	  # The meta line is written even when curl failed so the caller can log it.
	  printf '%s\n' "$meta" > "$meta_out"
	  return "$exit_code"
	}

	# _meta_field <KEY> <meta_file>
	# Print the value of the KEY=value token found in <meta_file>; prints nothing
	# when the key or the file is missing.
	_meta_field() {
	  awk -v key="$1" '
	    { for (i = 1; i <= NF; i++) if (index($i, key "=") == 1) print substr($i, length(key) + 2) }
	  ' "$2" 2>/dev/null || true
	}

	# format_block <model> <resp_file> <meta_file>
	# Pretty log the assistant message + usage + meta for one request.
	# - HTTP code other than 200: prints an ERROR section with a body preview.
	# - jq unavailable: prints a raw preview of the body instead of parsed fields.
	# - otherwise: prints the assistant content and token usage extracted by jq.
	# Always returns 0. Pipes and `||` are written unescaped (an escaped `\|`
	# is passed to the command as a literal argument), and a missing or empty
	# response file is reported explicitly instead of letting a failing
	# `head | sed` pipeline abort the script under `set -e` + pipefail.
	format_block() {
	  local model="$1" resp_file="$2" meta_file="$3"

	  local http bytes time_total
	  http="$(_meta_field HTTP "$meta_file")"
	  bytes="$(_meta_field BYTES "$meta_file")"
	  time_total="$(_meta_field TIME "$meta_file")"

	  echo "[Model $model]"
	  if [[ "$http" != "200" ]]; then
	    echo "ERROR:"
	    echo " http_code: ${http:-n/a}"
	    echo " bytes: ${bytes:-n/a}"
	    echo " curl_time: ${time_total:-n/a} s"
	    echo " body (first 200 chars):"
	    if [[ -s "$resp_file" ]]; then
	      head -c 200 "$resp_file" | sed 's/^/ /'
	      echo
	    else
	      echo " (no response body)"
	    fi
	    return 0
	  fi

	  if ! command -v jq >/dev/null 2>&1; then
	    echo "Assistant:"
	    echo " (jq not installed; raw body preview)"
	    if [[ -r "$resp_file" ]]; then
	      sed -n '1,20p' "$resp_file" | sed 's/^/ /'
	    fi
	    echo "Usage:"
	    echo " (jq not installed)"
	    echo "Meta:"
	    echo " http_code: $http"
	    echo " bytes: $bytes"
	    echo " curl_time: ${time_total:-n/a} s"
	    return 0
	  fi

	  # jq failures (missing file, invalid JSON) are deliberately tolerated; the
	  # affected fields are then reported as absent.
	  local content prompt_tokens completion_tokens total_tokens
	  content="$(jq -r '(.choices|last|.message|.content) // .choices[0].message.content // empty' "$resp_file" 2>/dev/null || true)"
	  prompt_tokens="$(jq -r '.usage.prompt_tokens // empty' "$resp_file" 2>/dev/null || true)"
	  completion_tokens="$(jq -r '.usage.completion_tokens // empty' "$resp_file" 2>/dev/null || true)"
	  total_tokens="$(jq -r '.usage.total_tokens // empty' "$resp_file" 2>/dev/null || true)"

	  echo "Assistant:"
	  if [[ -n "$content" ]]; then
	    # Strip trailing carriage returns, then indent every line.
	    printf "%s\n" "$content" | sed $'s/\r$//' | sed 's/^/ /'
	  else
	    echo " (no content in JSON)"
	  fi
	  echo "Usage:"
	  echo " prompt_tokens: ${prompt_tokens:-n/a}"
	  echo " completion_tokens: ${completion_tokens:-n/a}"
	  echo " total_tokens: ${total_tokens:-n/a}"
	  echo "Meta:"
	  echo " http_code: $http"
	  echo " bytes: $bytes"
	  echo " curl_time: ${time_total:-n/a} s"
	}

	# is_loaded <identifier>
	# Succeeds when `lms ps` prints <identifier> as a complete
	# whitespace-separated token. The pipe is written unescaped (an escaped
	# `\|` would be handed to lms as an argument), and the match is exact: a
	# substring search would treat "model:2" as loaded whenever "model:20" is.
	# NOTE(review): assumes the identifier appears as its own token in the
	# `lms ps` output - confirm against the installed lms version.
	is_loaded() {
	  lms ps 2>/dev/null | awk -v want="$1" '
	    { for (f = 1; f <= NF; f++) if (($f "") == (want "")) hit = 1 }
	    END { exit(hit ? 0 : 1) }'
	}

	# ensure_loaded <identifier>
	# Load "$BASE_MODEL_KEY" under <identifier> via `lms load` unless is_loaded
	# already reports it. Extra options from LMS_FLAGS are appended when present.
	# stdout of `lms load` is discarded; its exit status is the function's status.
	ensure_loaded() {
	  local ident="$1"
	  if ! is_loaded "$ident"; then
	    echo "Loading: $ident"
	    local -a load_cmd=(lms load "$BASE_MODEL_KEY" --identifier "$ident")
	    # Only splice in the flags when there are some, so an empty array is
	    # never expanded.
	    if (( ${#LMS_FLAGS[@]} > 0 )); then
	      load_cmd+=("${LMS_FLAGS[@]}")
	    fi
	    "${load_cmd[@]}" >/dev/null
	  else
	    echo "Already loaded: $ident"
	    return 0
	  fi
	}

	# --------------------------
	# Progress bar for sequential scenario
	# Fills only when a model returns HTTP 200 (i.e., send() exit code 0)
	# --------------------------
	# draw_progress <done> <total> [width]
	# Redraw a one-line text progress bar on stdout. The line starts with a
	# carriage return and has no trailing newline, so repeated calls overwrite it.
	# width is the number of bar cells (default 40).
	draw_progress() {
	  local completed="$1" total="$2" width="${3:-40}"
	  local n_hash=$(( completed * width / total ))
	  local n_dash=$(( width - n_hash ))
	  local bar_done bar_todo
	  # Pad an empty string to the wanted length, then turn the padding into
	  # bar characters.
	  printf -v bar_done '%*s' "$n_hash" ''
	  printf -v bar_todo '%*s' "$n_dash" ''
	  printf "\r[%s%s] %d/%d complete" "${bar_done// /#}" "${bar_todo// /-}" "$completed" "$total"
	}

	# --------------------------
	# Load models once
	# --------------------------
	# Make sure every identifier in IDENTS is loaded before any timing starts.
	printf '%s\n' "== Ensuring models are loaded via lms =="
	for ident in "${IDENTS[@]}"; do ensure_loaded "$ident"; done
	printf 'OK.\n\n'

	# --------------------------
	# Scenario runner with logging
	# --------------------------
	# run_scenario <single|sequential|parallel>
	# Run the given scenario RUNS times and record the results.
	# - Formatted responses and "ELAPSED" lines -> $OUTDIR/<scenario>_results.log
	# - One wall-clock duration per run         -> ./<scenario>_times.tmp
	# - Per-request bodies and meta lines       -> ./tmp_<scenario>_* files
	# After the last run the timing statistics are printed via stats.
	# Returns 2 for an unknown scenario name.
	run_scenario() {
	  local scenario="$1"
	  local logfile="$OUTDIR/${scenario}_results.log"
	  local timefile="${scenario}_times.tmp"
	  local r idx m i pid resp meta start end elapsed done_count total_models
	  local -a pids models resps metas

	  rm -f "$logfile" "$timefile"
	  # Create the timings file up front so stats still has input with zero runs.
	  : > "$timefile"

	  # C-style loop instead of `seq`: some seq implementations count downwards
	  # when the end is below the start.
	  for (( r = 1; r <= RUNS; r++ )); do
	    echo "----- RUN #$r -----" >> "$logfile"
	    echo "Run $r..."

	    start="$(now)"

	    case "$scenario" in
	      single)
	        resp="tmp_${scenario}_resp_${r}.json"
	        meta="tmp_${scenario}_meta_${r}.txt"
	        # Drop leftovers of an earlier invocation so a failed request cannot
	        # be logged with a stale body.
	        rm -f "$resp" "$meta"
	        # A failed request is still logged, so its status is ignored here.
	        send "${IDENTS[0]}" "$meta" "$resp" || true
	        format_block "${IDENTS[0]}" "$resp" "$meta" >> "$logfile"
	        ;;
	      sequential)
	        idx=1
	        done_count=0
	        total_models="${#IDENTS[@]}"

	        # initial bar
	        draw_progress "$done_count" "$total_models"

	        for m in "${IDENTS[@]}"; do
	          resp="tmp_${scenario}_resp_${r}_${idx}.json"
	          meta="tmp_${scenario}_meta_${r}_${idx}.txt"
	          rm -f "$resp" "$meta"

	          # Only successful responses advance the bar. Plain assignments are
	          # used because `((x++))` returns status 1 when x is 0, which would
	          # terminate the script under `set -e`.
	          if send "$m" "$meta" "$resp"; then
	            done_count=$(( done_count + 1 ))
	          fi

	          # Always log the formatted block
	          format_block "$m" "$resp" "$meta" >> "$logfile"

	          draw_progress "$done_count" "$total_models"
	          idx=$(( idx + 1 ))
	        done
	        # finish the line
	        echo
	        ;;
	      parallel)
	        idx=1
	        pids=()
	        models=()
	        resps=()
	        metas=()
	        for m in "${IDENTS[@]}"; do
	          resp="tmp_${scenario}_resp_${r}_${idx}.json"
	          meta="tmp_${scenario}_meta_${r}_${idx}.txt"
	          rm -f "$resp" "$meta"
	          models+=("$m")
	          resps+=("$resp")
	          metas+=("$meta")
	          ( send "$m" "$meta" "$resp" ) &
	          pids+=("$!")
	          idx=$(( idx + 1 ))
	        done
	        # Barrier: wait for every request; failures are logged below.
	        for pid in "${pids[@]}"; do
	          wait "$pid" || true
	        done
	        for i in "${!models[@]}"; do
	          format_block "${models[$i]}" "${resps[$i]}" "${metas[$i]}" >> "$logfile"
	        done
	        ;;
	      *)
	        echo "Unknown scenario: $scenario" >&2
	        return 2
	        ;;
	    esac

	    end="$(now)"
	    elapsed="$(printf '%s - %s\n' "$end" "$start" | bc)"
	    {
	      echo "ELAPSED: $elapsed s"
	      echo
	    } >> "$logfile"
	    echo "$elapsed" >> "$timefile"
	  done

	  echo "$scenario total wall time:"
	  stats < "$timefile"
	}

	# --------------------------
	# Run scenarios
	# --------------------------
	# Each enabled scenario prints a banner, runs, and is followed by a blank line.
	if (( DO_SINGLE )); then
	  printf '=== SINGLE (%s) ===\n' "${IDENTS[0]}"
	  run_scenario "single"
	  printf '\n'
	fi

	if (( DO_SEQ )); then
	  printf '=== SEQUENTIAL (%s models) ===\n' "${#IDENTS[@]}"
	  run_scenario "sequential"
	  printf '\n'
	fi

	if (( DO_PAR )); then
	  printf '=== PARALLEL (%s models) ===\n' "${#IDENTS[@]}"
	  run_scenario "parallel"
	  printf '\n'
	fi

	# --------------------------
	# Summary
	# --------------------------
	# Re-read the per-scenario timing files and print one statistics line each.
	printf '%s\n' "=== SUMMARY ==="
	if (( DO_SINGLE )); then
	  printf "SINGLE: "
	  stats < single_times.tmp
	fi
	if (( DO_SEQ )); then
	  printf "SEQUENTIAL: "
	  stats < sequential_times.tmp
	fi
	if (( DO_PAR )); then
	  printf "PARALLEL: "
	  stats < parallel_times.tmp
	fi

	printf '\n'
	printf 'Logs saved to: %s/\n' "$OUTDIR"
	printf '%s\n' "Hint: if your server is on a different port, run with: --endpoint http://localhost:1234/v1"