Skip to content

Instantly share code, notes, and snippets.

@HDCharles
Created April 30, 2026 14:58
Show Gist options
  • Select an option

  • Save HDCharles/9c819b08b3db79cfd5116e6e523f61d9 to your computer and use it in GitHub Desktop.

Select an option

Save HDCharles/9c819b08b3db79cfd5116e6e523f61d9 to your computer and use it in GitHub Desktop.
NVFP4 regression test suite: quantize.py and run_all_tests.sh
import argparse
import os
import time
import torch
from compressed_tensors.offload import dispatch_model
from compressed_tensors.quantization import preset_name_to_scheme
from datasets import load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer
# Monkey-patch os.chmod to ignore permission errors on shared cache:
# HF hub/datasets cache files may be owned by another user, in which case
# chmod raises PermissionError even though the files are perfectly usable.
_original_chmod = os.chmod


def _chmod_ignore_errors(path, mode, *args, **kwargs):
    """Drop-in os.chmod replacement that swallows PermissionError.

    Forwards any extra positional/keyword arguments (``dir_fd``,
    ``follow_symlinks``) to the real os.chmod so callers using the full
    signature do not break with a TypeError.
    """
    try:
        _original_chmod(path, mode, *args, **kwargs)
    except PermissionError:
        pass  # Silently ignore chmod errors on shared cache files


os.chmod = _chmod_ignore_errors
from llmcompressor import oneshot
from llmcompressor.modifiers.gptq import GPTQModifier
from llmcompressor.modifiers.quantization import QuantizationModifier
from llmcompressor.modifiers.transform.awq import AWQModifier
from llmcompressor.modifiers.transform.imatrix import IMatrixGatherer
from llmcompressor.modifiers.transform.smoothquant import SmoothQuantModifier
# Per-model quantization settings.
#   ignore: module name patterns excluded from quantization — lm_head and,
#           for MoE models, the router/gate layers ("re:" marks a regex).
#   is_moe: whether the model is mixture-of-experts; build_recipe() uses
#           this to pick the AWQ duo_scaling setting.
MODEL_CONFIGS = {
    "meta-llama/Meta-Llama-3-8B-Instruct": {
        "ignore": ["lm_head"],
        "is_moe": False,
    },
    "Qwen/Qwen3-30B-A3B": {
        "ignore": ["lm_head", "re:.*mlp.gate$", "re:.*mlp.shared_expert_gate$"],
        "is_moe": True,
    },
}

# Calibration dataset consumed by oneshot() in main().
DATASET_ID = "HuggingFaceH4/ultrachat_200k"
DATASET_SPLIT = "train_sft"
def build_recipe(technique, scheme, ignore, is_moe):
    """Build the llm-compressor modifier list for one quantization technique.

    Args:
        technique: one of "awq_rtn", "rtn", "rtn_mse", "gptq", "imatrix".
        scheme: preset scheme name (e.g. "NVFP4").
        ignore: module name patterns to leave unquantized.
        is_moe: True for mixture-of-experts models (affects AWQ duo scaling).

    Returns:
        A list of modifiers to pass to oneshot() as the recipe.

    Raises:
        ValueError: if ``technique`` is not recognized.
    """
    if technique == "awq_rtn":
        # MoE models disable duo scaling entirely; dense models use "both".
        duo_scaling = False if is_moe else "both"
        return [
            AWQModifier(duo_scaling=duo_scaling),
            QuantizationModifier(ignore=ignore, scheme=scheme, targets=["Linear"]),
        ]

    if technique == "rtn":
        return [
            QuantizationModifier(ignore=ignore, scheme=scheme, targets=["Linear"]),
        ]

    if technique == "rtn_mse":
        # Same as RTN but with an MSE-optimal observer on the weights.
        mse_scheme = preset_name_to_scheme(scheme, ["Linear"])
        mse_scheme.weights.observer = "memoryless_mse"
        return [
            QuantizationModifier(config_groups={"group_0": mse_scheme}, ignore=ignore),
        ]

    if technique == "gptq":
        modifiers = []
        # SmoothQuant only helps the activation-quantized W8A8 schemes.
        if "W8A8" in scheme:
            modifiers.append(SmoothQuantModifier(smoothing_strength=0.8))
        modifiers.append(
            GPTQModifier(ignore=ignore, scheme=scheme, targets=["Linear"])
        )
        return modifiers

    if technique == "imatrix":
        # Gather importance statistics first, then quantize with the
        # imatrix-aware MSE observer.
        imatrix_scheme = preset_name_to_scheme(scheme, ["Linear"])
        imatrix_scheme.weights.observer = "imatrix_mse"
        return [
            IMatrixGatherer(ignore=ignore),
            QuantizationModifier(
                config_groups={"group_0": imatrix_scheme}, ignore=ignore
            ),
        ]

    raise ValueError(f"Unknown technique: {technique}")
def main():
    """Quantize one model with one technique/scheme and save the result.

    Pipeline: parse CLI args, load model + tokenizer, build a calibration
    set from ultrachat, build the recipe, run oneshot() while measuring
    wall time and peak GPU memory, then save the compressed checkpoint.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--model", required=True)
    parser.add_argument(
        "--technique", required=True,
        choices=["awq_rtn", "rtn", "rtn_mse", "gptq", "imatrix"],
    )
    parser.add_argument("--scheme", required=True)
    parser.add_argument("--save-dir", required=True)
    parser.add_argument("--num-samples", type=int, default=256)
    parser.add_argument("--max-seq-length", type=int, default=512)
    args = parser.parse_args()

    # Fail fast for models without a curated ignore-list entry.
    config = MODEL_CONFIGS.get(args.model)
    if config is None:
        raise ValueError(
            f"Unknown model: {args.model}. "
            f"Known models: {list(MODEL_CONFIGS.keys())}"
        )

    model = AutoModelForCausalLM.from_pretrained(args.model, dtype="auto")
    tokenizer = AutoTokenizer.from_pretrained(args.model, trust_remote_code=True)

    # Calibration data: first N samples of the split, shuffled deterministically.
    ds = load_dataset(DATASET_ID, split=f"{DATASET_SPLIT}[:{args.num_samples}]")
    ds = ds.shuffle(seed=42)

    def preprocess(example):
        # Render the chat messages into a single prompt string.
        return {
            "text": tokenizer.apply_chat_template(
                example["messages"],
                tokenize=False,
            )
        }

    ds = ds.map(preprocess)

    def tokenize(sample):
        # add_special_tokens=False — presumably because the chat template
        # already emits them; TODO confirm against the tokenizer config.
        return tokenizer(
            sample["text"],
            padding=False,
            max_length=args.max_seq_length,
            truncation=True,
            add_special_tokens=False,
        )

    ds = ds.map(tokenize, remove_columns=ds.column_names)

    recipe = build_recipe(
        args.technique, args.scheme, config["ignore"], config["is_moe"]
    )

    # Time the calibration run and track peak GPU memory from a clean slate.
    torch.cuda.reset_peak_memory_stats()
    start_time = time.time()
    oneshot_kwargs = dict(
        model=model,
        dataset=ds,
        recipe=recipe,
        max_seq_length=args.max_seq_length,
        num_calibration_samples=args.num_samples,
    )
    oneshot(**oneshot_kwargs)
    elapsed_time = time.time() - start_time
    peak_memory_gb = torch.cuda.max_memory_allocated() / (1024**3)

    print("Quantization Complete")
    print(f"Technique: {args.technique}, Scheme: {args.scheme}")
    print(f"Time: {elapsed_time / 60:.2f} minutes ({elapsed_time:.2f} seconds)")
    print(f"Peak GPU Memory: {peak_memory_gb:.2f} GB")

    # save_compressed writes the compressed-tensors checkpoint format.
    model.save_pretrained(args.save_dir, save_compressed=True)
    tokenizer.save_pretrained(args.save_dir)
    print(f"Model saved to {args.save_dir}")


if __name__ == "__main__":
    main()
#!/bin/bash
# Observer Refactoring Regression Test Suite
# Tests all combinations of models x techniques x schemes x branches.
# For each combo, checks out main and the PR branch, reinstalls, quantizes, and evals.
#
# Usage:
# ./run_all_tests.sh 2>&1 | tee regression_results.log
# python extract_log_summary.py regression_results.log
# pipefail: make `pip install | tail` and similar pipelines report the
# real command's exit status instead of the last pipe stage's.
set -o pipefail
# Use shared hub_cache directory on /raid/engine
export HF_DATASETS_CACHE="/raid/engine/hub_cache"
mkdir -p "$HF_DATASETS_CACHE"
# ── Configuration ────────────────────────────────────────────────────────────
# Repo root is the parent of the directory containing this script.
REPO_DIR="$(cd "$(dirname "$0")/.." && pwd)"
# MODELS / MODEL_SHORT_NAMES / MODEL_VLLM_ARGS are parallel arrays indexed together.
MODELS=("meta-llama/Meta-Llama-3-8B-Instruct" "Qwen/Qwen3-30B-A3B")
MODEL_SHORT_NAMES=("Meta-Llama-3-8B-Instruct" "Qwen3-30B-A3B")
# max_model_len,tensor_parallel_size,num_gpus_quant
MODEL_VLLM_ARGS=("2048,1,1" "2048,2,1")
TECHNIQUES=("awq_rtn" "rtn" "rtn_mse" "gptq" "imatrix")
SCHEMES=("NVFP4")
BRANCHES=("main" "90_refactor_obs")
# EVAL_* are parallel arrays: display name, lm_eval task id, fewshot count, backend.
EVAL_NAMES=("gsm8k_platinum" "wikitext" "mmlu")
EVAL_LM_TASKS=("gsm8k_platinum" "wikitext" "mmlu")
EVAL_FEWSHOT=("5" "0" "5")
EVAL_BACKENDS=("vllm" "vllm" "vllm")
EVAL_BASE_DIR="./eval_results"
MODEL_BASE_DIR="./regression_models"
RESULTS_CSV="regression_results.csv"
# Remember the starting branch so it can be restored when the suite ends.
ORIGINAL_BRANCH=$(git -C "$REPO_DIR" rev-parse --abbrev-ref HEAD 2>/dev/null || echo "unknown")
mkdir -p "$EVAL_BASE_DIR" "$MODEL_BASE_DIR"
# ── Helper: activate environments ────────────────────────────────────────────
# Two separate virtualenvs: one with llm-compressor for quantization, one
# with vLLM + lm_eval for evaluation (their dependencies conflict).
activate_quant_env() {
    source /home/HDCharles/rhdev/bin/activate
}
activate_eval_env() {
    source /home/HDCharles/vllm/bin/activate
}
# ── Helper: checkout branch and reinstall ────────────────────────────────────
# Check out branch $1 in $REPO_DIR and reinstall llm-compressor from it.
# Returns non-zero if either the checkout or the pip install fails, so the
# caller records BRANCH_FAILED instead of silently testing a stale install.
switch_branch() {
    local branch=$1
    echo " Switching to branch: $branch"
    git -C "$REPO_DIR" checkout "$branch" 2>&1
    if [ $? -ne 0 ]; then
        echo " ERROR: git checkout $branch failed"
        return 1
    fi
    activate_quant_env
    # pipefail (set at script top) makes this pipeline's status reflect
    # pip's exit code even though `tail` is the last stage.
    pip install -e "$REPO_DIR" 2>&1 | tail -1
    if [ $? -ne 0 ]; then
        echo " ERROR: pip install from branch $branch failed"
        return 1
    fi
    echo " Installed llm-compressor from branch $branch"
}
# ── Helper: run vLLM evaluation with fallback chain ──────────────────────────
# Global out-param set by the eval helpers: records which backend
# configuration ultimately succeeded, or "FAILED".
EVAL_BACKEND=""

# Run a single lm_eval attempt. Relies on bash dynamic scoping: reads the
# caller's locals task / num_fewshot / chat_args / eval_output_dir.
#   $1 label stored in EVAL_BACKEND on success
#   $2 lm_eval --model backend ("vllm" or "hf")
#   $3 lm_eval --model_args string
_lm_eval_attempt() {
    local label=$1
    local backend=$2
    local model_args=$3
    # $chat_args is deliberately unquoted so it word-splits into flags.
    lm_eval \
        --model "$backend" \
        --model_args "$model_args" \
        --tasks "$task" \
        --num_fewshot "$num_fewshot" \
        --batch_size auto \
        $chat_args \
        --output_path "$eval_output_dir" 2>&1
    if [ $? -eq 0 ]; then
        EVAL_BACKEND="$label"
        return 0
    fi
    return 1
}

# Evaluate the model at $1 on lm_eval task $2, trying progressively more
# conservative vLLM configurations, then the HF backend as a last resort:
#   TP>1 -> expert_parallel -> TP=1 -> enforce_eager -> hf
run_vllm_eval() {
    local save_dir=$1
    local task=$2
    local num_fewshot=$3
    local max_model_len=$4
    local tp_size=$5
    local eval_output_dir=$6
    mkdir -p "$eval_output_dir"
    EVAL_BACKEND="FAILED"
    activate_eval_env
    echo " EVAL: $task (fewshot=$num_fewshot, tp=$tp_size, max_len=$max_model_len)"
    local chat_args="--apply_chat_template"
    if [ "$num_fewshot" -gt 0 ]; then
        chat_args="$chat_args --fewshot_as_multiturn"
    fi
    # Shared prefix of every vLLM --model_args string.
    local base="pretrained=$save_dir,dtype=auto,max_model_len=$max_model_len,add_bos_token=True"
    if [ "$tp_size" -gt 1 ]; then
        _lm_eval_attempt "vllm_tp${tp_size}" vllm \
            "$base,tensor_parallel_size=$tp_size,gpu_memory_utilization=0.85" && return 0
        echo " TP=$tp_size failed, trying expert_parallel..."
        _lm_eval_attempt "vllm_expert_parallel" vllm \
            "$base,enable_expert_parallel=True,gpu_memory_utilization=0.85" && return 0
    fi
    echo " Trying TP=1..."
    _lm_eval_attempt "vllm_tp1" vllm \
        "$base,gpu_memory_utilization=0.85" && return 0
    echo " Trying enforce_eager..."
    _lm_eval_attempt "vllm_eager" vllm \
        "$base,enforce_eager=True,gpu_memory_utilization=0.85" && return 0
    echo " Trying hf backend as last resort..."
    _lm_eval_attempt "hf" hf \
        "pretrained=$save_dir,dtype=auto,add_bos_token=True" && return 0
    EVAL_BACKEND="FAILED"
    return 1
}
# ── Helper: run HF-only evaluation ─────────────────────────────────────────
# Evaluate the model at $1 on lm_eval task $2 with the HuggingFace backend.
# Sets the global EVAL_BACKEND to "hf" on success, "FAILED" otherwise.
run_hf_eval() {
    local save_dir=$1
    local task=$2
    local num_fewshot=$3
    local eval_output_dir=$4
    mkdir -p "$eval_output_dir"
    EVAL_BACKEND="FAILED"
    activate_eval_env
    echo " EVAL: $task (fewshot=$num_fewshot, backend=hf)"
    # Multi-turn fewshot formatting only applies when fewshot > 0.
    local chat_args="--apply_chat_template"
    [ "$num_fewshot" -gt 0 ] && chat_args="$chat_args --fewshot_as_multiturn"
    # $chat_args stays unquoted so it word-splits into separate flags.
    if lm_eval \
        --model hf \
        --model_args "pretrained=$save_dir,dtype=auto,add_bos_token=True" \
        --tasks "$task" \
        --num_fewshot "$num_fewshot" \
        --batch_size auto \
        $chat_args \
        --output_path "$eval_output_dir" 2>&1; then
        EVAL_BACKEND="hf"
        return 0
    fi
    EVAL_BACKEND="FAILED"
    return 1
}
# ── Helper: extract metric from lm_eval JSON results ────────────────────────
# Print the headline metric for task $2 from the newest results_*.json under
# $1, or "N/A" if none is found / parsing fails. The path and task are
# passed to python as argv (not interpolated into the source), so paths
# containing quotes or spaces cannot break or inject into the script.
extract_metric() {
    local eval_output_dir=$1
    local task=$2
    local results_json
    results_json=$(find "$eval_output_dir" -name "results_*.json" -type f 2>/dev/null | sort | tail -1)
    if [ -z "$results_json" ]; then
        echo "N/A"
        return
    fi
    python3 - "$results_json" "$task" <<'PYEOF' 2>/dev/null || echo "N/A"
import json, sys

results_path, task = sys.argv[1], sys.argv[2]
with open(results_path) as f:
    data = json.load(f)
results = data.get('results', {})
# lm_eval may suffix/prefix the task name in the results key; substring match.
task_results = None
for key in results:
    if task in key:
        task_results = results[key]
        break
if task_results is None:
    print('N/A')
    sys.exit()
if 'gsm8k' in task:
    val = task_results.get('exact_match,strict-match')
    print(f'{val*100:.2f}%' if val is not None else 'N/A')
elif 'wikitext' in task:
    # Perplexity: printed raw (lower is better), not as a percentage.
    val = task_results.get('word_perplexity,none')
    print(f'{val:.2f}' if val is not None else 'N/A')
elif 'mmlu' in task:
    val = task_results.get('acc,none')
    print(f'{val*100:.2f}%' if val is not None else 'N/A')
else:
    # Unknown task: print the first plain numeric metric found.
    for k, v in task_results.items():
        if 'stderr' not in k and k != 'alias' and isinstance(v, (int, float)):
            print(f'{v:.4f}')
            sys.exit()
    print('N/A')
PYEOF
}
# ── Helper: print current results summary ────────────────────────────────────
# Dump the accumulated CSV as an aligned table, or a placeholder when no
# results have been written yet.
print_summary() {
    echo ""
    echo "╔══════════════════════════════════════════════════════════════════════════════════════════════════════╗"
    echo "║ RESULTS SUMMARY (so far) ║"
    echo "╚══════════════════════════════════════════════════════════════════════════════════════════════════════╝"
    echo ""
    if [ -f "$RESULTS_CSV" ]; then
        # `column` aligns the comma-separated fields for readability.
        column -t -s',' < "$RESULTS_CSV"
    else
        echo "(no results yet)"
    fi
    echo ""
    echo "════════════════════════════════════════════════════════════════════════════════════════════════════════"
    echo ""
}
# ── Helper: print branch comparison table ─────────────────────────────────
# Read $RESULTS_CSV and print a main-vs-PR table with percent change per
# (model, scheme, technique, task). For wikitext the metric is perplexity
# (lower is better), so the sign of the change is flipped there.
print_comparison() {
    if [ ! -f "$RESULTS_CSV" ]; then
        return
    fi
    # CSV path arrives via argv; quoted heredoc keeps the python verbatim.
    python3 - "$RESULTS_CSV" <<'PYEOF'
import csv, sys

csv_path = sys.argv[1]
rows = []
with open(csv_path) as f:
    reader = csv.DictReader(f)
    for r in reader:
        rows.append(r)
if not rows:
    sys.exit()
# Build lookup: (model, scheme, technique, task) -> {branch: metric}
lookup = {}
for r in rows:
    key = (r["model"], r["scheme"], r["technique"], r["task"])
    lookup.setdefault(key, {})
    lookup[key][r["branch"]] = r["metric"]
# Only compare entries that have a "main" result plus some other branch.
entries = [(k, v) for k, v in lookup.items()
           if "main" in v and any(b != "main" for b in v)]
if not entries:
    sys.exit()
# Infer the PR branch name from the first entry's non-main branch.
pr_branch = [b for b in next(iter(lookup.values())) if b != "main"]
pr_branch = pr_branch[0] if pr_branch else "pr"
def parse_metric(s):
    # Returns (value, is_percentage); (None, False) when unparseable.
    s = s.strip()
    if s.endswith("%"):
        return float(s[:-1]), True
    try:
        return float(s), False
    except ValueError:
        return None, False
def calc_change(main_str, pr_str, task):
    # Percent change of PR vs main; sign flipped for perplexity tasks
    # so that positive always means "PR is better".
    m_val, _ = parse_metric(main_str)
    p_val, _ = parse_metric(pr_str)
    if m_val is None or p_val is None or m_val == 0:
        return "N/A"
    if "wikitext" in task:
        pct = (m_val - p_val) / m_val * 100
    else:
        pct = (p_val - m_val) / m_val * 100
    sign = "+" if pct >= 0 else ""
    return f"{sign}{pct:.2f}%"
print("")
print("╔════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════╗")
print(f"║ BRANCH COMPARISON (main vs {pr_branch})")
print("╚════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════╝")
print("")
header = (f"{'model':<30} {'scheme':<10} {'technique':<12} {'task':<18} "
          f"{'main':>14} {'PR':>14} {'change':>12}")
print(header)
print("-" * len(header))
for (model, scheme, technique, task), metrics in sorted(entries):
    m = metrics.get("main", "")
    p = metrics.get(pr_branch, "")
    change = calc_change(m, p, task) if m and p else ""
    print(f"{model:<30} {scheme:<10} {technique:<12} {task:<18} "
          f"{m:>14} {p:>14} {change:>12}")
print("")
PYEOF
}
# ── Initialize results CSV ──────────────────────────────────────────────────
# Back up any previous run's CSV, then start fresh with the header row.
if [ -f "$RESULTS_CSV" ]; then
    cp "$RESULTS_CSV" "${RESULTS_CSV}.bak"
fi
echo "model,scheme,technique,branch,task,metric,status,eval_backend,save_dir" > "$RESULTS_CSV"
# ── Main loop ────────────────────────────────────────────────────────────────
# Iterates model x technique x scheme x branch. Per combination:
#   1. skip entirely when every eval result is already cached on disk
#   2. check out the branch and reinstall llm-compressor
#   3. quantize with quantize.py (skipped if a saved model already exists)
#   4. run each eval task, appending one CSV row per task
#   5. delete the quantized model to reclaim disk space
TOTAL=0
PASSED=0
FAILED=0
for model_idx in "${!MODELS[@]}"; do
    model="${MODELS[$model_idx]}"
    model_short="${MODEL_SHORT_NAMES[$model_idx]}"
    # Unpack "max_model_len,tensor_parallel_size,num_gpus_quant".
    IFS=',' read -r max_model_len tp_size num_gpus_quant <<< "${MODEL_VLLM_ARGS[$model_idx]}"
    for technique in "${TECHNIQUES[@]}"; do
        for scheme in "${SCHEMES[@]}"; do
            for branch in "${BRANCHES[@]}"; do
                save_dir="$MODEL_BASE_DIR/${model_short}-${scheme}-${technique}-${branch}"
                echo ""
                echo "╔══════════════════════════════════════════════════════════════════════════════════════════╗"
                echo "║ MODEL: $model_short"
                echo "║ SCHEME: $scheme"
                echo "║ TECHNIQUE: $technique"
                echo "║ BRANCH: $branch"
                echo "╚══════════════════════════════════════════════════════════════════════════════════════════╝"
                echo ""
                # ── Skip entirely if all evals already have results ────
                all_evals_cached=true
                for eval_idx in "${!EVAL_NAMES[@]}"; do
                    eval_dir="$EVAL_BASE_DIR/${model_short}-${scheme}-${technique}-${branch}/${EVAL_NAMES[$eval_idx]}"
                    if ! find "$eval_dir" -name "results_*.json" -type f 2>/dev/null | grep -q .; then
                        all_evals_cached=false
                        break
                    fi
                done
                if [ "$all_evals_cached" = true ]; then
                    echo "All evals already cached, skipping quantization and eval."
                    # Re-extract the cached metrics so the CSV is complete.
                    for eval_idx in "${!EVAL_NAMES[@]}"; do
                        eval_name="${EVAL_NAMES[$eval_idx]}"
                        lm_task="${EVAL_LM_TASKS[$eval_idx]}"
                        eval_dir="$EVAL_BASE_DIR/${model_short}-${scheme}-${technique}-${branch}/${eval_name}"
                        metric_val=$(extract_metric "$eval_dir" "$lm_task")
                        echo " $eval_name: $metric_val"
                        echo "$model_short,$scheme,$technique,$branch,$eval_name,$metric_val,PASSED,cached,$save_dir" >> "$RESULTS_CSV"
                        PASSED=$((PASSED + 1))
                        TOTAL=$((TOTAL + 1))
                    done
                    print_summary
                    print_comparison
                    continue
                fi
                # ── Switch branch and reinstall ───────────────────────
                switch_branch "$branch"
                if [ $? -ne 0 ]; then
                    echo "BRANCH SWITCH FAILED for $branch"
                    # Record every eval for this combo as BRANCH_FAILED.
                    for eval_name in "${EVAL_NAMES[@]}"; do
                        echo "$model_short,$scheme,$technique,$branch,$eval_name,N/A,BRANCH_FAILED,N/A,$save_dir" >> "$RESULTS_CSV"
                    done
                    FAILED=$((FAILED + ${#EVAL_NAMES[@]}))
                    TOTAL=$((TOTAL + ${#EVAL_NAMES[@]}))
                    print_summary
                    continue
                fi
                # ── Quantize (skip if model already exists) ────────────
                if [ -d "$save_dir" ] && [ -f "$save_dir/config.json" ]; then
                    echo "Quantized model already exists at $save_dir, skipping quantization."
                else
                    activate_quant_env
                    echo "============================================"
                    echo "Running: quantize.py (model=$model_short, technique=$technique, scheme=$scheme, branch=$branch)"
                    echo "============================================"
                    # Multi-GPU quantization goes through torchrun.
                    if [ "$num_gpus_quant" -gt 1 ]; then
                        torchrun --nproc_per_node="$num_gpus_quant" "$REPO_DIR/testing/quantize.py" \
                            --model "$model" --technique "$technique" --scheme "$scheme" \
                            --save-dir "$save_dir" 2>&1
                    else
                        python "$REPO_DIR/testing/quantize.py" \
                            --model "$model" --technique "$technique" --scheme "$scheme" \
                            --save-dir "$save_dir" 2>&1
                    fi
                    quant_status=$?
                    if [ $quant_status -ne 0 ]; then
                        echo "QUANTIZATION FAILED for $model_short / $scheme / $technique / $branch"
                        # Record every eval for this combo as QUANT_FAILED.
                        for eval_name in "${EVAL_NAMES[@]}"; do
                            echo "$model_short,$scheme,$technique,$branch,$eval_name,N/A,QUANT_FAILED,N/A,$save_dir" >> "$RESULTS_CSV"
                        done
                        FAILED=$((FAILED + ${#EVAL_NAMES[@]}))
                        TOTAL=$((TOTAL + ${#EVAL_NAMES[@]}))
                        print_summary
                        print_comparison
                        continue
                    fi
                fi
                # ── Clear GPU memory before eval ─────────────────────────
                python3 -c "import torch; torch.cuda.empty_cache(); [torch.cuda.reset_peak_memory_stats(i) for i in range(torch.cuda.device_count())]" 2>/dev/null
                # ── Evaluate ─────────────────────────────────────────────
                for eval_idx in "${!EVAL_NAMES[@]}"; do
                    eval_name="${EVAL_NAMES[$eval_idx]}"
                    lm_task="${EVAL_LM_TASKS[$eval_idx]}"
                    fewshot="${EVAL_FEWSHOT[$eval_idx]}"
                    backend="${EVAL_BACKENDS[$eval_idx]}"
                    eval_dir="$EVAL_BASE_DIR/${model_short}-${scheme}-${technique}-${branch}/${eval_name}"
                    TOTAL=$((TOTAL + 1))
                    # Skip eval if results already exist
                    existing_result=$(find "$eval_dir" -name "results_*.json" -type f 2>/dev/null | sort | tail -1)
                    if [ -n "$existing_result" ]; then
                        metric_val=$(extract_metric "$eval_dir" "$lm_task")
                        echo " EVAL: $eval_name — skipping, previous result found: $metric_val"
                        echo "$model_short,$scheme,$technique,$branch,$eval_name,$metric_val,PASSED,cached,$save_dir" >> "$RESULTS_CSV"
                        PASSED=$((PASSED + 1))
                        continue
                    fi
                    if [ "$backend" == "hf" ]; then
                        run_hf_eval "$save_dir" "$lm_task" "$fewshot" "$eval_dir"
                    else
                        run_vllm_eval "$save_dir" "$lm_task" "$fewshot" "$max_model_len" "$tp_size" "$eval_dir"
                    fi
                    eval_status=$?
                    if [ $eval_status -eq 0 ]; then
                        metric_val=$(extract_metric "$eval_dir" "$lm_task")
                        echo "PASS EVAL"
                        echo "$model_short,$scheme,$technique,$branch,$eval_name,$metric_val,PASSED,$EVAL_BACKEND,$save_dir" >> "$RESULTS_CSV"
                        PASSED=$((PASSED + 1))
                    else
                        echo "FAIL EVAL"
                        echo "$model_short,$scheme,$technique,$branch,$eval_name,N/A,FAILED,$EVAL_BACKEND,$save_dir" >> "$RESULTS_CSV"
                        FAILED=$((FAILED + 1))
                    fi
                done
                # ── Clean up model to free disk space ────────────────────
                if [ -d "$save_dir" ]; then
                    echo "Removing quantized model at $save_dir to free disk space."
                    rm -rf "$save_dir"
                fi
                print_summary
                print_comparison
            done # branch
        done # scheme
    done # technique
done # model
# ── Restore original branch ─────────────────────────────────────────────────
echo "Restoring original branch: $ORIGINAL_BRANCH"
git -C "$REPO_DIR" checkout "$ORIGINAL_BRANCH" 2>&1
activate_quant_env
# Reinstall so the working environment matches the restored branch.
pip install -e "$REPO_DIR" 2>&1 | tail -1
# ── Final Summary ────────────────────────────────────────────────────────────
echo ""
echo "╔══════════════════════════════════════════════════════════════════════════════════════════╗"
echo "║ FINAL SUMMARY: $PASSED passed, $FAILED failed out of $TOTAL total evaluations ║"
echo "╚══════════════════════════════════════════════════════════════════════════════════════════╝"
echo ""
print_summary
print_comparison
echo "Results CSV: $RESULTS_CSV"
echo "Eval outputs: $EVAL_BASE_DIR/"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment