Install packages:
pip install open-webui mlx-lm
Start Open WebUI server:
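Assuming a default installation, the bundled CLI starts the server:

open-webui serve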
```python
import numpy as np
import mlx.core as mx
import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation
import tqdm

def conway(a: mx.array):
    source = """
```
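The fragment above appears to start a custom-kernel implementation. For reference, the same update rule can be written with plain MLX array ops; the sketch below is an illustration only, assumes a recent MLX that provides mx.roll, mx.logical_and, and mx.logical_or, and uses wrap-around (toroidal) boundaries.

```python
import mlx.core as mx

def life_step(grid: mx.array) -> mx.array:
    # Count the eight neighbours of every cell, wrapping around at the edges.
    neighbors = mx.zeros(grid.shape, dtype=mx.int32)
    for dy in (-1, 0, 1):
        for dx in (-1, 0, 1):
            if (dy, dx) != (0, 0):
                neighbors = neighbors + mx.roll(mx.roll(grid, dy, axis=0), dx, axis=1)
    # A cell lives next step with exactly three neighbours,
    # or with two neighbours if it was already alive.
    born = neighbors == 3
    survives = mx.logical_and(grid == 1, neighbors == 2)
    return mx.logical_or(born, survives).astype(grid.dtype)
```

Calling life_step repeatedly inside a FuncAnimation update callback is enough to animate the grid with matplotlib.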
```python
def generate_speculative(
    model: nn.Module,
    draft_model: nn.Module,
    tokenizer: Union[PreTrainedTokenizer, TokenizerWrapper],
    prompt: str,
    max_tokens: int = 100,
    verbose: bool = False,
    formatter: Optional[Callable] = None,
    **kwargs,
):
```
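A hypothetical call of this signature, assuming both models are loaded with mlx_lm.load and share a tokenizer; the repo names are placeholders, not part of the original.

```python
from mlx_lm import load

model, tokenizer = load("mlx-community/Meta-Llama-3.1-8B-Instruct-4bit")  # placeholder target model
draft_model, _ = load("mlx-community/Llama-3.2-1B-Instruct-4bit")  # placeholder draft model

text = generate_speculative(
    model,
    draft_model,
    tokenizer,
    prompt="Explain speculative decoding in two sentences.",
    max_tokens=200,
    verbose=True,
)
```

The draft model only proposes tokens; each proposal is verified by the larger model, so the accepted output matches what the large model alone would have produced.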
```python
import argparse
import json
import logging
import os
import re
import shutil
from concurrent.futures import ProcessPoolExecutor, as_completed
from dataclasses import dataclass
from datetime import datetime
from typing import Any, Callable, Dict, List, Literal, Optional, Tuple
```
```
You are an assistant that engages in extremely thorough, self-questioning reasoning. Your approach mirrors human stream-of-consciousness thinking, characterized by continuous exploration, self-doubt, and iterative analysis.

## Core Principles

1. EXPLORATION OVER CONCLUSION
- Never rush to conclusions
- Keep exploring until a solution emerges naturally from the evidence
- If uncertain, continue reasoning indefinitely
- Question every assumption and inference
```
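One way to use a prompt like this with mlx-lm is to pass it as the system message through the model's chat template. A minimal sketch, where REASONING_PROMPT is a hypothetical variable holding the text above and the model repo is a placeholder:

```python
from mlx_lm import load, generate

REASONING_PROMPT = "..."  # the full system prompt text above

model, tokenizer = load("mlx-community/Qwen2.5-7B-Instruct-4bit")  # placeholder repo
messages = [
    {"role": "system", "content": REASONING_PROMPT},
    {"role": "user", "content": "Is 4,294,967,297 prime?"},
]
prompt = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
print(generate(model, tokenizer, prompt=prompt, max_tokens=1024))
```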
```python
import mlx.core as mx
import mlx.nn as nn
from typing import Tuple, Type, Optional, List, Any
import importlib
from transformers import AutoTokenizer
from mlx_lm.utils import load_model, get_model_path

def get_qwen3_embedding_classes(config: dict) -> Tuple[Type[nn.Module], Type]:
```
A minimal Husky pre-commit hook with AI-powered code review via Amp's Oracle feature.
```python
import argparse
import copy
import mlx.core as mx
from pathlib import Path
from mlx_lm import load, stream_generate
from mlx_lm.generate import generate_step
from mlx_lm.models.cache import make_prompt_cache

DEFAULT_MAX_TOKENS = 2048
```
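These imports suggest a chat loop that keeps a prompt cache alive between turns so earlier context is not re-processed. A minimal sketch of that pattern, building on the imports above and assuming a recent mlx-lm in which stream_generate accepts a prompt_cache keyword and yields response objects with a .text field (the model repo is a placeholder):

```python
model, tokenizer = load("mlx-community/Qwen2.5-7B-Instruct-4bit")  # placeholder repo
prompt_cache = make_prompt_cache(model)

for turn in ["Hello there.", "Summarize what we have said so far."]:
    messages = [{"role": "user", "content": turn}]
    prompt = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
    for response in stream_generate(
        model,
        tokenizer,
        prompt,
        max_tokens=DEFAULT_MAX_TOKENS,
        prompt_cache=prompt_cache,  # reused across turns, so only the new turn is processed
    ):
        print(response.text, end="", flush=True)
    print()
```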
The command for evaluating on MMLU Pro:
mlx_lm.evaluate --model model/repo --tasks mmlu_pro
The command for efficiency benchmarks: