A single line to try out mlx-community/Qwen3-Next-80B-A3B-Instruct-8bit on macOS with MLX
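The script below can be run in a single line with uv, which pulls mlx-lm into an ephemeral environment; the filename qwen3_next.py is a placeholder for wherever you save this gist:

uv run --with mlx-lm qwen3_next.py --prompt "hello" --max-tokens 512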
import argparse

from mlx_lm import load, generate

# Parse CLI arguments
parser = argparse.ArgumentParser()
parser.add_argument("--prompt", type=str, default="hello", help="Custom prompt text")
parser.add_argument("--max-tokens", type=int, default=1024, help="Maximum number of tokens to generate")
args = parser.parse_args()

# Load the model and tokenizer (weights are downloaded from Hugging Face on first run)
model, tokenizer = load("mlx-community/Qwen3-Next-80B-A3B-Instruct-8bit")

prompt = args.prompt

# If the tokenizer ships a chat template, wrap the raw prompt in a chat message
if tokenizer.chat_template is not None:
    messages = [{"role": "user", "content": prompt}]
    prompt = tokenizer.apply_chat_template(
        messages, add_generation_prompt=True
    )

# Generate the response; verbose=True streams tokens to stdout as they arrive
response = generate(
    model,
    tokenizer,
    prompt=prompt,
    verbose=True,
    max_tokens=args.max_tokens,
)
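generate with verbose=True already prints tokens as they are produced, but if you want the chunks in your own code (to pipe them elsewhere, for example), mlx_lm also exposes stream_generate. A minimal sketch, assuming a recent mlx-lm release in which stream_generate yields response chunks carrying a .text field:

from mlx_lm import load, stream_generate

# Same model as above
model, tokenizer = load("mlx-community/Qwen3-Next-80B-A3B-Instruct-8bit")

messages = [{"role": "user", "content": "hello"}]
prompt = tokenizer.apply_chat_template(messages, add_generation_prompt=True)

# Consume tokens as they are generated instead of waiting for the full string
for chunk in stream_generate(model, tokenizer, prompt, max_tokens=1024):
    print(chunk.text, end="", flush=True)
print()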
Update: for actually chatting with the model, this one-liner is a better option:
uv run --with git+https://github.com/ml-explore/mlx-lm.git mlx_lm.chat --model mlx-community/Qwen3-Next-80B-A3B-Instruct-8bit --max-tokens 10000
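mlx_lm.chat drops you into an interactive multi-turn session in the terminal, so the (large) model only has to be loaded once rather than on every invocation of the script above.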