A minimal single-file script to try out mlx-community/Qwen3-Next-80B-A3B-Instruct-8bit on macOS with mlx
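This assumes an Apple-silicon Mac and the mlx-lm package (installable with pip install mlx-lm). Note that the 8-bit weights of an 80B-parameter model are on the order of 80 GB, so the first run will trigger a large download and needs a machine with enough unified memory to hold them.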
import argparse

from mlx_lm import load, generate

# Parse CLI arguments
parser = argparse.ArgumentParser()
parser.add_argument("--prompt", type=str, default="hello", help="Custom prompt text")
parser.add_argument("--max-tokens", type=int, default=1024, help="Maximum number of tokens to generate")
args = parser.parse_args()

# Load the quantized model and tokenizer from the Hugging Face Hub
model, tokenizer = load("mlx-community/Qwen3-Next-80B-A3B-Instruct-8bit")

prompt = args.prompt

# If the tokenizer ships a chat template, wrap the raw prompt in a user message
if tokenizer.chat_template is not None:
    messages = [{"role": "user", "content": prompt}]
    prompt = tokenizer.apply_chat_template(
        messages, add_generation_prompt=True
    )

# Generate the response; verbose=True prints tokens as they are produced
response = generate(
    model,
    tokenizer,
    prompt=prompt,
    verbose=True,
    max_tokens=args.max_tokens,
)
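Save the file under any name and run it, for example: python qwen3_next.py --prompt "Explain mixture-of-experts in two sentences" --max-tokens 256 (the filename here is just whatever you saved the gist as).

If you would rather consume tokens as they arrive instead of waiting for generate to return the full string, recent mlx-lm releases also expose stream_generate. A minimal sketch, assuming a current mlx-lm version in which stream_generate yields response objects carrying the newly decoded text in a .text field:

import argparse

from mlx_lm import load, stream_generate

parser = argparse.ArgumentParser()
parser.add_argument("--prompt", type=str, default="hello")
parser.add_argument("--max-tokens", type=int, default=256)
args = parser.parse_args()

model, tokenizer = load("mlx-community/Qwen3-Next-80B-A3B-Instruct-8bit")

# Wrap the raw prompt in the model's chat template, as in the script above
messages = [{"role": "user", "content": args.prompt}]
prompt = tokenizer.apply_chat_template(messages, add_generation_prompt=True)

# Print each chunk as soon as it is generated; response.text holds the
# text decoded at that step (assumes the recent mlx-lm streaming API)
for response in stream_generate(model, tokenizer, prompt, max_tokens=args.max_tokens):
    print(response.text, end="", flush=True)
print()

The tradeoff is purely ergonomic: generation speed is the same, but streaming gives immediate feedback, which matters with a model this large, where a long response can take a while to finish.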