Last active
January 15, 2025 02:47
-
-
Save GOROman/8947f32074df2370ea8c4b5877e9632b to your computer and use it in GitHub Desktop.
MLX + MLX_VLM + Qwen2-VL-2B-Instruct-4bit で画像をVLMで解説してもらう
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# /// script
# requires-python = "==3.12.*"
# dependencies = ["mlx==0.21.0", "mlx_vlm"]
# ///

"""Ask a vision-language model to describe a local image using MLX-VLM.

The script loads the ``mlx-community/Qwen2-VL-2B-Instruct-4bit`` checkpoint,
wraps a plain-text prompt in the model's chat template for the given number
of images, runs generation, and prints the returned text.

The inline metadata block above (PEP 723) lets a compatible runner resolve
the interpreter and dependencies. ``requires-python`` uses ``==3.12.*``
because a bare ``==3.12`` is an exact match that excludes 3.12.x patch
releases.
"""

import mlx.core as mx  # not referenced below; kept from the original script
import numpy as np
from mlx_vlm import load, generate
from mlx_vlm.prompt_utils import apply_chat_template
from mlx_vlm.utils import load_config

# Load the model, its processor, and the matching config from the same path.
model_path = "mlx-community/Qwen2-VL-2B-Instruct-4bit"
model, processor = load(model_path)
config = load_config(model_path)

# Prepare input: a list of image paths (relative to the working directory)
# and the instruction text.
image = ["yellow-hage.jpg"]
prompt = "Describe this image."

# Apply the chat template; num_images is derived from the image list so the
# two stay in sync if more images are added.
formatted_prompt = apply_chat_template(
    processor, config, prompt, num_images=len(image)
)

# Generate output.
# NOTE(review): dtype=np.float32 is forwarded to generate() unchanged; how it
# is consumed depends on the installed mlx_vlm version — confirm.
output = generate(
    model,
    processor,
    formatted_prompt,
    image,
    verbose=True,
    dtype=np.float32,
)
print(output)
それでもダメな場合は、諦めて来世に期待だ。
繰り返して呼ぶと中国語になりがち。
プロンプトで日本語での応答を強制したら、日本語で出力できた。
prompt = "この画像を詳細に説明してください。日本語で応答してください。"
# /// script
# requires-python = "==3.12"
# dependencies = ["mlx_vlm"]
# ///
を冒頭に追加したので、`uv run mlx_vlm_test.py` で実行できる。
dependencies = ["mlx==0.21.0", "mlx_vlm"]
だとOK。
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
どうにもダメな場合は、VSCode に Cline 入れて、ターミナルでエラー出して勝手に直してもらうと良い。
https://marketplace.visualstudio.com/items?itemName=saoudrizwan.claude-dev