ksasao · November 26, 2025 10:39
diff --git a/readme.md b/readme.md
diff --git a/sara_test.py b/sara_test.py
 import requests
 import sys
 import os
 import warnings
 from PIL import Image
 from transformers import AutoModelForCausalLM, AutoProcessor, set_seed
 import torch
 import logging

 # Silence Python warnings and transformers logging below ERROR so that only
 # the script's own messages and the model's answer are printed.
 warnings.filterwarnings('ignore')
 logging.getLogger('transformers').setLevel(logging.ERROR)

 # Validate the command line BEFORE loading the model, so that usage errors
 # are reported immediately instead of after the (potentially long) model load.
 if len(sys.argv) < 2:
     print("使用方法: python sara_test.py <画像URLまたはローカルファイルパス> [質問文]")
     print("例: python sara_test.py https://example.com/image.jpg")
     print("例: python sara_test.py C:\\path\\to\\image.jpg")
     print("例: python sara_test.py image.jpg これはどこで撮った写真ですか？")
     sys.exit(1)

 # argv[1]: image URL or local path; argv[2] (optional): question text.
 image_path = sys.argv[1]
 question = sys.argv[2] if len(sys.argv) > 2 else "これはどこで撮った写真ですか？"

 # Load the image from a URL or a local file. convert("RGB") returns a new,
 # fully loaded image, so the underlying response/file can be closed right away.
 if image_path.startswith(('http://', 'https://')):
     # The timeout prevents hanging forever on an unresponsive server.
     with requests.get(image_path, stream=True, timeout=30) as response:
         # Fail loudly on 4xx/5xx instead of handing an error page to PIL.
         response.raise_for_status()
         # Ask urllib3 to undo any Content-Encoding (gzip/deflate) on the raw stream.
         response.raw.decode_content = True
         image = Image.open(response.raw).convert("RGB")
 else:
     # isfile() also rejects directories, which exists() would let through.
     if not os.path.isfile(image_path):
         print(f"エラー: ファイルが見つかりません: {image_path}")
         sys.exit(1)
     with Image.open(image_path) as source_image:
         image = source_image.convert("RGB")
 image_url = image_path

 # Model identifier and matmul precision settings.
 model_path = "sbintuitions/sarashina2.2-vision-3b"
 torch.backends.cuda.matmul.allow_tf32 = True
 torch.set_float32_matmul_precision("medium")

 # Load processor and model. NOTE: trust_remote_code=True executes code shipped
 # with the model repository; only use it with repositories you trust.
 processor = AutoProcessor.from_pretrained(
     model_path, trust_remote_code=True, use_fast=False)

 model = AutoModelForCausalLM.from_pretrained(
     model_path,
     trust_remote_code=True,
     device_map="auto",
     dtype=torch.float16,
     attn_implementation="sdpa"
 )

 # Single-turn chat message containing the image reference and the question.
 message = [
     {
         "role": "user",
         "content": [
             {
                 "type": "image",
                 "image": image_url,
             },
             {
                 "type": "text",
                 "text": question,
             },
         ],
     }
 ]
 text_prompt = processor.apply_chat_template(message, add_generation_prompt=True)
 inputs = processor(
     text=[text_prompt],
     images=[image],
     padding=True,
     return_tensors="pt",
 )
 inputs = inputs.to(model.device)

 # Greedy decoding (do_sample=False) with a repetition penalty.
 with torch.inference_mode():
     output_ids = model.generate(
         **inputs,
         max_new_tokens=256,
         do_sample=False,
         repetition_penalty=1.2,
         use_cache=True,
         pad_token_id=processor.tokenizer.eos_token_id
     )

 # Drop the leading prompt-length tokens from each returned sequence so that
 # only the newly generated part is decoded. Distinct loop names avoid
 # shadowing the outer `output_ids`.
 generated_ids = [
     sequence[len(prompt_ids):]
     for prompt_ids, sequence in zip(inputs.input_ids, output_ids)
 ]
 output_text = processor.batch_decode(
     generated_ids, skip_special_tokens=True, clean_up_tokenization_spaces=True
 )
 print(output_text[0])
	import requests
	import sys
	import os
	import warnings
	from PIL import Image
	from transformers import AutoModelForCausalLM, AutoProcessor, set_seed
	import torch
	import logging

	# Silence Python warnings and transformers logging below ERROR so that only
	# the script's own messages and the model's answer are printed.
	warnings.filterwarnings('ignore')
	logging.getLogger('transformers').setLevel(logging.ERROR)

	# Validate the command line BEFORE loading the model, so that usage errors
	# are reported immediately instead of after the (potentially long) model load.
	if len(sys.argv) < 2:
	    print("使用方法: python sara_test.py <画像URLまたはローカルファイルパス> [質問文]")
	    print("例: python sara_test.py https://example.com/image.jpg")
	    print("例: python sara_test.py C:\\path\\to\\image.jpg")
	    print("例: python sara_test.py image.jpg これはどこで撮った写真ですか？")
	    sys.exit(1)

	# argv[1]: image URL or local path; argv[2] (optional): question text.
	image_path = sys.argv[1]
	question = sys.argv[2] if len(sys.argv) > 2 else "これはどこで撮った写真ですか？"

	# Load the image from a URL or a local file. convert("RGB") returns a new,
	# fully loaded image, so the underlying response/file can be closed right away.
	if image_path.startswith(('http://', 'https://')):
	    # The timeout prevents hanging forever on an unresponsive server.
	    with requests.get(image_path, stream=True, timeout=30) as response:
	        # Fail loudly on 4xx/5xx instead of handing an error page to PIL.
	        response.raise_for_status()
	        # Ask urllib3 to undo any Content-Encoding (gzip/deflate) on the raw stream.
	        response.raw.decode_content = True
	        image = Image.open(response.raw).convert("RGB")
	else:
	    # isfile() also rejects directories, which exists() would let through.
	    if not os.path.isfile(image_path):
	        print(f"エラー: ファイルが見つかりません: {image_path}")
	        sys.exit(1)
	    with Image.open(image_path) as source_image:
	        image = source_image.convert("RGB")
	image_url = image_path

	# Model identifier and matmul precision settings.
	model_path = "sbintuitions/sarashina2.2-vision-3b"
	torch.backends.cuda.matmul.allow_tf32 = True
	torch.set_float32_matmul_precision("medium")

	# Load processor and model. NOTE: trust_remote_code=True executes code shipped
	# with the model repository; only use it with repositories you trust.
	processor = AutoProcessor.from_pretrained(
	    model_path, trust_remote_code=True, use_fast=False)

	model = AutoModelForCausalLM.from_pretrained(
	    model_path,
	    trust_remote_code=True,
	    device_map="auto",
	    dtype=torch.float16,
	    attn_implementation="sdpa"
	)

	# Single-turn chat message containing the image reference and the question.
	message = [
	    {
	        "role": "user",
	        "content": [
	            {
	                "type": "image",
	                "image": image_url,
	            },
	            {
	                "type": "text",
	                "text": question,
	            },
	        ],
	    }
	]
	text_prompt = processor.apply_chat_template(message, add_generation_prompt=True)
	inputs = processor(
	    text=[text_prompt],
	    images=[image],
	    padding=True,
	    return_tensors="pt",
	)
	inputs = inputs.to(model.device)

	# Greedy decoding (do_sample=False) with a repetition penalty.
	with torch.inference_mode():
	    output_ids = model.generate(
	        **inputs,
	        max_new_tokens=256,
	        do_sample=False,
	        repetition_penalty=1.2,
	        use_cache=True,
	        pad_token_id=processor.tokenizer.eos_token_id
	    )

	# Drop the leading prompt-length tokens from each returned sequence so that
	# only the newly generated part is decoded. Distinct loop names avoid
	# shadowing the outer `output_ids`.
	generated_ids = [
	    sequence[len(prompt_ids):]
	    for prompt_ids, sequence in zip(inputs.input_ids, output_ids)
	]
	output_text = processor.batch_decode(
	    generated_ids, skip_special_tokens=True, clean_up_tokenization_spaces=True
	)
	print(output_text[0])
No results found