pszemraj · September 8, 2025 07:59
diff --git a/lfm_1b6.py b/lfm_1b6.py
 from transformers import AutoProcessor, AutoModelForImageTextToText
 from transformers.image_utils import load_image

 # --- Model and processor ---------------------------------------------------
 # The same checkpoint id is handed to both from_pretrained() calls.
 model_id = "LiquidAI/LFM2-VL-1.6B"
 load_options = {
     "device_map": "auto",
     "torch_dtype": "bfloat16",
     "trust_remote_code": True,
 }
 model = AutoModelForImageTextToText.from_pretrained(model_id, **load_options)
 processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)

 # --- Image and single-turn conversation ------------------------------------
 # Alternative sample image, kept for reference:
 # url = "https://www.ilankelman.org/stopsigns/australia.jpg"
 url = "https://static.wikia.nocookie.net/shrek/images/d/d9/Shrek_in_The_Pentaverate.webp/revision/latest/scale-to-width-down/1000?cb=20220628125340"
 image = load_image(url)

 # One user message carrying the image followed by the text question.
 image_part = {"type": "image", "image": image}
 text_part = {"type": "text", "text": "What is in this image?"}
 conversation = [{"role": "user", "content": [image_part, text_part]}]

 # --- Generation ------------------------------------------------------------
 # Keyword options forwarded to the processor's chat template call; the
 # image token bounds and splitting flag are passed through unchanged.
 template_options = {
     "add_generation_prompt": True,
     "return_tensors": "pt",
     "return_dict": True,
     "tokenize": True,
     "min_image_tokens": 64,
     "max_image_tokens": 256,
     "do_image_splitting": True,
 }
 inputs = processor.apply_chat_template(conversation, **template_options)
 # Move the prepared inputs onto whatever device the model was placed on.
 inputs = inputs.to(model.device)

 # Sampling configuration handed to generate() alongside the model inputs.
 sampling_options = {
     "max_new_tokens": 128,
     "do_sample": True,
     "temperature": 0.1,
     "min_p": 0.15,
     "repetition_penalty": 1.05,
 }
 outputs = model.generate(**inputs, **sampling_options)

 # Decode the whole batch, then print only its first entry.
 decoded = processor.batch_decode(outputs, skip_special_tokens=True)
 print(decoded[0])

diff --git a/lfm_450m.py b/lfm_450m.py
 from transformers import AutoProcessor, AutoModelForImageTextToText
 from transformers.image_utils import load_image

 # --- Model and processor ---------------------------------------------------
 # The same checkpoint id is handed to both from_pretrained() calls.
 model_id = "LiquidAI/LFM2-VL-450M"
 load_options = {
     "device_map": "auto",
     "torch_dtype": "bfloat16",
     "trust_remote_code": True,
 }
 model = AutoModelForImageTextToText.from_pretrained(model_id, **load_options)
 processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)

 # --- Image and single-turn conversation ------------------------------------
 # Alternative sample image, kept for reference:
 # url = "https://www.ilankelman.org/stopsigns/australia.jpg"
 url = "https://static.wikia.nocookie.net/shrek/images/d/d9/Shrek_in_The_Pentaverate.webp/revision/latest/scale-to-width-down/1000?cb=20220628125340"
 image = load_image(url)

 # One user message carrying the image followed by the text question.
 image_part = {"type": "image", "image": image}
 text_part = {"type": "text", "text": "What is in this image?"}
 conversation = [{"role": "user", "content": [image_part, text_part]}]

 # --- Generation ------------------------------------------------------------
 # Keyword options forwarded to the processor's chat template call; the
 # image token bounds and splitting flag are passed through unchanged.
 template_options = {
     "add_generation_prompt": True,
     "return_tensors": "pt",
     "return_dict": True,
     "tokenize": True,
     "min_image_tokens": 64,
     "max_image_tokens": 256,
     "do_image_splitting": True,
 }
 inputs = processor.apply_chat_template(conversation, **template_options)
 # Move the prepared inputs onto whatever device the model was placed on.
 inputs = inputs.to(model.device)

 # Sampling configuration handed to generate() alongside the model inputs.
 sampling_options = {
     "max_new_tokens": 128,
     "do_sample": True,
     "temperature": 0.1,
     "min_p": 0.15,
     "repetition_penalty": 1.05,
 }
 outputs = model.generate(**inputs, **sampling_options)

 # Decode the whole batch, then print only its first entry.
 decoded = processor.batch_decode(outputs, skip_special_tokens=True)
 print(decoded[0])
	from transformers import AutoProcessor, AutoModelForImageTextToText
	from transformers.image_utils import load_image

	# --- Model and processor ---------------------------------------------------
	# The same checkpoint id is handed to both from_pretrained() calls.
	model_id = "LiquidAI/LFM2-VL-1.6B"
	load_options = {
	    "device_map": "auto",
	    "torch_dtype": "bfloat16",
	    "trust_remote_code": True,
	}
	model = AutoModelForImageTextToText.from_pretrained(model_id, **load_options)
	processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)

	# --- Image and single-turn conversation ------------------------------------
	# Alternative sample image, kept for reference:
	# url = "https://www.ilankelman.org/stopsigns/australia.jpg"
	url = "https://static.wikia.nocookie.net/shrek/images/d/d9/Shrek_in_The_Pentaverate.webp/revision/latest/scale-to-width-down/1000?cb=20220628125340"
	image = load_image(url)

	# One user message carrying the image followed by the text question.
	image_part = {"type": "image", "image": image}
	text_part = {"type": "text", "text": "What is in this image?"}
	conversation = [{"role": "user", "content": [image_part, text_part]}]

	# --- Generation ------------------------------------------------------------
	# Keyword options forwarded to the processor's chat template call; the
	# image token bounds and splitting flag are passed through unchanged.
	template_options = {
	    "add_generation_prompt": True,
	    "return_tensors": "pt",
	    "return_dict": True,
	    "tokenize": True,
	    "min_image_tokens": 64,
	    "max_image_tokens": 256,
	    "do_image_splitting": True,
	}
	inputs = processor.apply_chat_template(conversation, **template_options)
	# Move the prepared inputs onto whatever device the model was placed on.
	inputs = inputs.to(model.device)

	# Sampling configuration handed to generate() alongside the model inputs.
	sampling_options = {
	    "max_new_tokens": 128,
	    "do_sample": True,
	    "temperature": 0.1,
	    "min_p": 0.15,
	    "repetition_penalty": 1.05,
	}
	outputs = model.generate(**inputs, **sampling_options)

	# Decode the whole batch, then print only its first entry.
	decoded = processor.batch_decode(outputs, skip_special_tokens=True)
	print(decoded[0])