@pszemraj
Created September 8, 2025 07:59
LFM2-VL inference with the recommended parameters. Two scripts follow, one for LiquidAI/LFM2-VL-1.6B and one for LiquidAI/LFM2-VL-450M; they are identical except for the model_id.
from transformers import AutoProcessor, AutoModelForImageTextToText
from transformers.image_utils import load_image

# Load model and processor
model_id = "LiquidAI/LFM2-VL-1.6B"
model = AutoModelForImageTextToText.from_pretrained(
    model_id, device_map="auto", torch_dtype="bfloat16", trust_remote_code=True
)
processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)

# Load image and create conversation
# url = "https://www.ilankelman.org/stopsigns/australia.jpg"
url = "https://static.wikia.nocookie.net/shrek/images/d/d9/Shrek_in_The_Pentaverate.webp/revision/latest/scale-to-width-down/1000?cb=20220628125340"
image = load_image(url)
conversation = [
    {
        "role": "user",
        "content": [
            {"type": "image", "image": image},
            {"type": "text", "text": "What is in this image?"},
        ],
    },
]

# Generate answer with the recommended sampling parameters
inputs = processor.apply_chat_template(
    conversation,
    add_generation_prompt=True,
    return_tensors="pt",
    return_dict=True,
    tokenize=True,
    min_image_tokens=64,
    max_image_tokens=256,
    do_image_splitting=True,
).to(model.device)
outputs = model.generate(
    **inputs,
    max_new_tokens=128,
    do_sample=True,
    temperature=0.1,
    min_p=0.15,
    repetition_penalty=1.05,
)
print(processor.batch_decode(outputs, skip_special_tokens=True)[0])
The same script for the smaller LFM2-VL-450M checkpoint; only the model_id changes.

from transformers import AutoProcessor, AutoModelForImageTextToText
from transformers.image_utils import load_image

# Load model and processor
model_id = "LiquidAI/LFM2-VL-450M"
model = AutoModelForImageTextToText.from_pretrained(
    model_id, device_map="auto", torch_dtype="bfloat16", trust_remote_code=True
)
processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)

# Load image and create conversation
# url = "https://www.ilankelman.org/stopsigns/australia.jpg"
url = "https://static.wikia.nocookie.net/shrek/images/d/d9/Shrek_in_The_Pentaverate.webp/revision/latest/scale-to-width-down/1000?cb=20220628125340"
image = load_image(url)
conversation = [
    {
        "role": "user",
        "content": [
            {"type": "image", "image": image},
            {"type": "text", "text": "What is in this image?"},
        ],
    },
]

# Generate answer with the recommended sampling parameters
inputs = processor.apply_chat_template(
    conversation,
    add_generation_prompt=True,
    return_tensors="pt",
    return_dict=True,
    tokenize=True,
    min_image_tokens=64,
    max_image_tokens=256,
    do_image_splitting=True,
).to(model.device)
outputs = model.generate(
    **inputs,
    max_new_tokens=128,
    do_sample=True,
    temperature=0.1,
    min_p=0.15,
    repetition_penalty=1.05,
)
print(processor.batch_decode(outputs, skip_special_tokens=True)[0])
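
Both scripts load the image from a URL, but load_image from transformers.image_utils also accepts a local file path or a PIL image, so swapping in your own picture only changes the image-loading line. A minimal sketch, using a hypothetical local path:

from transformers.image_utils import load_image

# Hypothetical local file; any image readable by PIL works here
image = load_image("path/to/your_image.jpg")

# Then build `conversation` and call apply_chat_template / generate exactly as above.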