Last active
November 5, 2025 04:51
-
-
Save pszemraj/a97e4720b186d7a4064f9aa40845b861 to your computer and use it in GitHub Desktop.
Inference with the LFM2-VL-3B model
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| """ | |
| example script for inference with LFM2-VL-3B model | |
| https://hf.co/LiquidAI/LFM2-VL-3B | |
| """ | |
| from transformers import AutoModelForImageTextToText, AutoProcessor | |
| from transformers.image_utils import load_image | |
| # Load the processor and the model from the same checkpoint id | |
| model_id = "LiquidAI/LFM2-VL-3B" | |
| processor = AutoProcessor.from_pretrained(model_id) | |
| # NOTE(review): device_map="auto" presumably requires `accelerate`, and the | |
| # `dtype=` keyword presumably requires a recent transformers release - confirm | |
| model = AutoModelForImageTextToText.from_pretrained( | |
| model_id, device_map="auto", dtype="bfloat16" | |
| ) | |
| # Load the image from a URL and build a single-turn conversation: | |
| # one user message holding the image followed by a text question | |
| url = "https://www.ilankelman.org/stopsigns/australia.jpg" | |
| image = load_image(url) | |
| conversation = [ | |
| { | |
| "role": "user", | |
| "content": [ | |
| {"type": "image", "image": image}, | |
| {"type": "text", "text": "What is in this image?"}, | |
| ], | |
| }, | |
| ] | |
| # Generate the answer | |
| # text_kwargs are forwarded to model.generate() below | |
| text_kwargs = dict( | |
| do_sample=True, | |
| temperature=0.1, | |
| min_p=0.15, | |
| repetition_penalty=1.05, | |
| max_new_tokens=128, | |
| ) | |
| # vision_kwargs are forwarded to processor.apply_chat_template() below | |
| # NOTE(review): presumably these bound the per-image token budget and enable | |
| # splitting large images into tiles - confirm against the model card | |
| vision_kwargs = dict( | |
| min_image_tokens=64, | |
| max_image_tokens=256, | |
| do_image_splitting=True, | |
| ) | |
| # Render the chat template and tokenize in one call (tokenize=True, | |
| # return_dict=True), then move the resulting inputs to the model's device | |
| inputs = processor.apply_chat_template( | |
| conversation, | |
| add_generation_prompt=True, | |
| return_tensors="pt", | |
| return_dict=True, | |
| tokenize=True, | |
| **vision_kwargs, | |
| ).to(model.device) | |
| outputs = model.generate(**inputs, **text_kwargs) | |
| # Drop the prompt part; keep only the newly generated tokens. | |
| # The slice uses the last dimension of input_ids as the prompt length. | |
| prompt_len = inputs["input_ids"].shape[-1] | |
| gen_ids = outputs[:, prompt_len:] | |
| # Decode the first (and only) sequence in the batch and print it | |
| print(processor.batch_decode(gen_ids, skip_special_tokens=True)[0]) | |
| # Example output: This image captures a vibrant street scene in a Chinatown area. The focal point is a large red Chinese archway with gold and black accents, adorned with Chinese characters. Flanking the archway are two white stone lion statues, which are traditional guardians in Chinese culture. |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment