Created
September 8, 2025 07:59
-
-
Save pszemraj/469062a5cf895738c9d50f9428079684 to your computer and use it in GitHub Desktop.
LFM2-VL inference with recommended params
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# LFM2-VL (1.6B) single-image question answering with the recommended
# image-token and sampling parameters.
from transformers import AutoModelForImageTextToText, AutoProcessor
from transformers.image_utils import load_image

# --- Model and processor -----------------------------------------------------
model_id = "LiquidAI/LFM2-VL-1.6B"
model = AutoModelForImageTextToText.from_pretrained(
    model_id,
    device_map="auto",
    torch_dtype="bfloat16",
    trust_remote_code=True,
)
processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)

# --- Image and conversation --------------------------------------------------
# Alternative test image: https://www.ilankelman.org/stopsigns/australia.jpg
url = "https://static.wikia.nocookie.net/shrek/images/d/d9/Shrek_in_The_Pentaverate.webp/revision/latest/scale-to-width-down/1000?cb=20220628125340"
image = load_image(url)

# A single user turn carrying the image followed by the question about it.
user_turn = {
    "role": "user",
    "content": [
        {"type": "image", "image": image},
        {"type": "text", "text": "What is in this image?"},
    ],
}
conversation = [user_turn]

# --- Prompt encoding ---------------------------------------------------------
# Options forwarded to the chat template: tokenize to PyTorch tensors returned
# as a dict, append the generation prompt, and pass the image-token bounds and
# image-splitting flag.
template_options = {
    "add_generation_prompt": True,
    "return_tensors": "pt",
    "return_dict": True,
    "tokenize": True,
    "min_image_tokens": 64,
    "max_image_tokens": 256,
    "do_image_splitting": True,
}
inputs = processor.apply_chat_template(conversation, **template_options)
# Move the encoded inputs to the device the model reports.
inputs = inputs.to(model.device)

# --- Generation --------------------------------------------------------------
# Sampling settings: low temperature with min-p filtering and a mild
# repetition penalty, capped at 128 new tokens.
sampling_options = {
    "max_new_tokens": 128,
    "do_sample": True,
    "temperature": 0.1,
    "min_p": 0.15,
    "repetition_penalty": 1.05,
}
outputs = model.generate(**inputs, **sampling_options)

# Decode the batch and print the first (only) sequence.
decoded = processor.batch_decode(outputs, skip_special_tokens=True)
print(decoded[0])
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# LFM2-VL (450M) single-image question answering with the recommended
# image-token and sampling parameters.
from transformers import AutoModelForImageTextToText, AutoProcessor
from transformers.image_utils import load_image

# --- Model and processor -----------------------------------------------------
model_id = "LiquidAI/LFM2-VL-450M"
model = AutoModelForImageTextToText.from_pretrained(
    model_id,
    device_map="auto",
    torch_dtype="bfloat16",
    trust_remote_code=True,
)
processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)

# --- Image and conversation --------------------------------------------------
# Alternative test image: https://www.ilankelman.org/stopsigns/australia.jpg
url = "https://static.wikia.nocookie.net/shrek/images/d/d9/Shrek_in_The_Pentaverate.webp/revision/latest/scale-to-width-down/1000?cb=20220628125340"
image = load_image(url)

# One user message: the image first, then the text question.
image_part = {"type": "image", "image": image}
question_part = {"type": "text", "text": "What is in this image?"}
conversation = [
    {"role": "user", "content": [image_part, question_part]},
]

# --- Prompt encoding ---------------------------------------------------------
# Options forwarded to the chat template: tokenize to PyTorch tensors returned
# as a dict, append the generation prompt, and pass the image-token bounds and
# image-splitting flag.
template_options = {
    "add_generation_prompt": True,
    "return_tensors": "pt",
    "return_dict": True,
    "tokenize": True,
    "min_image_tokens": 64,
    "max_image_tokens": 256,
    "do_image_splitting": True,
}
inputs = processor.apply_chat_template(conversation, **template_options)
# Move the encoded inputs to the device the model reports.
inputs = inputs.to(model.device)

# --- Generation --------------------------------------------------------------
# Sampling settings: low temperature with min-p filtering and a mild
# repetition penalty, capped at 128 new tokens.
sampling_options = {
    "max_new_tokens": 128,
    "do_sample": True,
    "temperature": 0.1,
    "min_p": 0.15,
    "repetition_penalty": 1.05,
}
outputs = model.generate(**inputs, **sampling_options)

# Decode the batch and print the first (only) sequence.
decoded = processor.batch_decode(outputs, skip_special_tokens=True)
print(decoded[0])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment