Created
February 3, 2025 23:31
-
-
Save unbracketed/1b001f5b655a9e65c7a5201a33c0ff91 to your computer and use it in GitHub Desktop.
A Python script that captions every image found in the directory it is run from, using several image-to-text models so their outputs can be compared.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# /// script
# requires-python = ">=3.12"
# dependencies = [
#     "torch",
#     "transformers",
#     "Pillow",
# ]
# ///
from pathlib import Path | |
import torch | |
from transformers import pipeline | |
# Hugging Face model identifiers whose captions are compared, in the order
# they are run against each image.
MODELS = [
    "ydshieh/vit-gpt2-coco-en",
    "Salesforce/blip-image-captioning-large",
    "microsoft/git-base-coco",
]
# Captioning pipelines already built in this process, keyed by model name.
# Constructing a pipeline loads the model, so each one is built on first use
# and then reused for every later image instead of being rebuilt per image.
# Trade-off: every successfully built pipeline stays resident until exit.
_CAPTIONERS = {}


def _get_captioner(model_name):
    """Return the pipeline for ``model_name``, building it on first use."""
    if model_name not in _CAPTIONERS:
        # First CUDA device when one is available, otherwise CPU (-1).
        device = 0 if torch.cuda.is_available() else -1
        # Only a successful build is stored; if construction raises, the next
        # call tries again and the caller reports the error each time.
        _CAPTIONERS[model_name] = pipeline(model=model_name, device=device)
    return _CAPTIONERS[model_name]


def process_image(image_path: Path, models):
    """Caption one image with every model in ``models`` and print the results.

    Prints a blank line followed by the image's file name, then one line per
    model of the form ``<model name>: <caption>``. Any exception raised while
    loading or running a model is caught and printed as
    ``<model name>: Error: <message>`` so the remaining models still run.

    Args:
        image_path: Path of the image file to caption.
        models: Iterable of model names to pass to ``transformers.pipeline``.

    Returns:
        None. All output goes to stdout.
    """
    print(f"\n{image_path.name}")
    for model_name in models:
        try:
            captioner = _get_captioner(model_name)
            caption = captioner(str(image_path))
            # A list result carries the text under 'generated_text' of its
            # first entry; any other result is printed as returned.
            caption_text = caption[0]['generated_text'] if isinstance(caption, list) else caption
            print(f"{model_name}: {caption_text}")
        except Exception as e:
            # Deliberate best-effort boundary: one failing model must not
            # prevent the others from being compared.
            print(f"{model_name}: Error: {e}")
def main():
    """Caption every JPG/PNG image in the current working directory.

    Collects the regular files directly inside the current directory whose
    extension is ``.jpg`` or ``.png`` and runs each through ``process_image``
    with ``MODELS``. If no such file exists, prints a notice and returns.

    The extension is compared case-insensitively so that files such as
    ``PHOTO.JPG`` are found on case-sensitive filesystems too, and the files
    are handled in sorted order so repeated runs produce output in the same
    order.
    """
    wanted_suffixes = {".jpg", ".png"}
    image_files = sorted(
        entry
        for entry in Path().iterdir()
        # is_file() skips directories that merely have an image-like name.
        if entry.is_file() and entry.suffix.lower() in wanted_suffixes
    )

    if not image_files:
        print("No image files (jpg/png) found in current directory!")
        return

    for img_path in image_files:
        process_image(img_path, MODELS)
# Run the captioning pass only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment