Generating captions with CLIP Interrogator
import sys

sys.path.append('src/blip')
sys.path.append('clip-interrogator')

import torch
from PIL import Image
from tqdm import tqdm
from clip_interrogator import Config, Interrogator

# Configure CLIP Interrogator; offload BLIP to CPU only when no GPU is available.
config = Config()
config.device = 'cuda' if torch.cuda.is_available() else 'cpu'
config.blip_offload = not torch.cuda.is_available()
config.chunk_size = 2048
config.flavor_intermediate_count = 512
config.blip_num_beams = 64

ci = Interrogator(config)


def inference(image_path, best_max_flavors):
    # Generate a "best" mode prompt (caption) for a single image.
    image = Image.open(image_path).convert('RGB')
    prompt_result = ci.interrogate(image, max_flavors=int(best_max_flavors))
    print("mode best: " + prompt_result)
    return prompt_result


def run_inference_on_images(filepaths, best_max_flavors):
    # Run inference on each image and write one caption per line to results.txt.
    with open('results.txt', 'w') as file:
        for filepath in tqdm(filepaths):
            file.write(inference(filepath, best_max_flavors) + '\n')


# Read image file paths (one per line) from data.txt and caption each image.
with open('data.txt', 'r') as f:
    filepaths = f.read().splitlines()

run_inference_on_images(filepaths, 5)
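
The script expects data.txt to list one image path per line and writes the captions, in the same order, to results.txt. As a minimal sketch of how such a file could be produced (the 'images/' directory and the file extensions are assumptions, not part of the gist):

# Sketch: build data.txt from a folder of images before running the script.
# The 'images/' directory and extension filter are placeholders; adjust to your dataset.
from pathlib import Path

paths = sorted(str(p) for p in Path('images').glob('*')
               if p.suffix.lower() in {'.jpg', '.jpeg', '.png'})
with open('data.txt', 'w') as f:
    f.write('\n'.join(paths))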