Generating captions with CLIP Interrogator
import sys

sys.path.append('src/blip')
sys.path.append('clip-interrogator')

import torch
from PIL import Image
from tqdm import tqdm
from clip_interrogator import Config, Interrogator

# Configure CLIP Interrogator; offload BLIP to CPU only when no GPU is available.
config = Config()
config.device = 'cuda' if torch.cuda.is_available() else 'cpu'
config.blip_offload = not torch.cuda.is_available()
config.chunk_size = 2048
config.flavor_intermediate_count = 512
config.blip_num_beams = 64

ci = Interrogator(config)


def inference(image_path, best_max_flavors):
    # Generate a "best" mode prompt (caption) for a single image.
    image = Image.open(image_path).convert('RGB')
    prompt_result = ci.interrogate(image, max_flavors=int(best_max_flavors))
    print("mode best: " + prompt_result)
    return prompt_result


def run_inference_on_images(filepaths, best_max_flavors):
    # Run inference on each image and write one caption per line to results.txt.
    with open('results.txt', 'w') as file:
        for filepath in tqdm(filepaths):
            file.write(inference(filepath, best_max_flavors) + '\n')


# Read image file paths (one per line) from data.txt and caption each image.
with open('data.txt', 'r') as f:
    filepaths = f.read().splitlines()

run_inference_on_images(filepaths, 5)
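
The script expects data.txt to list one image path per line and writes the captions, in the same order, to results.txt. As a minimal sketch of how such a file could be produced (the 'images/' directory and the file extensions are assumptions, not part of the gist):

# Sketch: build data.txt from a folder of images before running the script.
# The 'images/' directory and extension filter are placeholders; adjust to your dataset.
from pathlib import Path

paths = sorted(str(p) for p in Path('images').glob('*')
               if p.suffix.lower() in {'.jpg', '.jpeg', '.png'})
with open('data.txt', 'w') as f:
    f.write('\n'.join(paths))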