Created
August 6, 2024 12:15
-
-
Save boxabirds/383056f642daed9fa3e1be249f863d10 to your computer and use it in GitHub Desktop.
Generate alt text for images (using LLaVA)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import argparse
import base64
import hashlib
import json
import os
from urllib.parse import urlsplit

import requests
from bs4 import BeautifulSoup
def download_image(url, save_path, timeout=30):
    """Download ``url`` and write the response body to ``save_path``.

    Args:
        url: Address of the image to fetch.
        save_path: Local file path the raw response bytes are written to.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled server would block the whole run indefinitely.

    Returns:
        True if the server answered with HTTP 200 and the file was written,
        False on any other status code or on a network-level failure.
    """
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException:
        # Connection errors, timeouts and malformed URLs are reported the
        # same way as a non-200 answer so one bad image cannot abort the
        # caller's loop over all images.
        return False
    if response.status_code != 200:
        return False
    with open(save_path, 'wb') as f:
        f.write(response.content)
    return True
def get_alt_text(image_path, llava_url, timeout=300):
    """Ask the server at ``llava_url`` for an alt-text description of an image.

    The image file is read, base64-encoded and POSTed as a JSON object with
    the keys ``"image"`` and ``"prompt"``.

    Args:
        image_path: Path of the local image file to describe.
        llava_url: URL the JSON payload is POSTed to.
        timeout: Seconds to wait for the server's answer. The default is
            generous because model inference can be slow; NOTE(review):
            tune to the actual server.

    Returns:
        The value stored under ``"response"`` in the JSON reply, or None when
        the request fails, the status code is not 200, or the reply is not a
        JSON object containing that key.

    Raises:
        OSError: If ``image_path`` cannot be opened or read.
    """
    with open(image_path, "rb") as image_file:
        image_base64 = base64.b64encode(image_file.read()).decode('utf-8')
    payload = {
        "image": image_base64,
        "prompt": "Describe this image in a concise manner suitable for an alt text."
    }
    try:
        response = requests.post(llava_url, json=payload, timeout=timeout)
    except requests.RequestException:
        # Treat an unreachable or stalled server like any other failed
        # generation so the caller can report it and move on.
        return None
    if response.status_code != 200:
        return None
    try:
        return response.json()['response']
    except (ValueError, KeyError, TypeError):
        # ValueError: the body is not valid JSON.
        # KeyError / TypeError: the JSON has no "response" entry or is not
        # an object at all.
        return None
def _local_image_name(src):
    """Return a file name for storing the image at URL ``src`` locally."""
    # Use only the path component so query strings and fragments
    # ("pic.png?w=100#x") do not end up in the file name.
    name = os.path.basename(urlsplit(src).path)
    if name in ('', '.', '..'):
        # The URL path has no usable last segment (e.g. it ends in "/");
        # fall back to a stable name derived from the URL itself.
        name = hashlib.sha256(src.encode('utf-8')).hexdigest()[:16]
    return name


def process_html(html_path, output_path, llava_url):
    """Add generated alt text to the remote images of an HTML file.

    Every ``<img>`` whose ``src`` starts with ``http://`` or ``https://`` is
    downloaded into the local ``images`` directory and passed to
    ``get_alt_text``; a non-empty result is stored in the tag's ``alt``
    attribute. Images that cannot be downloaded or described are reported on
    stdout and left unchanged. The resulting document is written to
    ``output_path``.

    Args:
        html_path: Path of the HTML file to read.
        output_path: Path the modified HTML is written to.
        llava_url: URL of the server forwarded to ``get_alt_text``.
    """
    # Explicit UTF-8 so the result does not depend on the platform's
    # default text encoding.
    with open(html_path, 'r', encoding='utf-8') as f:
        soup = BeautifulSoup(f, 'html.parser')

    for img in soup.find_all('img'):
        src = img.get('src')
        if not src or not src.startswith(('http://', 'https://')):
            continue

        # Created lazily so no directory appears when there is nothing to
        # download; exist_ok avoids the check-then-create race.
        os.makedirs('images', exist_ok=True)
        save_path = os.path.join('images', _local_image_name(src))

        if not download_image(src, save_path):
            print(f"Failed to download image: {src}")
            continue

        alt_text = get_alt_text(save_path, llava_url)
        if alt_text:
            img['alt'] = alt_text
            print(f"Added alt text to {src}: {alt_text}")
        else:
            print(f"Failed to generate alt text for {src}")

    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(str(soup))
def main():
    """Parse the three positional command-line arguments and run process_html."""
    cli = argparse.ArgumentParser(
        description="Process HTML and add alt text to images using LLaVA"
    )
    # Positional arguments, declared in the order they are expected on the
    # command line.
    positionals = (
        ("html_path", "Path to the input HTML file"),
        ("output_path", "Path to save the output HTML file"),
        ("llava_url", "URL of the LLaVA server"),
    )
    for arg_name, arg_help in positionals:
        cli.add_argument(arg_name, help=arg_help)

    options = cli.parse_args()
    process_html(options.html_path, options.output_path, options.llava_url)
# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment