Last active
February 6, 2024 10:10
-
-
Save MadameMinty/aff416c44905c38abbe2c80436893ac6 to your computer and use it in GitHub Desktop.
Tag images with LLaVA and colorsort
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# coding: utf | |
# 2024-02-02 | |
# Tag images with LLaVA and colorsort | |
# # SETUP Windows | |
# install Python https://www.python.org/downloads/ | |
# ensure you have WSL and a distro like Ubuntu installed from the Microsoft Store | |
# run `wsl` in a terminal | |
# run `curl https://ollama.ai/install.sh | sh` in wsl | |
# run `ollama serve` in wsl | |
# open a new terminal without closing the above and run `wsl` | |
# run `ollama pull llava:7b-v1.6-mistral-q5_K_M` to download the model | |
# you can also use a different model https://ollama.ai/library/llava/tags | |
# to fit into your VRAM. I recommend `mistral-q*_K_M` family. | |
# Smaller is faster, too. | |
# | |
# # USE | |
# `wsl`, `ollama serve` | |
# in a new terminal or from a shortcut, `python llava.py "E:\Photos"` | |
# the script will connect to ollama API, and process all images | |
# in the directory and its subdirectories _and overwrite them_ | |
# | |
# # SETUP Linux | |
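# same steps as on Windows, minus WSL: install ollama with the curl command, run `ollama serve`, then `ollama pull` the model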
# | |
# RESULT | |
# You can now search for tags in Windows Explorer etc. | |
# with "tags:something" in the search bar | |
# and sort by "Comments" to sort by the dominant color | |
import base64 | |
import requests | |
from PIL import Image | |
import re | |
from pathlib import Path | |
# Everything except ASCII letters, commas and spaces is stripped from tags.
r_tags = re.compile(r'[^a-zA-Z, ]')
# Leading "YYYY-MM-DD HH.MM.SS" timestamp at the start of a file name.
r_datetime = re.compile(r'^\d{4}-\d{2}-\d{2} \d{2}\.\d{2}\.\d{2}')

# Name of the ollama model the tagging prompt is sent to.
MODEL = "llava:7b-v1.6-mistral-q5_K_M"

# Dominant colour: blend weights.
# The hue channel is weighted by H, the value channel by V, and
# saturation by 1 when the three channels are folded into one sort key.
V = 33
H = V ** 2

# Dominant colour: technical limits.
# Largest possible blended key (all three 8-bit channels at 255) ...
MAX_COLOR = 255 * (H + V + 1)
# ... and the upper bound of the range the key is rescaled to.
int16u = 0xFFFF
def dominant_color(image) -> bytes:
    """Return a sortable colour key for *image* as UTF-16LE encoded digits.

    The image is converted to HSV and shrunk to a single pixel. That
    pixel's channels are blended into one number (hue weighted by ``H``,
    value by ``V``, saturation by 1), rescaled from ``0..MAX_COLOR`` to
    ``0..int16u`` and rendered as a decimal string.

    Args:
        image: an object offering Pillow's ``convert`` / ``resize`` /
            ``getpixel`` image API.

    Returns:
        The decimal digits of the scaled key, encoded as UTF-16LE.
    """
    # resample=0 is Pillow's nearest-neighbour filter
    single_pixel = image.convert('HSV').resize((1, 1), resample=0)
    hue, saturation, brightness = single_pixel.getpixel((0, 0))

    blended = H * hue + V * brightness + saturation
    scaled = int(blended / MAX_COLOR * int16u)
    return str(scaled).encode('utf-16le')
def extract_title(file: Path) -> str:
    """Build the optional ``titled "..."`` fragment for the model prompt.

    The file name without its extension is used as the title, after a
    leading ``YYYY-MM-DD HH.MM.SS`` timestamp (matched by ``r_datetime``)
    and surrounding whitespace have been removed.

    Args:
        file: path of the image.

    Returns:
        `` titled "<name>"`` (with a leading space) when something is left
        of the name, otherwise an empty string.
    """
    name = r_datetime.sub('', file.stem).strip()
    if not name:
        # nothing but a timestamp: the prompt gets no title fragment
        return name
    return f' titled "{name}"'
def encode_image_to_base64(file: Path) -> str:
    """Read *file* as raw bytes and return them as base64 text.

    Args:
        file: path of the file to read.

    Returns:
        The base64 representation of the file content as a ``str``.
    """
    raw = Path(file).read_bytes()
    encoded = base64.b64encode(raw)
    return encoded.decode('utf-8')
def _parse_tags(answer: str) -> str:
    """Turn the model's free-text answer into ``tag;tag;tag``.

    Args:
        answer: raw text returned by the model.

    Returns:
        ``'explicit'`` when the answer looks like a content warning or a
        refusal; otherwise the unique, non-empty tags in their original
        order, joined with semicolons (possibly an empty string).
    """
    text = answer.lower()

    # Markers must be lowercase because the text was lowercased above.
    if any(marker in text for marker in ('explicit', 'sexual', 'as an ai')):
        return 'explicit'

    if ',' in text:
        # Hyphens inside a comma-separated answer join compound words;
        # turn them into spaces before the character filter drops them.
        text = text.replace('-', ' ')
    else:
        # No commas: treat the answer as a dash- or newline-separated list.
        # Separators become commas because r_tags would strip semicolons.
        text = text.replace('\r', '').replace('-', ',').replace('\n', ',')

    # remove illegal characters (anything but letters, commas, spaces)
    text = re.sub(r_tags, '', text)

    # Collapse inner whitespace, drop empty entries, and de-duplicate
    # while keeping first-seen order so the result is deterministic.
    cleaned = (' '.join(tag.split()) for tag in text.split(','))
    unique = dict.fromkeys(tag for tag in cleaned if tag)
    return ';'.join(unique)


def ollama(file: Path) -> str:
    """Ask the local ollama server for tags describing the image *file*.

    The image is sent base64-encoded to ``/api/generate`` on
    ``localhost:11434`` together with a prompt asking for five
    comma-separated words; the file name (minus a leading timestamp) is
    included in the prompt as the title.

    Args:
        file: path of the image to describe.

    Returns:
        Semicolon-separated tags, ``'explicit'`` for a content warning or
        refusal, or ``''`` when the server does not answer with HTTP 200.
        Connection errors raised by ``requests`` are not caught here.
    """
    image_base64: str = encode_image_to_base64(file)
    title: str = extract_title(file)
    response = requests.post('http://localhost:11434/api/generate', json={
        "model": MODEL,
        "prompt": f'''Generate a comma-separated list of five dictionary words describing this image{title}.''',
        "stream": False,
        "images": [image_base64],
    })
    if response.status_code != 200:
        return ''
    data: dict = response.json()
    return _parse_tags(data.get('response', ''))
def process_images(directory: str = r'E:\Photos'):
    """Tag every JPEG below *directory* in place.

    For each ``.jpg`` / ``.jpeg`` / ``.jfif`` file found recursively, the
    model is asked for tags (up to four attempts), the tags and the
    dominant-colour key are stored in EXIF, and the file is overwritten.

    Args:
        directory: root folder to scan.
    """
    extensions = {".jpg", ".jpeg", ".jfif"}
    files = (
        p.absolute()
        for p in Path(directory).glob("**/*")
        # is_file() skips directories that merely carry an image suffix
        if p.suffix.lower() in extensions and p.is_file())
    for file in files:
        with Image.open(file) as image:
            exif = image.getexif()
            # keep trying until we get a valid tag list:
            # non-empty and at most 64 characters, four attempts at most
            tags: str = ''
            tries: int = 0
            while (not tags or len(tags) > 64) and tries < 4:
                tags = ollama(file)
                tries += 1
            # https://exiftool.org/TagNames/EXIF.html
            # 0x9c9b XPTitle    Title
            # 0x9c9c XPComment  Comments       <- dominant color
            # 0x9c9d XPAuthor   Authors
            # 0x9c9e XPKeywords Tags;like;this <- tags
            # 0x9c9f XPSubject  unreadable
            if tags:
                # Only write keywords when the model produced some, so a
                # failed run does not wipe tags already stored in the file.
                exif[0x9c9e] = tags.encode('utf-16le')
            exif[0x9c9c] = dominant_color(image)
            save_options = {'exif': exif}
            if image.format == 'JPEG':
                # Reuse the source's quantisation tables and subsampling
                # instead of re-encoding at Pillow's default quality on
                # every run. 'keep' is only accepted for JPEG sources.
                save_options['quality'] = 'keep'
            image.save(file, 'JPEG', **save_options)
if __name__ == "__main__":
    import sys

    # The script overwrites images in place, so require the directory to
    # be named explicitly and show a usage hint instead of an IndexError
    # traceback when it is missing.
    if len(sys.argv) < 2:
        sys.exit(f'usage: python {sys.argv[0]} <directory with images>')
    process_images(sys.argv[1])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment