SSD Segmind bot with BLIP
#!/usr/bin/env python
import os
import random
import uuid
import subprocess
from typing import List, Optional
from tqdm import tqdm
import re
import time
### optional automated install
'''
def check_and_install(lib_name_mapping):
    for lib, runtime_name in lib_name_mapping.items():
        try:
            __import__(lib)  # import by module name, not by pip package name
            print(f'{runtime_name} is already installed.')
        except ImportError:
            print(f'Installing {runtime_name}...')
            install_cmd = ['pip', 'install', runtime_name]
            subprocess.run(install_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            print(f'{runtime_name} has been installed.')

# Library name mapping: import name -> pip package name
lib_name_mapping = {
    'discord': 'discord',
    'transformers': 'transformers',
    'aiohttp': 'aiohttp',
    'numpy': 'numpy',
    'PIL': 'Pillow',
    'torch': 'torch',
    'diffusers': 'diffusers',
    'controlnet_aux': 'controlnet_aux',
    'dotenv': 'python-dotenv',
    'compel': 'compel',
    'gradio': 'gradio',
    'clip_interrogator': 'clip-interrogator',
    'cv2': 'opencv-contrib-python',
}
check_and_install(lib_name_mapping)
'''
## clip-interrogator-0.6.0 huggingface-hub-0.19.4 open_clip_torch-2.23.0 tokenizers-0.15.0 protobuf-4.25.1
try:
    import cv2
except ImportError:
    print('Issue importing the cv2 module. Please install `pip install opencv-contrib-python`')
try:
    import discord
    from discord import app_commands
except ImportError:
    print('Issue importing the discord module. Please install `pip install discord`')
try:
    from transformers import pipeline, CLIPTokenizer, AutoImageProcessor, UperNetForSemanticSegmentation  # , AutoModelForCausalLM
except ImportError:
    print('Issue importing the transformers module. Please install `pip install transformers`')
try:
    import aiohttp
except ImportError:
    print('Issue importing the aiohttp module. Please install `pip install aiohttp`')
try:
    import numpy as np
except ImportError:
    print('Issue importing the numpy module. Please install `pip install numpy`')
try:
    from PIL import Image, ImageDraw, ImageFont
except ImportError:
    print('Issue importing the PIL module. Please install `pip install Pillow`')
try:
    import torch
    from torch import autocast
except ImportError:
    print('Issue importing the torch module. Please install `pip install torch`')
try:
    from diffusers import LCMScheduler, AutoPipelineForText2Image, AutoencoderKL, StableDiffusionControlNetPipeline, StableDiffusionXLPipeline, DiffusionPipeline, StableDiffusionXLImg2ImgPipeline, StableDiffusionXLControlNetPipeline, ControlNetModel, UniPCMultistepScheduler
    from diffusers.utils import load_image
except ImportError:
    print('Issue importing the diffusers module. Please install `pip install diffusers`')
try:
    from controlnet_aux import OpenposeDetector
except ImportError:
    print('Issue importing the controlnet_aux module. Please install `pip install controlnet_aux`')
try:
    from dotenv import load_dotenv
except ImportError:
    print('Issue importing the dotenv module. Please install `pip install python-dotenv`')
try:
    from compel import Compel, ReturnedEmbeddingsType
except ImportError:
    print('Issue importing the compel module. Please install `pip install git+https://github.com/damian0815/compel/`')
try:
    #import gradio as gr
    from clip_interrogator import Config, Interrogator
except ImportError:
    print('Issue importing the clip_interrogator module. Please install `pip install open-clip-torch clip-interrogator`')
if not os.path.isfile('./realesrgan-ncnn-vulkan.exe'):
    print('Can not find `./realesrgan-ncnn-vulkan.exe` in current working directory')

# Create the .env file with default values if it does not exist,
# then read the settings from it.
if not os.path.isfile('.env'):
    with open(".env", "w") as f:
        f.write("DISCORD_BOT_TOKEN=\n")
        f.write("CACHE_EXAMPLES=1\n")
        f.write("MAX_IMAGE_SIZE=1024\n")
        f.write("USE_TORCH_COMPILE=1\n")
        f.write("ENABLE_CPU_OFFLOAD=0\n")
        f.write("ENABLE_REFINER=0")

# Load settings from the .env file
load_dotenv()
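# For reference, a minimal sketch of what the generated .env ends up looking
# like once a token is filled in (the token value here is a placeholder):
#
#   DISCORD_BOT_TOKEN=your-bot-token-here
#   CACHE_EXAMPLES=1
#   MAX_IMAGE_SIZE=1024
#   USE_TORCH_COMPILE=1
#   ENABLE_CPU_OFFLOAD=0
#   ENABLE_REFINER=0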
from lists import get_random_terms, seg_palette, get_random_mix

caption_model_name = 'blip-large'  #@param ["blip-base", "blip-large", "git-large-coco"]
clip_model_name = 'ViT-L-14/openai'  #@param ["ViT-L-14/openai", "ViT-H-14/laion2b_s32b_b79k"]
ci = None
def image_analysis(imagepath):
    image = load_image(imagepath)
    image = image.convert('RGB')
    image_features = ci.image_to_features(image)
    top_mediums = ci.mediums.rank(image_features, 5)
    top_artists = ci.artists.rank(image_features, 5)
    top_movements = ci.movements.rank(image_features, 5)
    top_trendings = ci.trendings.rank(image_features, 5)
    top_flavors = ci.flavors.rank(image_features, 5)
    medium_ranks = {medium: sim for medium, sim in zip(top_mediums, ci.similarities(image_features, top_mediums))}
    artist_ranks = {artist: sim for artist, sim in zip(top_artists, ci.similarities(image_features, top_artists))}
    movement_ranks = {movement: sim for movement, sim in zip(top_movements, ci.similarities(image_features, top_movements))}
    trending_ranks = {trending: sim for trending, sim in zip(top_trendings, ci.similarities(image_features, top_trendings))}
    flavor_ranks = {flavor: sim for flavor, sim in zip(top_flavors, ci.similarities(image_features, top_flavors))}
    return medium_ranks, artist_ranks, movement_ranks, trending_ranks, flavor_ranks

def image_to_prompt(imagepath, mode):
    ci.config.chunk_size = 2048 if ci.config.clip_model_name == "ViT-L-14/openai" else 1024
    ci.config.flavor_intermediate_count = 2048 if ci.config.clip_model_name == "ViT-L-14/openai" else 1024
    image = load_image(imagepath)
    image = image.convert('RGB')
    if mode == 'best':
        return ci.interrogate(image)
    elif mode == 'classic':
        return ci.interrogate_classic(image)
    elif mode == 'fast':
        return ci.interrogate_fast(image)
    elif mode == 'negative':
        return ci.interrogate_negative(image)
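# A minimal usage sketch for the two helpers above, assuming `ci` has been
# initialised (the /img handler below does this on demand); the image path
# is a placeholder:
#
#   ci = Interrogator(Config(caption_model_name=caption_model_name,
#                            clip_model_name=clip_model_name))
#   print(image_to_prompt("example.png", "fast"))
#   mediums, artists, movements, trendings, flavors = image_analysis("example.png")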
# Discord Bot Setup
intents = discord.Intents.default()
intents.message_content = True
client = discord.Client(intents=intents)
tree = discord.app_commands.CommandTree(client)
# Environment Setup and Model Initialization
MAX_SEED = np.iinfo(np.int32).max
CACHE_EXAMPLES = torch.cuda.is_available() and os.getenv("CACHE_EXAMPLES", "1") == "1"
MAX_IMAGE_SIZE = int(os.getenv("MAX_IMAGE_SIZE", "1024"))
USE_TORCH_COMPILE = os.getenv("USE_TORCH_COMPILE", "1") == "1"
ENABLE_CPU_OFFLOAD = os.getenv("ENABLE_CPU_OFFLOAD", "0") == "1"
ENABLE_REFINER = os.getenv("ENABLE_REFINER", "0") == "1"
DEFAULT_SCALE = 1
IMAGE_COUNT = 1
MAX_STEPS = 100
unique_name = "oops.png"
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

### Model query defaults
GUIDANCE_DEFAULT = 1.0
STEPS_DEFAULT = 14
WIDTH_DEFAULT = 800
HEIGHT_DEFAULT = 1280
IMAGES_COUNT_DEFAULT = 6
TARGET_STRENGTH_DEFAULT = 0.5
NEGATIVE_PROMPT_DEFAULT = "(deformed iris, deformed pupils, semi-realistic, cgi, 3d, render, sketch, cartoon, drawing, anime:1.4), close up, cropped, out of frame, worst quality, low quality, jpeg artifacts, ugly, duplicate, morbid, mutilated, extra fingers, mutated hands, poorly drawn hands, poorly drawn face, mutation, deformed, blurry, dehydrated, bad anatomy, bad proportions, extra limbs, cloned face, disfigured, gross proportions, malformed limbs, missing arms, missing legs, extra arms, extra legs, fused fingers, too many fingers, long neck"
DEFAULT_FONT_LOCATION = r"C:\Windows\WinSxS\amd64_microsoft-windows-font-truetype-arial_31bf3856ad364e35_10.0.22621.1_none_d4193be3a119442b\arial.ttf"  # Path to a .ttf font file (raw string so the backslashes are not treated as escapes)
DEFAULT_IMAGES_FOLDER_PREFIX = ".\\image\\"
CAPTION_MODEL_DEFAULT = "blip-base"  # default value for caption model
CLIP_MODEL_DEFAULT = "ViT-L-14/openai"  # default value for clip model
CAPTION_TYPE_DEFAULT = "image_to_prompt"  # default value for captioning type
###
vae = None
pipe = None
refiner = None
control_net_image_path = None
upscale_image_size = None
target_conditioning = None
target_pooled = None
target_prompt = ""
negative_target_prompt = ""
negative_target_conditioning = None
negative_target_pooled = None
#negative_prompt_two = ""
org_init_image_size = 0
adapter_id = "latent-consistency/lcm-lora-ssd-1b"

if torch.cuda.is_available():
    vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16)
    pipe = AutoPipelineForText2Image.from_pretrained(
        #pipe = StableDiffusionXLPipeline.from_pretrained(
        #"stabilityai/stable-diffusion-xl-base-1.0",
        "segmind/SSD-1B",
        vae=vae,
        torch_dtype=torch.float16,
        use_safetensors=True,
        variant="fp16",
    )
    pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
    if ENABLE_REFINER:
        refiner = DiffusionPipeline.from_pretrained(
            "stabilityai/stable-diffusion-xl-refiner-1.0",
            vae=vae,
            torch_dtype=torch.float16,
            use_safetensors=True,
            variant="fp16",
        )
    if ENABLE_CPU_OFFLOAD:
        pipe.enable_model_cpu_offload()
        if refiner is not None:
            refiner.enable_model_cpu_offload()
    else:
        pipe.to(device)
        if refiner is not None:
            refiner.to(device)
    pipe.load_lora_weights(adapter_id)
    pipe.fuse_lora()
    if USE_TORCH_COMPILE and not os.name == 'nt':  # Check if the OS is not Windows
        pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)
        if refiner is not None:
            refiner.unet = torch.compile(refiner.unet, mode="reduce-overhead", fullgraph=True)
    compel = Compel(
        tokenizer=[pipe.tokenizer, pipe.tokenizer_2],
        text_encoder=[pipe.text_encoder, pipe.text_encoder_2],
        returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED,
        requires_pooled=[False, True]
    )
    compel_proc = Compel(tokenizer=pipe.tokenizer, text_encoder=pipe.text_encoder)
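# A short sketch of how these Compel instances are used further down: compel
# turns an (optionally weighted) prompt string into conditioning tensors that
# are passed to the pipeline as prompt_embeds / pooled_prompt_embeds. The
# "++"/"--" emphasis below is standard compel syntax; the prompt itself is
# only an illustration.
#
#   conditioning, pooled = compel("a portrait++ of an astronaut, grainy film--")
#   image = pipe(prompt_embeds=conditioning, pooled_prompt_embeds=pooled,
#                num_inference_steps=STEPS_DEFAULT,
#                guidance_scale=GUIDANCE_DEFAULT).images[0]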
# Image To Image Pipeline
model_id_or_path = "segmind/SSD-1B"
#pipe_image_to_image = StableDiffusionXLImg2ImgPipeline.from_pretrained(model_id_or_path, torch_dtype=torch.float16)
#pipe_image_to_image = pipe.to('cuda')
#init_image = Image.open("wub.png").convert("RGB").resize((768, 512))
#prompt = "A fantasy landscape, trending on artstation"
#images = pipe_image_to_image(prompt=prompt, image=init_image, strength=0.75, guidance_scale=7.5).images
#images[0].save("fantasy_landscape.png")

# AUTO PROMPT GENERATION
#sd_tokenizer = AutoTokenizer.from_pretrained('Gustavosta/MagicPrompt-Stable-Diffusion')
#sd_model = AutoModelForCausalLM.from_pretrained('Gustavosta/MagicPrompt-Stable-Diffusion')
sd_pipeline = pipeline('text-generation', model='Gustavosta/MagicPrompt-Stable-Diffusion', max_length=128, pad_token_id=0)
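# A hedged example of how sd_pipeline extends a prompt (the `auto` branch of
# /img below loops this until the prompt reaches ~70 tokens); the seed text
# is illustrative:
#
#   extended = sd_pipeline("a castle on a hill,", num_return_sequences=1)[0]["generated_text"]
#   print(extended)  # MagicPrompt appends style/quality terms to the input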
def save_image(img, add_watermark=True):
    global DEFAULT_SCALE
    # Load a font
    if add_watermark:
        font_path = DEFAULT_FONT_LOCATION
        try:
            font = ImageFont.truetype(font_path, 30)  # Adjust the size to fit your needs
        except IOError:
            font = ImageFont.load_default()
        # Add watermark
        watermark_text = "AI Enthusiasts"
        draw = ImageDraw.Draw(img)
        # Calculate the bounding box at (0, 0) position
        bbox = draw.textbbox((0, 0), watermark_text, font=font)
        # Calculate text width and height from bbox
        text_width = bbox[2] - bbox[0]
        text_height = bbox[3] - bbox[1]
        # Calculate x, y for the bottom-right position
        x = img.width - text_width - 20  # 20 pixels from the right
        y = img.height - text_height - 20  # 20 pixels from the bottom
        # Draw the text
        draw.text((x, y), watermark_text, font=font, fill=(127, 127, 128))
    # Save the image with a unique name
    unique_name = str(uuid.uuid4()) + '.png'
    img.save(str(os.path.join(DEFAULT_IMAGES_FOLDER_PREFIX, unique_name)))
    upscale_name = os.path.join(DEFAULT_IMAGES_FOLDER_PREFIX, 'discord_img_gen_upscale_' + unique_name)
    if DEFAULT_SCALE != 1:
        if img.width * img.height > 819200 and DEFAULT_SCALE > 3:
            DEFAULT_SCALE = 3
        if img.width * img.height > 1310720 and DEFAULT_SCALE > 2:
            DEFAULT_SCALE = 2
        if DEFAULT_SCALE < 1:
            w, h = img.size
            new_w = int(w * DEFAULT_SCALE)
            new_h = int(h * DEFAULT_SCALE)
            processed_img = img.resize((new_w, new_h))
            processed_img.save(upscale_name)
        else:
            # Run the executable on the saved image (the input path must include the images folder)
            subprocess.run(['./realesrgan-ncnn-vulkan.exe', '-i', str(os.path.join(DEFAULT_IMAGES_FOLDER_PREFIX, unique_name)), '-o', upscale_name, '-s', str(DEFAULT_SCALE)], check=True)
            # Load the processed image
            processed_img = Image.open(upscale_name)
    return unique_name
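# For reference, the upscaler invoked above is the standalone Real-ESRGAN
# ncnn-vulkan binary; run by hand the call looks like this (paths are
# illustrative):
#
#   realesrgan-ncnn-vulkan.exe -i input.png -o output.png -s 2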
def randomize_seed_fn(seed: int) -> int:
    # Note: the incoming seed is ignored; a fresh random seed is always returned.
    seed = random.randint(0, MAX_SEED)
    return seed

def is_empty_string(s):
    return s is None or not s.strip()
# The complete generate_for_discord function: generates one image per call
def generate_for_discord(
    prompt: str,
    width: int = WIDTH_DEFAULT,
    height: int = HEIGHT_DEFAULT,
    guidance_scale: float = GUIDANCE_DEFAULT,
    num_inference_steps: int = STEPS_DEFAULT,
    apply_refiner: bool = True,
    scale: float = 1.0,
    seed: int = 0,
    #negative_prompt: str = "ugly, blurry, poor quality, watermarked, text, typography, signature, signed"
    negative_prompt: str = NEGATIVE_PROMPT_DEFAULT,
    #image_to_image: bool = False,
    #image_to_image_strength: float = 0.3,
    auto: bool = False,
    #image_to_image_prompt: str = ""
    #quality_terms: int = 0,
    #lighting_terms: int = 0,
    #media_terms: int = 0,
    #random_real_artists: int = 0,
    #style_terms: int = 0,
    random_terms: bool = False,
    #controlnet_type: str = "Depth",
    init_image_path: str = None,
    #caption_model: str = CAPTION_MODEL_DEFAULT,
    #clip_model: str = CLIP_MODEL_DEFAULT,
    #captioning_type: str = CAPTION_TYPE_DEFAULT,
    target_mode: bool = False,
    target_strength: float = TARGET_STRENGTH_DEFAULT,
):
    global IMAGE_COUNT
    init_image = None
    control_net_image_path = None
    latents = None
    image = None
    image_path = None
    generator = torch.Generator().manual_seed(seed)
    if num_inference_steps > MAX_STEPS // IMAGE_COUNT:
        num_inference_steps = MAX_STEPS // IMAGE_COUNT
    target_strength = max(0, min(target_strength, 1))
    conditioning, pooled = compel(prompt)
    negative_conditioning, negative_pooled = compel(negative_prompt)
    if init_image_path is not None and target_mode:
        # Linearly interpolate the prompt embeddings towards the target embeddings
        conditioning = conditioning - ((conditioning - target_conditioning) * target_strength)
        pooled = pooled - ((pooled - target_pooled) * target_strength)
        negative_conditioning = negative_conditioning - ((negative_conditioning - negative_target_conditioning) * target_strength)
        negative_pooled = negative_pooled - ((negative_pooled - negative_target_pooled) * target_strength)
    #if image_to_image_prompt == "":
    #    image_to_image_prompt = prompt
    # add support for second prompt embeddings
    #i2i_conditioning, i2i_pooled = compel(image_to_image_prompt)
    #image_to_image_path = "not_set"
    #if init_image_path is None:
    # Note: the seed is carried by the generator; diffusers pipelines take no `seed` argument
    if not apply_refiner or refiner is None:
        with torch.autocast("cuda"):
            image = pipe(
                #prompt=prompt,
                prompt_embeds=conditioning,
                pooled_prompt_embeds=pooled,
                #negative_prompt=negative_prompt,
                negative_prompt_embeds=negative_conditioning,
                negative_pooled_prompt_embeds=negative_pooled,
                width=width,
                height=height,
                guidance_scale=guidance_scale,
                num_inference_steps=num_inference_steps,
                generator=generator
            ).images[0]
    else:
        with torch.autocast("cuda"):
            # Request latents from the base pipe, then hand them to the refiner
            latents = pipe(
                #prompt=prompt,
                prompt_embeds=conditioning,
                pooled_prompt_embeds=pooled,
                width=width,
                negative_prompt_embeds=negative_conditioning,
                negative_pooled_prompt_embeds=negative_pooled,
                #negative_prompt=negative_prompt,
                height=height,
                guidance_scale=guidance_scale,
                num_inference_steps=num_inference_steps,
                generator=generator,
                output_type="latent"
            ).images
        image = refiner(
            #prompt=prompt,
            prompt_embeds=conditioning,
            pooled_prompt_embeds=pooled,
            negative_prompt_embeds=negative_conditioning,
            negative_pooled_prompt_embeds=negative_pooled,
            #negative_prompt=negative_prompt,
            guidance_scale=guidance_scale,
            num_inference_steps=num_inference_steps,
            latents=latents,
            generator=generator,
        ).images[0]
    image_path = save_image(image)
    mask_layer_image_path = None
    print(("image_path", image_path, "seed", seed) if init_image_path is not None else ("control_net_image_path", control_net_image_path, "seed", seed))
    #print("image_to_image_path", image_to_image_path, "seed", seed, "current seed", i2i_seed)
    #return image_path, image_to_image_path, seed, num_inference_steps, prompt, i2i_seed
    return image_path, seed, num_inference_steps, prompt, control_net_image_path, mask_layer_image_path
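# The target_mode blend in generate_for_discord is a plain linear
# interpolation between the prompt embeddings and the embeddings of the
# interrogated reference image. A self-contained sketch on dummy tensors
# (the shapes are illustrative, not the real CLIP dimensions):
#
#   import torch
#   source = torch.randn(1, 77, 2048)   # conditioning from the typed prompt
#   target = torch.randn(1, 77, 2048)   # conditioning from the reference image
#   t = 0.5                             # target_strength
#   blended = source - (source - target) * t   # equivalent to torch.lerp(source, target, t)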
# canny (disabled controlnet handling below, kept for reference)
'''
if init_image_path is not None and controlnet_type == "Canny":
    model_id_or_path = "segmind/SSD-1B"
    # load the controlnet model for canny edge detection
    controlnet = ControlNetModel.from_pretrained("lllyasviel/sd-controlnet-canny", torch_dtype=torch.float16)
    #controlnet = ControlNetModel.from_pretrained("lllyasviel/control_v11p_sd15_canny", torch_dtype=torch.float16)
    # load the stable diffusion pipeline with controlnet
    #controlnet_pipe = StableDiffusionControlNetPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", controlnet=controlnet, safety_checker=None, torch_dtype=torch.float16)
    controlnet_pipe = StableDiffusionControlNetPipeline.from_pretrained("runwayml/stable-diffusion-v1-5", controlnet=controlnet, safety_checker=None, torch_dtype=torch.float16)
    #controlnet_pipe = StableDiffusionXLControlNetPipeline.from_pretrained(model_id_or_path, controlnet=controlnet, torch_dtype=torch.float16)
    #pipe = StableDiffusionControlNetPipeline.from_pretrained(model_id_or_path, controlnet=controlnet, safety_checker=None, torch_dtype=torch.float16)
    #controlnet_pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)
    # set scheduler
    controlnet_pipe.scheduler = LCMScheduler.from_config(controlnet_pipe.scheduler.config)
    # load LCM-LoRA
    controlnet_pipe.load_lora_weights("latent-consistency/lcm-lora-sdv1-5")
    # enable efficient implementations using xformers for faster inference
    controlnet_pipe.enable_xformers_memory_efficient_attention()
    controlnet_pipe.enable_model_cpu_offload()
    #### DO
    image_input = load_image(init_image_path)
    image_input = np.array(image_input)
    # Define parameters for canny edge detection
    low_threshold = 100
    high_threshold = 200
    # Do canny edge detection
    image_canny = cv2.Canny(image_input, low_threshold, high_threshold)
    image_canny = image_canny[:, :, None]
    image_canny = np.concatenate([image_canny, image_canny, image_canny], axis=2)
    image_canny = Image.fromarray(image_canny)
    mask_layer_image_path = save_image(image_canny, False)
    # Prepare arguments for the function call
    call_args = {
        "num_inference_steps": num_inference_steps,
        "height": height,
        "width": width,
        "negative_prompt": negative_prompt,
        "guidance_scale": guidance_scale,
        "generator": generator
    }
    # Include 'latents' only if it's not None
    if latents is not None:
        call_args["latents"] = latents
    # Function call with dynamic arguments
    image_output = controlnet_pipe(prompt=prompt, image=image_canny, **call_args).images[0]
    control_net_image_path = save_image(image_output)

# add second timeline handling where an alternate prompt was entered and we have to recompute our compel embeddings and NOT increment the seed.
# seg map
if init_image_path is not None and controlnet_type == "Segmentation":
    ### SEGMENTATION
    # load the image processor and the model for doing segmentation
    image_processor = AutoImageProcessor.from_pretrained("openmmlab/upernet-convnext-small")
    image_segmentor = UperNetForSemanticSegmentation.from_pretrained("openmmlab/upernet-convnext-small")
    # load the controlnet model for semantic segmentation
    seg_controlnet = ControlNetModel.from_pretrained(
        "lllyasviel/sd-controlnet-seg", torch_dtype=torch.float16
    )
    # load the stable diffusion pipeline with controlnet
    seg_pipe = StableDiffusionControlNetPipeline.from_pretrained(
        "runwayml/stable-diffusion-v1-5", controlnet=seg_controlnet, safety_checker=None, torch_dtype=torch.float16
    )
    #seg_pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)
    # set scheduler
    seg_pipe.scheduler = LCMScheduler.from_config(seg_pipe.scheduler.config)
    # load LCM-LoRA
    seg_pipe.load_lora_weights("latent-consistency/lcm-lora-sdv1-5")
    seg_pipe.enable_xformers_memory_efficient_attention()
    seg_pipe.enable_model_cpu_offload()
    ### DO
    image_input = load_image(init_image_path)
    image_input = np.array(image_input)
    # get the pixel values
    pixel_values = image_processor(image_input, return_tensors="pt").pixel_values
    # do semantic segmentation
    with torch.no_grad():
        outputs = image_segmentor(pixel_values)
    print(image_input.size)
    # post process the semantic segmentation (target_sizes expects (height, width) pairs)
    seg = image_processor.post_process_semantic_segmentation(outputs, target_sizes=[(height, width)])[0]
    # add colors to the different identified classes
    color_seg = np.zeros((seg.shape[0], seg.shape[1], 3), dtype=np.uint8)  # height, width, 3
    for label, color in enumerate(seg_palette):
        color_seg[seg == label, :] = color
    # convert into PIL image format
    color_seg = color_seg.astype(np.uint8)
    image_seg = Image.fromarray(color_seg)
    mask_layer_image_path = save_image(image_seg, False)
    # Prepare arguments for the function call
    call_args = {
        "num_inference_steps": num_inference_steps,
        "height": height,
        "width": width,
        "negative_prompt": negative_prompt,
        "guidance_scale": guidance_scale,
        "generator": generator
    }
    # Include 'latents' only if it's not None
    if latents is not None:
        call_args["latents"] = latents
    image_output = seg_pipe(prompt, image_seg, **call_args).images[0]
    control_net_image_path = save_image(image_output)
# DEPTH
if init_image_path is not None and controlnet_type == "Depth":
    ### DEPTH
    # load the depth estimator model
    depth_estimator = pipeline('depth-estimation')
    # load the controlnet model for depth estimation
    depth_controlnet = ControlNetModel.from_pretrained(
        "lllyasviel/sd-controlnet-depth", torch_dtype=torch.float16
    )
    # load the stable diffusion pipeline with controlnet
    depth_pipe = StableDiffusionControlNetPipeline.from_pretrained(
        "runwayml/stable-diffusion-v1-5", controlnet=depth_controlnet, safety_checker=None, torch_dtype=torch.float16
    )
    #depth_pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)
    # set scheduler
    depth_pipe.scheduler = LCMScheduler.from_config(depth_pipe.scheduler.config)
    # load LCM-LoRA
    depth_pipe.load_lora_weights("latent-consistency/lcm-lora-sdv1-5")
    # enable efficient implementations using xformers for faster inference
    depth_pipe.enable_xformers_memory_efficient_attention()
    depth_pipe.enable_model_cpu_offload()
    ### DO
    image_input = load_image(init_image_path)
    # get depth estimates
    image_depth = depth_estimator(image_input)['depth']
    # convert to PIL image format
    image_depth = np.array(image_depth)
    image_depth = image_depth[:, :, None]
    image_depth = np.concatenate([image_depth, image_depth, image_depth], axis=2)
    image_depth = Image.fromarray(image_depth)
    mask_layer_image_path = save_image(image_depth, False)
    # Prepare arguments for the function call
    call_args = {
        "num_inference_steps": num_inference_steps,
        "height": height,
        "width": width,
        "negative_prompt": negative_prompt,
        "guidance_scale": guidance_scale,
        "generator": generator
    }
    # Include 'latents' only if it's not None
    if latents is not None:
        call_args["latents"] = latents
    image_output = depth_pipe(prompt, image_depth, **call_args).images[0]
    control_net_image_path = save_image(image_output)
## Normals
if init_image_path is not None and controlnet_type == "Normal":
    ### NORMALS
    # load the Dense Prediction Transformer (DPT) model for getting normal maps
    normal_depth_estimator = pipeline("depth-estimation", model="Intel/dpt-hybrid-midas")
    # load the controlnet model for normal maps
    normal_controlnet = ControlNetModel.from_pretrained(
        "fusing/stable-diffusion-v1-5-controlnet-normal", torch_dtype=torch.float16
    )
    # load the stable diffusion pipeline with controlnet
    normal_pipe = StableDiffusionControlNetPipeline.from_pretrained(
        "runwayml/stable-diffusion-v1-5", controlnet=normal_controlnet, safety_checker=None, torch_dtype=torch.float16
    )
    #normal_pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)
    # set scheduler
    normal_pipe.scheduler = LCMScheduler.from_config(normal_pipe.scheduler.config)
    # load LCM-LoRA
    normal_pipe.load_lora_weights("latent-consistency/lcm-lora-sdv1-5")
    normal_pipe.enable_xformers_memory_efficient_attention()
    normal_pipe.enable_model_cpu_offload()
    ### DO
    #image_input = load_image(init_image_path)
    #image_input = np.array(image_input)
    # do all the preprocessing to get the normal image
    image = normal_depth_estimator(init_image_path)['predicted_depth'][0]
    image = image.numpy()
    image_depth = image.copy()
    image_depth -= np.min(image_depth)
    image_depth /= np.max(image_depth)
    bg_threshold = 0.4
    x = cv2.Sobel(image, cv2.CV_32F, 1, 0, ksize=3)
    x[image_depth < bg_threshold] = 0
    y = cv2.Sobel(image, cv2.CV_32F, 0, 1, ksize=3)
    y[image_depth < bg_threshold] = 0
    z = np.ones_like(x) * np.pi * 2.0
    image = np.stack([x, y, z], axis=2)
    image /= np.sum(image ** 2.0, axis=2, keepdims=True) ** 0.5
    image = (image * 127.5 + 127.5).clip(0, 255).astype(np.uint8)
    image_normal = Image.fromarray(image)
    mask_layer_image_path = save_image(image_normal, False)
    # Prepare arguments for the function call
    call_args = {
        "num_inference_steps": num_inference_steps,
        "height": height,
        "width": width,
        "negative_prompt": negative_prompt,
        "guidance_scale": guidance_scale,
        "generator": generator
    }
    # Include 'latents' only if it's not None
    if latents is not None:
        call_args["latents"] = latents
    image_output = normal_pipe(prompt, image_normal, **call_args).images[0]
    control_net_image_path = save_image(image_output)
## OPENPOSE
if init_image_path is not None and controlnet_type == "OpenPose":
    ### OPEN POSE
    # load the openpose model
    #openpose = controlnet_aux.OpenposeDetector.from_pretrained('lllyasviel/ControlNet')
    openpose = OpenposeDetector.from_pretrained("lllyasviel/Annotators")
    # load the controlnet for openpose
    openpose_controlnet = ControlNetModel.from_pretrained(
        "lllyasviel/sd-controlnet-openpose", torch_dtype=torch.float16
    )
    # define stable diffusion pipeline with controlnet
    openpose_pipe = StableDiffusionControlNetPipeline.from_pretrained(
        "runwayml/stable-diffusion-v1-5", controlnet=openpose_controlnet, safety_checker=None, torch_dtype=torch.float16
    )
    #openpose_pipe.scheduler = UniPCMultistepScheduler.from_config(openpose_pipe.scheduler.config)
    # set scheduler
    openpose_pipe.scheduler = LCMScheduler.from_config(openpose_pipe.scheduler.config)
    # load LCM-LoRA
    openpose_pipe.load_lora_weights("latent-consistency/lcm-lora-sdv1-5")
    openpose_pipe.enable_xformers_memory_efficient_attention()
    openpose_pipe.enable_model_cpu_offload()
    ### DO
    image_input = load_image(init_image_path)
    image_input = np.array(image_input)
    image_pose = openpose(image_input)
    mask_layer_image_path = save_image(image_pose, False)
    # Prepare arguments for the function call
    call_args = {
        "num_inference_steps": num_inference_steps,
        "height": height,
        "width": width,
        "negative_prompt": negative_prompt,
        "guidance_scale": guidance_scale,
        "generator": generator
    }
    # Include 'latents' only if it's not None
    if latents is not None:
        call_args["latents"] = latents
    image_output = openpose_pipe(prompt, image_pose, **call_args).images[0]
    control_net_image_path = save_image(image_output)

#i2i_seed = seed
#if (image_to_image):
#    if (image_to_image_prompt == ""):
#        i2i_seed = i2i_seed + 1
#process_img = False
# the case where we pass both
#if not is_empty_string(init_image_path) and image_to_image:
#    init_image = Image.open(init_image_path).convert("RGB").resize((768, 512))
#    print(f"initial init_image_path {init_image_path}")
#    process_img = True
# the case where we pass only image_to_image
#elif image_to_image and is_empty_string(init_image_path):
#    init_image = Image.open(image_path).convert("RGB").resize((768, 512))
#    print(f"initial init_image_path {init_image_path}")
#    process_img = True
# the case where we pass only init_image_path
#elif init_image_path and not image_to_image:
#    init_image = Image.open(init_image_path).convert("RGB").resize((768, 512))
#    print(f"initial init_image_path {init_image_path}")
#    process_img = True
# we now compute new compel embeddings and increment our seed by one conditionally
#if process_img:
#    image = pipe_image_to_image(
#        prompt=image_to_image_prompt,
#        #prompt_embeds=i2i_conditioning,
#        #pooled_prompt_embeds=i2i_pooled,
#        image=init_image,
#        strength=image_to_image_strength,
#        seed=i2i_seed,
#        negative_prompt=negative_prompt,
#        width=width,
#        height=height,
#        guidance_scale=guidance_scale,
#        num_inference_steps=num_inference_steps,
#        generator=generator
#    ).images[0]
#    image_to_image_path = save_image(image)
#if init_image_path and image_to_image:
#    image_to_image_path = save_image(init_image)
#if not image_to_image:
#    image_to_image_path = None
'''
# Discord Bot Commands
@client.event
async def on_ready():
    print(f'Logged in as {client.user}')
    await tree.sync()
dimension_choices = [
    app_commands.Choice(name="512", value=512),
    app_commands.Choice(name="600", value=600),
    app_commands.Choice(name="720", value=720),
    app_commands.Choice(name="768", value=768),
    app_commands.Choice(name="800", value=800),
    app_commands.Choice(name="1024", value=1024),
    app_commands.Choice(name="1280", value=1280),
    app_commands.Choice(name="1440", value=1440),
    app_commands.Choice(name="1600", value=1600),
    app_commands.Choice(name="1920", value=1920),
    app_commands.Choice(name="2048", value=2048),
]
img_choices = [
    app_commands.Choice(name="6", value=6),
    app_commands.Choice(name="5", value=5),
    app_commands.Choice(name="4", value=4),
    app_commands.Choice(name="3", value=3),
    app_commands.Choice(name="2 (good for scale 2)", value=2),
    app_commands.Choice(name="1 (good for scale 3 or 4)", value=1),
]
controlnet_choices = [
    app_commands.Choice(name="Canny", value="Canny"),
    app_commands.Choice(name="Depth", value="Depth"),
    app_commands.Choice(name="Normal", value="Normal"),
    app_commands.Choice(name="Segmentation", value="Segmentation"),
    app_commands.Choice(name="OpenPose", value="OpenPose"),
]
scale_choices = [
    app_commands.Choice(name="1 no upscale (default)", value=1.0),
    app_commands.Choice(name="2 (1280x1024 max)", value=2.0),
    app_commands.Choice(name="3 (800x800 max)", value=3.0),
    app_commands.Choice(name="4 (600x600 max)", value=4.0),
    app_commands.Choice(name="90%", value=0.90),
    app_commands.Choice(name="75%", value=0.75),
    app_commands.Choice(name="66%", value=0.66),
    app_commands.Choice(name="50%", value=0.5),
    app_commands.Choice(name="33%", value=0.3),
]
caption_model_choices = [
    app_commands.Choice(name="blip-base", value="blip-base"),
    app_commands.Choice(name="blip-large", value="blip-large"),
    app_commands.Choice(name="git-large-coco", value="git-large-coco"),
]
clip_model_choices = [
    app_commands.Choice(name="ViT-L-14/openai", value="ViT-L-14/openai"),
    app_commands.Choice(name="ViT-H-14/laion2b_s32b_b79k", value="ViT-H-14/laion2b_s32b_b79k"),
]
clip_mode_choices = [
    app_commands.Choice(name='best', value='best'),
    app_commands.Choice(name='fast', value='fast'),
    app_commands.Choice(name='classic', value='classic'),
    app_commands.Choice(name='negative', value='negative'),
]
captioning_type_choices = [
    app_commands.Choice(name="image_analysis", value="image_analysis"),
    app_commands.Choice(name="image_to_prompt", value="image_to_prompt"),
    app_commands.Choice(name="image_to_prompt and analysis", value="both"),
]
# Utility function to safely extract the integer value
def get_int_value(choice_or_int):
    return choice_or_int.value if hasattr(choice_or_int, 'value') else choice_or_int

async def download_image_attachment(attachment: discord.Attachment) -> str:
    # Specify the directory where you want to save the image
    save_directory = "./"
    os.makedirs(save_directory, exist_ok=True)
    # Construct the full path where the image will be saved
    file_path = os.path.join(save_directory, attachment.filename)
    # Download the image
    async with aiohttp.ClientSession() as session:
        async with session.get(attachment.url) as response:
            if response.status == 200:
                # Write the image to a file
                with open(file_path, 'wb') as f:
                    f.write(await response.read())
            else:
                raise Exception(f"Failed to download image: HTTP {response.status}")
    return file_path
@tree.command(name="img", description="Generate an image based on a prompt")
@app_commands.choices(
    width=dimension_choices,
    height=dimension_choices,
    scale=scale_choices,
    img_count=img_choices,
    caption_model=caption_model_choices,
    clip_model=clip_model_choices,
    captioning_type=captioning_type_choices,
    clip_mode=clip_mode_choices,
    #controlnet_type=controlnet_choices
)
@app_commands.describe(
    prompt="The description for the image to generate",
    seed="The seed for the image generation (random default)",
    width=f"The width of the image ({WIDTH_DEFAULT} default)",
    height=f"The height of the image ({HEIGHT_DEFAULT} default)",
    guidance=f"The guidance scale for image generation ({GUIDANCE_DEFAULT} default)",
    steps=f"The number of inference steps ({STEPS_DEFAULT} default)",
    refine="Whether to apply the refiner (True default)",
    scale="upscale the image (1x default)",
    img_count=f"how many in the batch? ({IMAGES_COUNT_DEFAULT} default)",
    negative_prompt="add terms you don't want to see (optional)",
    #image_to_image="Whether to Image to Image (False default)",
    #image_to_image_strength="The strength of the Image to Image transfer (0.3 default)",
    auto="Use prompt AI to extend prompt (False default)",
    #image_to_image_prompt="The description for the image_to_image to generate",
    #quality_terms="The number of quality terms to include in the image description (0 default)",
    #lighting_terms="The number of lighting terms to include in the image description (0 default)",
    #media_terms="The number of media terms to include in the image description (0 default)",
    #random_real_artists="The number of random real artist names to include in the image description (0 default)",
    #style_terms="The number of style terms to include in the image description (0 default)",
    random_terms="Will include one random style, lighting, quality and media term with one real artist name (False default)",
    #controlnet_type="The type of control network processing to apply (e.g., Canny, Depth)",
    init_image="Initial image for Image to Image generation (optional)",
    init_image_hidden="DO NOT SHOW init_image in outputs",
    caption_model="Select the model for image captioning: 'blip-base', 'blip-large', or 'git-large-coco' (blip-large Default)",
    clip_model="Choose the CLIP model for image analysis: 'ViT-L-14/openai' or 'ViT-H-14/laion2b_s32b_b79k' (ViT-L-14 Default)",
    clip_mode="Choose the CLIP mode for image analysis: 'best', 'fast', 'classic', 'negative' (best Default)",
    captioning_type="Type of captioning: 'image_analysis' for descriptive captions, 'image_to_prompt' for creative prompts",
    target_mode="try to coerce one set of weights towards another",
    target_strength=f"how much do we try to move towards the target weights 0 -> 1 ({TARGET_STRENGTH_DEFAULT} Default)"
)
async def img(
    ctx: discord.Interaction,
    prompt: str,
    width: int = WIDTH_DEFAULT,  # The parameter will be an int or a Choice object
    height: int = HEIGHT_DEFAULT,  # The parameter will be an int or a Choice object
    guidance: float = GUIDANCE_DEFAULT,  # 1.0
    steps: int = STEPS_DEFAULT,  # 14
    refine: bool = True,
    scale: float = 1.0,
    seed: int = 0,
    img_count: int = IMAGES_COUNT_DEFAULT,  # 6
    #negative_prompt: str = "ugly, blurry, poor quality, watermarked, text, typography, signature, signed"
    negative_prompt: str = NEGATIVE_PROMPT_DEFAULT,
    #image_to_image: bool = False,
    #image_to_image_strength: float = 0.3,
    auto: bool = False,
    #image_to_image_prompt: str = ""
    #quality_terms: int = 0,
    #lighting_terms: int = 0,
    #media_terms: int = 0,
    #random_real_artists: int = 0,
    #style_terms: int = 0,
    random_terms: bool = False,
    #controlnet_type: str = "Depth",
    init_image: discord.Attachment = None,  # New parameter for initial image
    init_image_hidden: bool = False,
    caption_model: str = "blip-large",  # default value for caption model
    clip_model: str = "ViT-L-14/openai",  # default value for clip model
    clip_mode: str = "best",  # or 'fast', 'classic', 'negative'
    captioning_type: str = "image_to_prompt",  # default value for captioning type
    target_mode: bool = False,
    target_strength: float = TARGET_STRENGTH_DEFAULT,
):
    global DEFAULT_SCALE
    global IMAGE_COUNT
    global org_init_image_size
    global ci
    global target_conditioning
    global target_pooled
    global negative_target_conditioning
    global negative_target_pooled
    global target_prompt
    global negative_target_prompt
    process_start_time = time.time()  # Start timing
    IMAGE_COUNT = img_count  # honour the requested batch size
    DEFAULT_SCALE = scale
    # Extract integer values safely
    width_value = get_int_value(width)
    height_value = get_int_value(height)
    # Seed handling logic
    seed = seed if (seed is not None) and (seed > 0) else randomize_seed_fn(seed)
    await ctx.response.defer()
    images = []
    descriptions = []
    captioning_processing_time = 0
    print(f"original prompt {prompt}")
    # ADD PROMPT REQUESTS
    #value_list = {
    #    "quality": quality_terms,
    #    "lighting": lighting_terms,
    #    "media": media_terms,
    #    "real_artists": random_real_artists,
    #    "style": style_terms,
    #}
    # Building the prompt using a generator expression
    #prompt += " " + " ".join(get_random_terms(list_name, value_list[list_name]) for list_name in value_list.keys())
    total_size = 0
    max_size_mb = 7.999
    # Handle the initial image if provided
    init_image_path = None
    if init_image is not None:
        # Download the image attachment
        init_image_path = await download_image_attachment(init_image)
    if init_image_path is not None:
        config = Config()
        config.clip_model_name = clip_model
        config.caption_model_name = caption_model
        ci = Interrogator(config)
        start_time = time.time()  # Start timing
        if not target_mode:
            if captioning_type == "image_to_prompt" or captioning_type == "both":
                prompt += " " + image_to_prompt(init_image_path, clip_mode)
                negative_prompt = image_to_prompt(init_image_path, 'negative')
        if target_mode:
            #target_prompt = ""
            #negative_target_prompt = ""
            if captioning_type == "image_to_prompt" or captioning_type == "both":
                target_prompt = image_to_prompt(init_image_path, clip_mode)
                negative_target_prompt = image_to_prompt(init_image_path, 'negative')
                target_conditioning, target_pooled = compel(target_prompt)
                negative_target_conditioning, negative_target_pooled = compel(negative_target_prompt)
        if captioning_type == "image_analysis" or captioning_type == "both":
            medium, artist, movement, trending, flavor = image_analysis(init_image_path)
            # Iterate through each attribute list and add to the prompt
            for rank_dict in [medium, artist, movement, trending, flavor]:
                for attribute, similarity in rank_dict.items():
                    prompt += f'{attribute}, '
                    #prompt += f'{attribute} ({similarity:.2f}), '
            # Remove the trailing comma and space from the prompt
            prompt = prompt.strip(', ')
        end_time = time.time()  # End timing
        captioning_processing_time = end_time - start_time
    if random_terms:
        prompt += " " + get_random_mix()
    # Load the CLIP tokenizer
    tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-base-patch32")
    # Tokenize the text
    tokenized_prompt = tokenizer.encode(prompt)
    #if init_image_path is not None:
    #    print(f"Extended prompt: {prompt}")
    #    print(f"len Extended prompt: {len(prompt)}, len Extended token: {len(tokenized_prompt)}")
    if auto:
        while len(tokenized_prompt) < 70:
            prompt = sd_pipeline(prompt + ',', num_return_sequences=1)[0]["generated_text"]
            # Replace multiple spaces, newlines, and commas with a single space or comma respectively
            prompt = re.sub(r'\s+', ' ', prompt)
            prompt = re.sub(r',+', ',', prompt).strip()
            print(f"Extended prompt: {prompt}")
            # Tokenize the text
            tokenized_prompt = tokenizer.encode(prompt)
    #if len(tokenized_prompt) > 77:
    #    print(f"trimming to 77 Tokens")
    # Truncate the prompt if it exceeds 77 tokens
    #while len(tokenized_prompt) > 77:
    #    # Remove the last word
    #    prompt = ' ' + ' '.join(prompt.split(' ')[:-1])
    #    # Re-tokenize
    #    tokenized_prompt = tokenizer.encode(prompt)
    # Replace multiple spaces and newlines with a single space
    prompt = re.sub(r'\s+', ' ', prompt).strip()
    if init_image_path is not None:
        print(f"Extended prompt: {prompt}")
        print(f"len Extended prompt: {len(prompt)}, len Extended token: {len(tokenized_prompt)}")
    #if image_to_image:
    #    descriptions.append(f"`PROMPT`: {prompt} `W`: {width_value} `H`: {height_value} `CFG`: {guidance} `STEP`: {steps} `REFINER`: {refine} `SCALE`: {scale} `I2I_STR`: {image_to_image_strength}")
    for i in range(img_count):
        # Call generate_for_discord once per image in the batch
        current_seed = seed + i
        image_path, used_seed, steps, final_prompt, control_net_image_path, mask_layer_image_path = generate_for_discord(
            prompt,
            width_value,
            height_value,
            guidance,
            steps,
            refine,
            scale,
            current_seed,
            negative_prompt,
            auto,
            random_terms,
            init_image_path,  # Pass the path of the downloaded initial image
            target_mode,
            target_strength,
        )
        if init_image is not None and len(init_image.filename) > 0 and i == 0:
            ## resize mask
            if not init_image_hidden:
                # Save the attachment with a unique name (Attachment.save is a coroutine, so await it)
                mask_unique_name = str(uuid.uuid4()) + '.png'
                await init_image.save(str(os.path.join(DEFAULT_IMAGES_FOLDER_PREFIX, mask_unique_name)))
                mask_scale_name = os.path.join(DEFAULT_IMAGES_FOLDER_PREFIX, 'discord_img_gen_mask_scale_' + mask_unique_name)
                RESIZE_SCALE = .25
                w = init_image.width
                h = init_image.height
                new_w = int(w * RESIZE_SCALE)
                new_h = int(h * RESIZE_SCALE)
                processing_img = Image.open(os.path.join(DEFAULT_IMAGES_FOLDER_PREFIX, mask_unique_name))
                mask_processed_img = processing_img.resize((new_w, new_h))
                mask_processed_img.save(mask_scale_name)
                org_init_image_size = os.path.getsize(mask_scale_name) / (1024 * 1024)  # Convert size to MB
                if total_size + org_init_image_size > max_size_mb:
                    break  # Break the loop if the next image would exceed the limit
                total_size += org_init_image_size
                descriptions.append(f"`NET_SEED`: {seed} `FILESIZE`: {str(org_init_image_size)[:4]}MB")
                images.append(discord.File(mask_scale_name, description=descriptions[len(descriptions)-1]))
        if not scale < 1:
            image_size = os.path.getsize(str(os.path.join(DEFAULT_IMAGES_FOLDER_PREFIX, image_path))) / (1024 * 1024)  # Convert size to MB
            # Check if adding the next image will exceed the total size limit
            if total_size + image_size > max_size_mb:
                break  # Break the loop if the next image would exceed the limit
            total_size += image_size
            descriptions.append(f"`SEED`: {used_seed} `FILESIZE`: {str(image_size)[:4]}MB")
            images.append(discord.File(str(os.path.join(DEFAULT_IMAGES_FOLDER_PREFIX, image_path)), description=descriptions[len(descriptions)-1]))
        if mask_layer_image_path is not None and i == 0:
            mask_layer_image_size = os.path.getsize(str(os.path.join(DEFAULT_IMAGES_FOLDER_PREFIX, mask_layer_image_path))) / (1024 * 1024)  # Convert size to MB
            if total_size + mask_layer_image_size > max_size_mb:
                break  # Break the loop if the next image would exceed the limit
            total_size += mask_layer_image_size
            descriptions.append(f"`NET_SEED`: {seed} `FILESIZE`: {str(mask_layer_image_size)[:4]}MB")
            images.append(discord.File(str(os.path.join(DEFAULT_IMAGES_FOLDER_PREFIX, mask_layer_image_path)), description=descriptions[len(descriptions)-1]))
        #if control_net_image_path is not None:
        #    init_image_size = os.path.getsize(str(os.path.join(DEFAULT_IMAGES_FOLDER_PREFIX, control_net_image_path))) / (1024 * 1024)  # Convert size to MB
        #    if total_size + init_image_size > max_size_mb:
        #        break  # Break the loop if the next image would exceed the limit
        #    total_size += init_image_size
        #    descriptions.append(f"`NET_SEED`: {seed} `FILESIZE`: {str(init_image_size)[:4]}MB")
        #    images.append(discord.File(str(os.path.join(DEFAULT_IMAGES_FOLDER_PREFIX, control_net_image_path)), description=descriptions[len(descriptions)-1]))
        #if image_to_image:
        #    image_to_image_size = os.path.getsize(image_to_image_path) / (1024 * 1024)  # Convert size to MB
        #    if total_size + image_to_image_size > max_size_mb:
        #        break  # Break the loop if the next image would exceed the limit
        #    total_size += image_to_image_size
        #    descriptions.append(f"`I2I_SEED`: {i2i_seed} `FILESIZE`: {str(image_to_image_size)[:4]}MB")
        #    images.append(discord.File(image_to_image_path, description=descriptions[len(descriptions)-1]))
        upscale_control_net_image_size = None
        upscale_image_size = None
        if scale != 1:
            # Build the upscale paths relative to the images folder rather than a hard-coded absolute path
            if image_path is not None:
                upscale_image_path = os.path.join(DEFAULT_IMAGES_FOLDER_PREFIX, f"discord_img_gen_upscale_{image_path}")
            if init_image_path is not None and len(init_image.filename) > 0:
                upscale_control_net_image_path = os.path.join(DEFAULT_IMAGES_FOLDER_PREFIX, f"discord_img_gen_upscale_{control_net_image_path}")
            if image_path is not None:
                upscale_image_size = os.path.getsize(upscale_image_path) / (1024 * 1024)
            if init_image_path is not None and len(init_image.filename) > 0:
                upscale_control_net_image_size = os.path.getsize(upscale_control_net_image_path) / (1024 * 1024)
            if init_image_path is None:
                if upscale_control_net_image_size is None:
                    if total_size + upscale_image_size <= max_size_mb:
                        if scale < 1:
                            descriptions.append(f"`SEED`: {used_seed} `FILESIZE`: {str(upscale_image_size)[:4]}MB")
                        else:
                            descriptions.append(f"`SEED`: {used_seed} `FILESIZE`: {str(image_size)[:4]}MB `UPSCALE FILESIZE`: {str(upscale_image_size)[:4]}MB")
                        images.append(discord.File(upscale_image_path, description=descriptions[len(descriptions)-1]))
                    else:
                        print(f"{total_size} + {upscale_image_size} >= {max_size_mb}")
                        break  # Break the loop if the next image would exceed the limit
            # control net
            if init_image_path is not None and len(init_image.filename) > 0:
                if mask_layer_image_path is not None and i == 0:
                    mask_layer_image_size = os.path.getsize(str(os.path.join(DEFAULT_IMAGES_FOLDER_PREFIX, mask_layer_image_path))) / (1024 * 1024)  # Convert size to MB
                    if total_size + mask_layer_image_size > max_size_mb:
                        break  # Break the loop if the next image would exceed the limit
                    total_size += mask_layer_image_size
                    descriptions.append(f"`NET_SEED`: {seed} `FILESIZE`: {str(mask_layer_image_size)[:4]}MB")
                    images.append(discord.File(str(os.path.join(DEFAULT_IMAGES_FOLDER_PREFIX, mask_layer_image_path)), description=descriptions[len(descriptions)-1]))
                if total_size + upscale_control_net_image_size <= max_size_mb:
                    if scale < 1:
                        descriptions.append(f"`SEED`: {used_seed} `FILESIZE`: {str(upscale_control_net_image_size)[:4]}MB")
                    else:
                        descriptions.append(f"`SEED`: {used_seed} `FILESIZE`: {str(org_init_image_size)[:4]}MB `UPSCALE FILESIZE`: {str(upscale_control_net_image_size)[:4]}MB")
                    images.append(discord.File(str(os.path.join(DEFAULT_IMAGES_FOLDER_PREFIX, upscale_control_net_image_path)), description=descriptions[len(descriptions)-1]))
                else:
                    print(f"{total_size} + {upscale_control_net_image_size} >= {max_size_mb}")
                    break  # Break the loop if the next image would exceed the limit
    total_end_time = time.time()  # End timing
    total_processing_time = total_end_time - process_start_time
    descriptions.append(f"`PROMPT`: {final_prompt} `W`: {width_value} `H`: {height_value} `CFG`: {guidance} `STEP`: {steps} `REFINER`: {refine} `SCALE`: {scale}\n`TOTAL_TIME` {str(total_processing_time)[:4]}sec\n")
    if init_image_path is not None:
        # Append the captioning details to the last description
        descriptions[len(descriptions)-1] += f"`CAPTION_MODEL` {caption_model} `CLIP_MODEL` {clip_model} `CLIP_MODE` {clip_mode} `CAPTION_TYPE` {captioning_type} \n`TIME` {str(captioning_processing_time)[:4]}sec"
    if target_mode:
        descriptions[len(descriptions)-1] += f"`TRGT_MODE` {target_mode} `TRGT_STR` {target_strength} `TRGT_PRMPT` {target_prompt} `NEG_TRGT_PRMPT` {negative_target_prompt}"
    #if auto:
    #    await ctx.followup.send(str("\n".join(descriptions)) + f"\nExtn: {final_prompt}", files=images)
    #else:
    await ctx.followup.send("\n".join(descriptions), files=images)
# Run the bot with your token
if __name__ == "__main__":
    try:
        client.run(os.getenv("DISCORD_BOT_TOKEN"))
    except Exception as e:
        print(e)