This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Claude 3 Opus とめっちゃやり取りして動くようになった | |
# python vae_vs_taesd_gradio.py --image_dir /path/to/image/directory | |
import os | |
import argparse | |
import random | |
from PIL import Image | |
import torch | |
from diffusers import AutoencoderKL, AutoencoderTiny | |
import numpy as np |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Claude 3 Opus にだいたい書いてもらった | |
# python vae_vs_taesd.py --image_dir /path/to/image/directory | |
import os | |
import argparse | |
import random | |
from PIL import Image, ImageTk | |
import torch | |
from diffusers import AutoencoderKL, AutoencoderTiny | |
import tkinter as tk |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
each_control_net_enabled = [self.control_net_enabled] * len(self.control_nets) | |
for i, t in enumerate(tqdm(timesteps)): | |
# ↓ ここから | |
# test: chroma key like composition | |
if latents.shape[0] == 4: | |
# run this script with batch size 4 | |
# sample prompt for ANIMAGINE XL V3.0: 2nd prompt doesn't have details, because it is used for making the mask | |
# green surface of green screen --n color, artifact, object, shadow, frame --d 1 | |
# 1girl, serafuku, standing, cowboy shot, green background, masterpiece, best quality --n nsfw, lowres, bad anatomy, bad hands, text, error, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality, normal quality, jpeg artifacts, signature, watermark, username, blurry, artist name --d 1 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import comfy | |
from comfy.samplers import KSAMPLER | |
import torch | |
from torchvision.transforms.functional import gaussian_blur | |
from comfy.k_diffusion.sampling import default_noise_sampler, get_ancestral_step, to_d, BrownianTreeNoiseSampler | |
from tqdm.auto import trange | |
@torch.no_grad() | |
def sample_euler_ancestral( |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# how much to increase the scale at each step: .125 seems to work well (because it's 1/8?) | |
# 各ステップに拡大率をどのくらい増やすか:.125がよさそう(たぶん1/8なので) | |
scale_step = 0.125 | |
# timesteps at which to start increasing the scale: model and prompt dependent | |
# 拡大を開始するtimesteps:モデルとプロンプトによる | |
start_timesteps = 800 | |
# how many steps to wait before increasing the scale again: smaller values lead to more artifacts; the best value also depends on the total number of steps | |
# 何ステップごとに拡大するか:総ステップ数にも関係する |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def forward(self, x, timesteps=None, context=None, y=None, **kwargs): | |
# broadcast timesteps to batch dimension | |
timesteps = timesteps.expand(x.shape[0]) | |
hs = [] | |
t_emb = get_timestep_embedding(timesteps, self.model_channels) # , repeat_only=False) | |
t_emb = t_emb.to(x.dtype) | |
emb = self.time_embed(t_emb) | |
assert x.shape[0] == y.shape[0], f"batch size mismatch: {x.shape[0]} != {y.shape[0]}" |
NewerOlder