This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import torch | |
import torch.nn as nn | |
import torch.nn.functional as F | |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu") | |
class KaiserLowpass(nn.Module): | |
def __init__(self, width=7, beta=11, periodic=False, padding_mode='replicate'): | |
super().__init__() | |
self.padding_mode = padding_mode |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import torch | |
import torch.nn as nn | |
class Residual(nn.Module):
    '''
    Wraps a callable so that its input is added back onto its output.

    fn: the callable to wrap. It is invoked as fn(x, *args, **kwargs) and its
        result must support "+" with x.
    '''

    def __init__(self, fn):
        super().__init__()
        self.fn = fn

    def forward(self, x, *args, **kwargs):
        '''
        Returns fn(x, *args, **kwargs) + x.

        Extra positional and keyword arguments are forwarded to fn unchanged;
        only x takes part in the residual addition.
        '''
        return self.fn(x, *args, **kwargs) + x
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import torch | |
import torch.nn as nn | |
import torch.nn.functional as F | |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu") | |
class Sobel(nn.Module): | |
def __init__(self,structure=False,scharr=True, padding_mode='reflect'): | |
super().__init__() | |
self.structure = structure |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def vqgan_dec_skip_lores_attn(h, temb=None): | |
# middle | |
h = vqgan.decoder.mid.block_1(h, temb) | |
h_half = F.upsample(h,scale_factor=0.5,mode='bicubic',align_corners=False) | |
h_half = vqgan.decoder.mid.attn_1(h_half) - h_half | |
h_half = F.upsample(h_half,scale_factor=2,mode='bicubic',align_corners=False) | |
h = h + h_half | |
h = vqgan.decoder.mid.block_2(h, temb) | |
# upsampling |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Perform max pool 2d with indices on a tensor
max_size = 8
max_output, max_indices = F.max_pool2d_with_indices(input_tensor, max_size)
# The returned indices are flat offsets into each ORIGINAL input plane, so the
# unpool target must have the original height/width. Without an explicit
# output_size, max_unpool2d infers a size that is only right when the height
# and width are exact multiples of max_size.
input_hw = list(input_tensor.shape[-2:])
# Unpool it to get a tensor of the original size with zeros in all non-max areas
max_unpool = F.max_unpool2d(max_output, max_indices, max_size, max_size, output_size=input_hw)
# Unpool it using a tensor of ones with the same indices to get ones where the tensor was sampled
max_mask = F.max_unpool2d(torch.ones_like(max_output), max_indices, max_size, max_size, output_size=input_hw)
# Makes a kernel that's round and the distance from the center |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#1 is end and 0 is start in the map. | |
def map_blur(img,map,s_start=0.375,s_end=8,steps=8): | |
img_slices = img * 0 | |
map_slices = map * 0 | |
for s in range(steps): | |
sigma = (s/(steps-1)) * (s_end-s_start) + s_start | |
slice_start = (s+0)/steps | |
slice_end = (s+1)/steps | |
map_slice = torch.logical_and( | |
torch.greater_equal(map,slice_start), |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
I used CLIP ViT-L/14 token embeddings for tokens that were ASCII-only, ended in </w> (meaning the token isn't a prefix, though that doesn't guarantee it's a full word), started with a letter, had 3 or more letters, and weren't the start/end tokens.
Due to memory constraints I had to reduce the number of channels, so the clustering uses the PCA of these embeddings, reducing 768 channels to 512. From there, I ran k-means using https://github.com/subhadarship/kmeans_pytorch with 128 clusters and cosine distance, inside PyTorch's autocast wrapper in the hope of saving memory.
The resulting cluster IDs were then used to average the non-PCA tokens, so the PCA only affected the clustering and not the actual values of the clusters. The averaging was done in .half() precision to save memory.
The words listed are the 64 best matching tokens (cosine similarity) for each cluster center. | |
Cluster 0 : | |
marker | |
markers | |
signaling |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import itertools | |
def prompt_combinations(prompt_parts):
    '''
    Build every prompt that can be made by picking one option from each slot.

    Provide a list of lists of prompt parts, like:
    [ ["A ","An "], ["anteater","feather duster"] ]

    Returns a list of strings, one per combination, each formed by
    concatenating the chosen parts in slot order. Combinations come out in
    itertools.product order (the last slot varies fastest). If any slot is
    empty the result is an empty list; if there are no slots at all the
    result is [''].
    '''
    # Iterate the product directly instead of materialising it and indexing.
    return [''.join(parts) for parts in itertools.product(*prompt_parts)]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import torch | |
import torch.nn as nn | |
import torchvision.transforms.functional as TF | |
1class CFGDenoiserSlew(nn.Module): | |
''' | |
Clamps the maximum change each step can have. | |
"limit" is the clamp bounds. 0.4-0.8 seem good, 1.6 and 3.2 have very little difference and might represent the upper bound of values. | |
"blur" is the radius of a gaussian blur used to split the limited output with the original output in an attempt to preserve detail and color. | |
"last_step_is_blur" if true will compare the model output to the blur-split output rather than just the limited output, can look nicer. |