Birch-san

@Birch-san
Birch-san / benchmark2.py
Created November 5, 2022 01:02
benchmark: batched matmul
import torch
from torch import einsum, matmul, bmm
import time
repeats = 10
batch_duration = 0
for ix in range(repeats):
    attn = torch.rand(16, 4096, 4096, dtype=torch.float, device="mps")
    v = torch.rand(16, 4096, 40, dtype=torch.float, device="mps")
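    # the preview cuts off here; a sketch (not the gist's verbatim code) of
    # how such a timing loop is typically completed:
    start = time.perf_counter()
    out = bmm(attn, v)  # [16, 4096, 4096] @ [16, 4096, 40] -> [16, 4096, 40]
    torch.mps.synchronize()  # assumes a torch build exposing torch.mps.synchronize; drains the MPS queue so timing is honest
    batch_duration += time.perf_counter() - start
print(f"bmm avg per iteration: {batch_duration / repeats:.4f}s")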
@Birch-san
Birch-san / benchmark.py
Last active November 22, 2022 21:10
benchmark: batched matmul with scale factor
import torch
from torch import einsum, tensor, matmul, bmm, baddbmm, empty
import time
scale=2
repeats = 10
# both einsums use the same plan, so whichever batch runs first has to pay the price of warmup
# uncomment this to run a warmup before either batch runs, for fairer comparison of batch avg time
# q = torch.rand(16, 4096, 40, dtype=torch.float, device="mps")
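# the preview cuts off here; a sketch (not the gist's verbatim code) of the
# comparison the imports suggest — folding the scale factor into the batched
# matmul via baddbmm's alpha, with q/k shapes assumed from the warmup line above:
q = torch.rand(16, 4096, 40, dtype=torch.float, device="mps")
k = torch.rand(16, 4096, 40, dtype=torch.float, device="mps")
start = time.perf_counter()
for _ in range(repeats):
    # beta=0 ignores the uninitialized `empty` input; alpha applies the scale
    # inside the same kernel as the batched matmul instead of a separate multiply
    attn = baddbmm(
        empty(16, 4096, 4096, dtype=q.dtype, device=q.device),
        q,
        k.transpose(1, 2),
        beta=0,
        alpha=scale,
    )
torch.mps.synchronize()  # assumes a torch build exposing torch.mps.synchronize
print(f"baddbmm avg per iteration: {(time.perf_counter() - start) / repeats:.4f}s")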
@Birch-san
Birch-san / histogram.py
Created October 17, 2022 00:12
plotting da histograms (partial snippet from Jupyter notebook)
import matplotlib.pyplot as plt
import torch
from torch import Tensor, FloatTensor
# …
latents: FloatTensor = self.inner_model(x, sigma, cond=cond, **kwargs)
unscaled: Tensor = latents / self.scale_factor
chs = [torch.histogram(c) for c in unscaled[0].flatten(1)]
h = torch.histogram(unscaled[0].ravel())
plt.figure(figsize=(10,2))
plt.title('Per-channel latent values after denoising sigma %.3f at CFG scale %d' % (sigma.item(), cfg_scale))
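# the preview cuts off here; a sketch (assumed, not the gist's verbatim code)
# of plotting each channel's histogram from the (hist, bin_edges) pairs in chs:
for ix, (hist, bin_edges) in enumerate(chs):
    plt.stairs(hist, bin_edges, label=f'channel {ix}')
plt.legend()
plt.show()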
@Birch-san
Birch-san / gist:f4ae5c843f2f31319cabd9b40c94e4be
Created October 10, 2022 18:20
denoised latents returned by k-diffusion callback at each sampler step (8 steps, Heun, sigma_min=0.092)
CFG20
sigma: 14.615 absmax: 6.942 std: 2.517 min: -6.840 max: 6.942 shape: [1, 4, 64, 64]
sigma: 8.560 absmax: 16.164 std: 3.112 min: -16.164 max: 12.644 shape: [1, 4, 64, 64]
sigma: 8.560 absmax: 28.488 std: 4.065 min: -19.071 max: 28.488 shape: [1, 4, 64, 64]
sigma: 4.797 absmax: 12.463 std: 1.584 min: -10.655 max: 12.463 shape: [1, 4, 64, 64]
sigma: 4.797 absmax: 24.024 std: 1.559 min: -24.024 max: 11.797 shape: [1, 4, 64, 64]
sigma: 2.551 absmax: 5.675 std: 1.151 min: -5.675 max: 4.937 shape: [1, 4, 64, 64]
sigma: 2.551 absmax: 10.035 std: 1.201 min: -10.035 max: 5.773 shape: [1, 4, 64, 64]
sigma: 1.274 absmax: 4.725 std: 1.095 min: -3.790 max: 4.725 shape: [1, 4, 64, 64]
sigma: 1.274 absmax: 4.997 std: 1.104 min: -4.997 max: 4.111 shape: [1, 4, 64, 64]
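For reference, k-diffusion samplers accept a callback that receives a dict including 'sigma' and 'denoised' at each step; a hypothetical sketch (the function name log_stats and the format string are illustrative, not from the gist) of a callback that prints stats in the shape shown above:

from k_diffusion.sampling import sample_heun

def log_stats(args: dict) -> None:
    d = args['denoised']  # the model's denoised prediction at this step
    print('sigma: %.3f absmax: %.3f std: %.3f min: %.3f max: %.3f shape: %s' % (
        args['sigma'].item(), d.abs().max().item(), d.std().item(),
        d.min().item(), d.max().item(), list(d.shape)))

# usage (other arguments elided): sample_heun(model, x, sigmas, callback=log_stats)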
@Birch-san
Birch-san / autograd.txt
Created September 20, 2022 00:37
stable-diffusion textual inversion training autograd anomaly
/Users/birch/anaconda3/envs/ldmwaifu-stable/lib/python3.10/site-packages/torch/autograd/__init__.py:173: UserWarning: Error detected in NativeLayerNormBackward0. Traceback of forward call that caused the error:
File "/Users/birch/anaconda3/envs/ldmwaifu-stable/lib/python3.10/runpy.py", line 196, in _run_module_as_main
return _run_code(code, main_globals, None,
File "/Users/birch/anaconda3/envs/ldmwaifu-stable/lib/python3.10/runpy.py", line 86, in _run_code
exec(code, run_globals)
File "/Users/birch/.vscode/extensions/ms-python.python-2022.14.0/pythonFiles/lib/python/debugpy/adapter/../../debugpy/launcher/../../debugpy/__main__.py", line 39, in <module>
cli.main()
File "/Users/birch/.vscode/extensions/ms-python.python-2022.14.0/pythonFiles/lib/python/debugpy/adapter/../../debugpy/launcher/../../debugpy/../debugpy/server/cli.py", line 430, in main
run()
File "/Users/birch/.vscode/extensions/ms-python.python-2022.14.0/pythonFiles/lib/python/debugpy/adapter/../../debugpy/launcher/../../d
@Birch-san
Birch-san / nightly.txt
Created September 19, 2022 18:38
8 steps, Heun sampler, stable-diffusion, PyTorch nightly 1.13.0.dev20220917
-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------
                                                   Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg    # of Calls
-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------
                                                    job         0.27%      29.274ms       100.00%       10.976s       10.976s             1
                                              iteration         0.00%     135.000us        98.68%       10.831s       10.831s             1
                                                  batch         0.92%     101.481ms        98.68%       10.831s       10.831s             1
                                           kdiff_sample         0.00%     281.000us        93.19%       10.229s       10.229s             1
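The named rows (job, iteration, batch, kdiff_sample) look like nested torch.profiler record_function ranges; a minimal sketch of how such a table can be produced (the nesting and the placeholder body are assumptions):

import torch
from torch.profiler import profile, record_function

with profile() as prof:
    with record_function("job"):
        with record_function("iteration"):
            with record_function("batch"):
                with record_function("kdiff_sample"):
                    pass  # the sampler's work would run here
print(prof.key_averages().table(sort_by="cpu_time_total"))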
@Birch-san
Birch-san / stable.txt
Created September 19, 2022 18:37
8 steps, Heun sampler, stable-diffusion, PyTorch stable 1.12.1
-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------
                                                   Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg    # of Calls
-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------
                                                    job         0.34%      34.856ms        99.99%       10.156s       10.156s             1
                                              iteration         0.00%     169.000us        98.56%       10.010s       10.010s             1
                                                  batch         1.67%     169.994ms        98.56%       10.010s       10.010s             1
                                           kdiff_sample         0.00%     299.000us        91.96%        9.340s        9.340s             1
@Birch-san
Birch-san / train.md
Last active September 11, 2022 23:19
Training waifu-diffusion
@Birch-san
Birch-san / gist:6cd1574e51871a5e2b88d59f0f3d4fd3
Created August 31, 2022 00:07
Stable Diffusion DDIM sigmas
supported sigmas:
tensor([ 0.0292,  0.0413,  0.0507,  0.0586,  0.0656,  0.0720,  0.0779,  0.0834,
         0.0886,  0.0936,  0.0983,  0.1028,  0.1072,  0.1114,  0.1155,  0.1195,
         0.1234,  0.1271,  0.1308,  0.1345,  0.1380,  0.1415,  0.1449,  0.1482,
         0.1515,  0.1548,  0.1580,  0.1611,  0.1642,  0.1673,  0.1704,  0.1734,
         0.1763,  0.1793,  0.1822,  0.1850,  0.1879,  0.1907,  0.1935,  0.1963,
         0.1991,  0.2018,  0.2045,  0.2072,  0.2099,  0.2125,  0.2152,  0.2178,
         0.2204,  0.2230,  0.2256,  0.2281,  0.2307,  0.2332,  0.2358,  0.2383,
         0.2408,  0.2433,  0.2458,  0.2482,  0.2507,  0.2531,  0.2556,  0.2580,
         0.2604,  0.2628,  0.2653,  0.2677,  0.2700,  0.2724,  0.2748,  0.2772,
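These sigmas follow from the DDPM noise schedule via sigma = sqrt((1 - alpha_cumprod) / alpha_cumprod); a sketch of the derivation, assuming Stable Diffusion's scaled_linear beta schedule (beta_start=0.00085, beta_end=0.012, 1000 steps — taken from the model config, not from this gist):

import torch

betas = torch.linspace(0.00085 ** 0.5, 0.012 ** 0.5, 1000) ** 2  # scaled_linear schedule
alphas_cumprod = torch.cumprod(1.0 - betas, dim=0)
sigmas = ((1 - alphas_cumprod) / alphas_cumprod) ** 0.5
print(sigmas[:8])  # ≈ 0.0292, 0.0413, 0.0507, … matching the head of the tensor above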
from torch.nn import Embedding
from typing import Tuple, TypeVar, Iterable
from typing_extensions import TypeAlias
from enum import Enum, auto
from math import ceil
from torch import BoolTensor, LongTensor, sparse_coo_tensor, ones
from itertools import chain
class Label(Enum):