# Reference: https://github.com/apple/ml-stable-diffusion/blob/main/README.md

# Convert Stable Diffusion 2.1 (UNet, text encoder, VAE decoder) with
# torch2coreml, writing the results under SD-CoreML-Packages.
python -m python_coreml_stable_diffusion.torch2coreml \
  --model-version stabilityai/stable-diffusion-2-1 \
  --convert-unet \
  --convert-text-encoder \
  --convert-vae-decoder \
  -o ~/Documents/AI/Models/SD-CoreML-Packages/Stable_Diffusion_version_stabilityai_stable-diffusion-2-1

# Convert Stable Diffusion 1.5 together with the listed ControlNet models,
# once per attention implementation. Each run is timed and writes to an
# output directory suffixed with the attention implementation name.
for attention in ORIGINAL SPLIT_EINSUM; do
  time python -m python_coreml_stable_diffusion.torch2coreml \
    --model-version runwayml/stable-diffusion-v1-5 \
    --convert-unet \
    --unet-support-controlnet \
    --convert-text-encoder \
    --convert-vae-decoder \
    --convert-safety-checker \
    --convert-controlnet \
    lllyasviel/sd-controlnet-canny \
    lllyasviel/sd-controlnet-depth \
    lllyasviel/sd-controlnet-hed \
    lllyasviel/sd-controlnet-mlsd \
    lllyasviel/sd-controlnet-normal \
    lllyasviel/sd-controlnet-openpose \
    lllyasviel/sd-controlnet-scribble \
    lllyasviel/sd-controlnet-seg \
    --attention-implementation "$attention" \
    -o ~/Documents/AI/Models/SD-CoreML-Packages/runwayml_stable-diffusion-v1-5_"$attention"
done

# Run the Python pipeline against the SPLIT_EINSUM output directory written
# by the loop above; results are written to the current directory.
python -m python_coreml_stable_diffusion.pipeline \
  --prompt "a photo of an astronaut riding a horse on mars" \
  -i ~/Documents/AI/Models/SD-CoreML-Packages/runwayml_stable-diffusion-v1-5_SPLIT_EINSUM \
  -o . \
  --compute-unit ALL \
  --seed 93 \
  --model-version runwayml/stable-diffusion-v1-5

# Select the full Xcode developer directory before using `swift run`; see
# https://stackoverflow.com/questions/43418533/running-swift-build-in-terminal-leading-to-platform-path-errors
sudo xcode-select --switch /Applications/Xcode.app/Contents/Developer

# Time the Swift sample CLI once per compute-unit setting, using the same
# prompt and compiled model resources for every run.
for unit in all cpuOnly cpuAndGPU cpuAndNeuralEngine; do
  time swift run StableDiffusionSample --resource-path \
    ~/Documents/AI/Models/SD-CoreML/stable-diffusion-v2.1-base_split-einsum_compiled/ \
    --compute-units "$unit" \
    "a photo of an astronaut riding a horse on mars"
done

# Observations from the compute-unit comparison above:
# cpuOnly is 17x slower than all (despite monopolizing 5 cores of an M2 Max processor)
# all is 5% slower than cpuAndGPU, but uses ~50-60% of GPU
# cpuAndGPU uses 96% of GPU + 40% CPU
# cpuAndNeuralEngine is within 2% of cpuAndGPU and uses 0% of GPU + 7% CPU
# SD 2.1 768x768 split_einsum: 10 images at 25 steps w/o safety:
# 90sec w/ cpuAndGPU
# 96sec w/ all
# 100sec w/ cpuAndNeuralEngine
# Benchmark the Swift sample CLI with hyperfine: one warmup run and three
# timed runs for each value of the {unit} parameter. The command is
# single-quoted so that hyperfine substitutes {unit} and the shell it spawns
# (not this one) expands ~ and joins the backslash-continued lines.
hyperfine -L unit cpuAndGPU,cpuAndNeuralEngine \
  --warmup 1 \
  --runs 3 \
  'swift run StableDiffusionSample \
    --resource-path \
    ~/Documents/AI/Models/SD-CoreML/stable-diffusion-v2.1-base_split-einsum_compiled/ \
    --image-count 10 \
    --step-count 25 \
    --guidance-scale 7.5 \
    --seed 123 \
    --scheduler dpmpp \
    --save-every 0 \
    --disable-safety \
    --compute-units {unit} \
    "a photo of an astronaut riding a horse on mars" \
    --negative-prompt "low quality"'

# Further option groups kept as notes (values are placeholders); presumably
# extra StableDiffusionSample flags to append to the command above -- confirm
# against the CLI's --help before use:
#   --controlnet a b c
#   --controlnet-inputs a.png b.png c.png
#   --reduce-memory
#   --image start.png --strength 0.5