Created
February 14, 2024 13:36
-
-
Save laksjdjf/eda78aa762ee71ee259f7df9a58d88e8 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
=========================================================================================================================================================== | |
Layer (type (var_name)) Input Shape Output Shape Param # Kernel Shape | |
=========================================================================================================================================================== | |
StableCascadeUnet (StableCascadeUnet) [2, 16, 24, 24] [2, 16, 24, 24] 8,923,136 3 | |
├─Linear (clip_txt_pooled_mapper) [2, 77, 1280] [2, 77, 8192] 10,493,952 -- | |
├─LayerNorm (clip_norm) [2, 308, 2048] [2, 308, 2048] -- -- | |
├─Sequential (embedding) [2, 16, 24, 24] [2, 2048, 24, 24] -- -- | |
│ └─PixelUnshuffle (0) [2, 16, 24, 24] [2, 16, 24, 24] -- -- | |
│ └─Conv2d (1) [2, 16, 24, 24] [2, 2048, 24, 24] 34,816 [1, 1] | |
│ └─WuerstchenLayerNorm (2) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
├─ModuleList (down_downscalers) -- -- (recursive) -- | |
│ └─Identity (0) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
├─ModuleList (down_blocks) -- -- (recursive) -- | |
│ └─ModuleList (0) -- -- -- -- | |
│ │ └─ResBlockStageB (0) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Conv2d (depthwise) [2, 2048, 24, 24] [2, 2048, 24, 24] 20,480 [3, 3] | |
│ │ │ └─WuerstchenLayerNorm (norm) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Sequential (channelwise) [2, 24, 24, 2048] [2, 24, 24, 2048] -- -- | |
│ │ │ │ └─Linear (0) [2, 24, 24, 2048] [2, 24, 24, 8192] 16,785,408 -- | |
│ │ │ │ └─GELU (1) [2, 24, 24, 8192] [2, 24, 24, 8192] -- -- | |
│ │ │ │ └─GlobalResponseNorm (2) [2, 24, 24, 8192] [2, 24, 24, 8192] 16,384 -- | |
│ │ │ │ └─Dropout (3) [2, 24, 24, 8192] [2, 24, 24, 8192] -- -- | |
│ │ │ │ └─Linear (4) [2, 24, 24, 8192] [2, 24, 24, 2048] 16,779,264 -- | |
│ │ └─TimestepBlock (1) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─LoRACompatibleLinear (mapper) [1, 64] [1, 4096] 266,240 -- | |
│ │ │ └─LoRACompatibleLinear (mapper_sca) [1, 64] [1, 4096] 266,240 -- | |
│ │ │ └─LoRACompatibleLinear (mapper_crp) [1, 64] [1, 4096] 266,240 -- | |
│ │ └─AttnBlock (2) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Sequential (kv_mapper) [2, 308, 2048] [2, 308, 2048] -- -- | |
│ │ │ │ └─SiLU (0) [2, 308, 2048] [2, 308, 2048] -- -- | |
│ │ │ │ └─LoRACompatibleLinear (1) [2, 308, 2048] [2, 308, 2048] 4,196,352 -- | |
│ │ │ └─WuerstchenLayerNorm (norm) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Attention (attention) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ │ └─LoRACompatibleLinear (to_q) [2, 576, 2048] [2, 576, 2048] 4,196,352 -- | |
│ │ │ │ └─LoRACompatibleLinear (to_k) [2, 884, 2048] [2, 884, 2048] 4,196,352 -- | |
│ │ │ │ └─LoRACompatibleLinear (to_v) [2, 884, 2048] [2, 884, 2048] 4,196,352 -- | |
│ │ │ │ └─ModuleList (to_out) -- -- -- -- | |
│ │ │ │ │ └─LoRACompatibleLinear (0) [2, 576, 2048] [2, 576, 2048] 4,196,352 -- | |
│ │ │ │ │ └─Dropout (1) [2, 576, 2048] [2, 576, 2048] -- -- | |
│ │ └─ResBlockStageB (3) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Conv2d (depthwise) [2, 2048, 24, 24] [2, 2048, 24, 24] 20,480 [3, 3] | |
│ │ │ └─WuerstchenLayerNorm (norm) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Sequential (channelwise) [2, 24, 24, 2048] [2, 24, 24, 2048] -- -- | |
│ │ │ │ └─Linear (0) [2, 24, 24, 2048] [2, 24, 24, 8192] 16,785,408 -- | |
│ │ │ │ └─GELU (1) [2, 24, 24, 8192] [2, 24, 24, 8192] -- -- | |
│ │ │ │ └─GlobalResponseNorm (2) [2, 24, 24, 8192] [2, 24, 24, 8192] 16,384 -- | |
│ │ │ │ └─Dropout (3) [2, 24, 24, 8192] [2, 24, 24, 8192] -- -- | |
│ │ │ │ └─Linear (4) [2, 24, 24, 8192] [2, 24, 24, 2048] 16,779,264 -- | |
│ │ └─TimestepBlock (4) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─LoRACompatibleLinear (mapper) [1, 64] [1, 4096] 266,240 -- | |
│ │ │ └─LoRACompatibleLinear (mapper_sca) [1, 64] [1, 4096] 266,240 -- | |
│ │ │ └─LoRACompatibleLinear (mapper_crp) [1, 64] [1, 4096] 266,240 -- | |
│ │ └─AttnBlock (5) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Sequential (kv_mapper) [2, 308, 2048] [2, 308, 2048] -- -- | |
│ │ │ │ └─SiLU (0) [2, 308, 2048] [2, 308, 2048] -- -- | |
│ │ │ │ └─LoRACompatibleLinear (1) [2, 308, 2048] [2, 308, 2048] 4,196,352 -- | |
│ │ │ └─WuerstchenLayerNorm (norm) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Attention (attention) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ │ └─LoRACompatibleLinear (to_q) [2, 576, 2048] [2, 576, 2048] 4,196,352 -- | |
│ │ │ │ └─LoRACompatibleLinear (to_k) [2, 884, 2048] [2, 884, 2048] 4,196,352 -- | |
│ │ │ │ └─LoRACompatibleLinear (to_v) [2, 884, 2048] [2, 884, 2048] 4,196,352 -- | |
│ │ │ │ └─ModuleList (to_out) -- -- -- -- | |
│ │ │ │ │ └─LoRACompatibleLinear (0) [2, 576, 2048] [2, 576, 2048] 4,196,352 -- | |
│ │ │ │ │ └─Dropout (1) [2, 576, 2048] [2, 576, 2048] -- -- | |
│ │ └─ResBlockStageB (6) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Conv2d (depthwise) [2, 2048, 24, 24] [2, 2048, 24, 24] 20,480 [3, 3] | |
│ │ │ └─WuerstchenLayerNorm (norm) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Sequential (channelwise) [2, 24, 24, 2048] [2, 24, 24, 2048] -- -- | |
│ │ │ │ └─Linear (0) [2, 24, 24, 2048] [2, 24, 24, 8192] 16,785,408 -- | |
│ │ │ │ └─GELU (1) [2, 24, 24, 8192] [2, 24, 24, 8192] -- -- | |
│ │ │ │ └─GlobalResponseNorm (2) [2, 24, 24, 8192] [2, 24, 24, 8192] 16,384 -- | |
│ │ │ │ └─Dropout (3) [2, 24, 24, 8192] [2, 24, 24, 8192] -- -- | |
│ │ │ │ └─Linear (4) [2, 24, 24, 8192] [2, 24, 24, 2048] 16,779,264 -- | |
│ │ └─TimestepBlock (7) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─LoRACompatibleLinear (mapper) [1, 64] [1, 4096] 266,240 -- | |
│ │ │ └─LoRACompatibleLinear (mapper_sca) [1, 64] [1, 4096] 266,240 -- | |
│ │ │ └─LoRACompatibleLinear (mapper_crp) [1, 64] [1, 4096] 266,240 -- | |
│ │ └─AttnBlock (8) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Sequential (kv_mapper) [2, 308, 2048] [2, 308, 2048] -- -- | |
│ │ │ │ └─SiLU (0) [2, 308, 2048] [2, 308, 2048] -- -- | |
│ │ │ │ └─LoRACompatibleLinear (1) [2, 308, 2048] [2, 308, 2048] 4,196,352 -- | |
│ │ │ └─WuerstchenLayerNorm (norm) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Attention (attention) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ │ └─LoRACompatibleLinear (to_q) [2, 576, 2048] [2, 576, 2048] 4,196,352 -- | |
│ │ │ │ └─LoRACompatibleLinear (to_k) [2, 884, 2048] [2, 884, 2048] 4,196,352 -- | |
│ │ │ │ └─LoRACompatibleLinear (to_v) [2, 884, 2048] [2, 884, 2048] 4,196,352 -- | |
│ │ │ │ └─ModuleList (to_out) -- -- -- -- | |
│ │ │ │ │ └─LoRACompatibleLinear (0) [2, 576, 2048] [2, 576, 2048] 4,196,352 -- | |
│ │ │ │ │ └─Dropout (1) [2, 576, 2048] [2, 576, 2048] -- -- | |
│ │ └─ResBlockStageB (9) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Conv2d (depthwise) [2, 2048, 24, 24] [2, 2048, 24, 24] 20,480 [3, 3] | |
│ │ │ └─WuerstchenLayerNorm (norm) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Sequential (channelwise) [2, 24, 24, 2048] [2, 24, 24, 2048] -- -- | |
│ │ │ │ └─Linear (0) [2, 24, 24, 2048] [2, 24, 24, 8192] 16,785,408 -- | |
│ │ │ │ └─GELU (1) [2, 24, 24, 8192] [2, 24, 24, 8192] -- -- | |
│ │ │ │ └─GlobalResponseNorm (2) [2, 24, 24, 8192] [2, 24, 24, 8192] 16,384 -- | |
│ │ │ │ └─Dropout (3) [2, 24, 24, 8192] [2, 24, 24, 8192] -- -- | |
│ │ │ │ └─Linear (4) [2, 24, 24, 8192] [2, 24, 24, 2048] 16,779,264 -- | |
│ │ └─TimestepBlock (10) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─LoRACompatibleLinear (mapper) [1, 64] [1, 4096] 266,240 -- | |
│ │ │ └─LoRACompatibleLinear (mapper_sca) [1, 64] [1, 4096] 266,240 -- | |
│ │ │ └─LoRACompatibleLinear (mapper_crp) [1, 64] [1, 4096] 266,240 -- | |
│ │ └─AttnBlock (11) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Sequential (kv_mapper) [2, 308, 2048] [2, 308, 2048] -- -- | |
│ │ │ │ └─SiLU (0) [2, 308, 2048] [2, 308, 2048] -- -- | |
│ │ │ │ └─LoRACompatibleLinear (1) [2, 308, 2048] [2, 308, 2048] 4,196,352 -- | |
│ │ │ └─WuerstchenLayerNorm (norm) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Attention (attention) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ │ └─LoRACompatibleLinear (to_q) [2, 576, 2048] [2, 576, 2048] 4,196,352 -- | |
│ │ │ │ └─LoRACompatibleLinear (to_k) [2, 884, 2048] [2, 884, 2048] 4,196,352 -- | |
│ │ │ │ └─LoRACompatibleLinear (to_v) [2, 884, 2048] [2, 884, 2048] 4,196,352 -- | |
│ │ │ │ └─ModuleList (to_out) -- -- -- -- | |
│ │ │ │ │ └─LoRACompatibleLinear (0) [2, 576, 2048] [2, 576, 2048] 4,196,352 -- | |
│ │ │ │ │ └─Dropout (1) [2, 576, 2048] [2, 576, 2048] -- -- | |
│ │ └─ResBlockStageB (12) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Conv2d (depthwise) [2, 2048, 24, 24] [2, 2048, 24, 24] 20,480 [3, 3] | |
│ │ │ └─WuerstchenLayerNorm (norm) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Sequential (channelwise) [2, 24, 24, 2048] [2, 24, 24, 2048] -- -- | |
│ │ │ │ └─Linear (0) [2, 24, 24, 2048] [2, 24, 24, 8192] 16,785,408 -- | |
│ │ │ │ └─GELU (1) [2, 24, 24, 8192] [2, 24, 24, 8192] -- -- | |
│ │ │ │ └─GlobalResponseNorm (2) [2, 24, 24, 8192] [2, 24, 24, 8192] 16,384 -- | |
│ │ │ │ └─Dropout (3) [2, 24, 24, 8192] [2, 24, 24, 8192] -- -- | |
│ │ │ │ └─Linear (4) [2, 24, 24, 8192] [2, 24, 24, 2048] 16,779,264 -- | |
│ │ └─TimestepBlock (13) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─LoRACompatibleLinear (mapper) [1, 64] [1, 4096] 266,240 -- | |
│ │ │ └─LoRACompatibleLinear (mapper_sca) [1, 64] [1, 4096] 266,240 -- | |
│ │ │ └─LoRACompatibleLinear (mapper_crp) [1, 64] [1, 4096] 266,240 -- | |
│ │ └─AttnBlock (14) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Sequential (kv_mapper) [2, 308, 2048] [2, 308, 2048] -- -- | |
│ │ │ │ └─SiLU (0) [2, 308, 2048] [2, 308, 2048] -- -- | |
│ │ │ │ └─LoRACompatibleLinear (1) [2, 308, 2048] [2, 308, 2048] 4,196,352 -- | |
│ │ │ └─WuerstchenLayerNorm (norm) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Attention (attention) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ │ └─LoRACompatibleLinear (to_q) [2, 576, 2048] [2, 576, 2048] 4,196,352 -- | |
│ │ │ │ └─LoRACompatibleLinear (to_k) [2, 884, 2048] [2, 884, 2048] 4,196,352 -- | |
│ │ │ │ └─LoRACompatibleLinear (to_v) [2, 884, 2048] [2, 884, 2048] 4,196,352 -- | |
│ │ │ │ └─ModuleList (to_out) -- -- -- -- | |
│ │ │ │ │ └─LoRACompatibleLinear (0) [2, 576, 2048] [2, 576, 2048] 4,196,352 -- | |
│ │ │ │ │ └─Dropout (1) [2, 576, 2048] [2, 576, 2048] -- -- | |
│ │ └─ResBlockStageB (15) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Conv2d (depthwise) [2, 2048, 24, 24] [2, 2048, 24, 24] 20,480 [3, 3] | |
│ │ │ └─WuerstchenLayerNorm (norm) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Sequential (channelwise) [2, 24, 24, 2048] [2, 24, 24, 2048] -- -- | |
│ │ │ │ └─Linear (0) [2, 24, 24, 2048] [2, 24, 24, 8192] 16,785,408 -- | |
│ │ │ │ └─GELU (1) [2, 24, 24, 8192] [2, 24, 24, 8192] -- -- | |
│ │ │ │ └─GlobalResponseNorm (2) [2, 24, 24, 8192] [2, 24, 24, 8192] 16,384 -- | |
│ │ │ │ └─Dropout (3) [2, 24, 24, 8192] [2, 24, 24, 8192] -- -- | |
│ │ │ │ └─Linear (4) [2, 24, 24, 8192] [2, 24, 24, 2048] 16,779,264 -- | |
│ │ └─TimestepBlock (16) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─LoRACompatibleLinear (mapper) [1, 64] [1, 4096] 266,240 -- | |
│ │ │ └─LoRACompatibleLinear (mapper_sca) [1, 64] [1, 4096] 266,240 -- | |
│ │ │ └─LoRACompatibleLinear (mapper_crp) [1, 64] [1, 4096] 266,240 -- | |
│ │ └─AttnBlock (17) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Sequential (kv_mapper) [2, 308, 2048] [2, 308, 2048] -- -- | |
│ │ │ │ └─SiLU (0) [2, 308, 2048] [2, 308, 2048] -- -- | |
│ │ │ │ └─LoRACompatibleLinear (1) [2, 308, 2048] [2, 308, 2048] 4,196,352 -- | |
│ │ │ └─WuerstchenLayerNorm (norm) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Attention (attention) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ │ └─LoRACompatibleLinear (to_q) [2, 576, 2048] [2, 576, 2048] 4,196,352 -- | |
│ │ │ │ └─LoRACompatibleLinear (to_k) [2, 884, 2048] [2, 884, 2048] 4,196,352 -- | |
│ │ │ │ └─LoRACompatibleLinear (to_v) [2, 884, 2048] [2, 884, 2048] 4,196,352 -- | |
│ │ │ │ └─ModuleList (to_out) -- -- -- -- | |
│ │ │ │ │ └─LoRACompatibleLinear (0) [2, 576, 2048] [2, 576, 2048] 4,196,352 -- | |
│ │ │ │ │ └─Dropout (1) [2, 576, 2048] [2, 576, 2048] -- -- | |
│ │ └─ResBlockStageB (18) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Conv2d (depthwise) [2, 2048, 24, 24] [2, 2048, 24, 24] 20,480 [3, 3] | |
│ │ │ └─WuerstchenLayerNorm (norm) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Sequential (channelwise) [2, 24, 24, 2048] [2, 24, 24, 2048] -- -- | |
│ │ │ │ └─Linear (0) [2, 24, 24, 2048] [2, 24, 24, 8192] 16,785,408 -- | |
│ │ │ │ └─GELU (1) [2, 24, 24, 8192] [2, 24, 24, 8192] -- -- | |
│ │ │ │ └─GlobalResponseNorm (2) [2, 24, 24, 8192] [2, 24, 24, 8192] 16,384 -- | |
│ │ │ │ └─Dropout (3) [2, 24, 24, 8192] [2, 24, 24, 8192] -- -- | |
│ │ │ │ └─Linear (4) [2, 24, 24, 8192] [2, 24, 24, 2048] 16,779,264 -- | |
│ │ └─TimestepBlock (19) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─LoRACompatibleLinear (mapper) [1, 64] [1, 4096] 266,240 -- | |
│ │ │ └─LoRACompatibleLinear (mapper_sca) [1, 64] [1, 4096] 266,240 -- | |
│ │ │ └─LoRACompatibleLinear (mapper_crp) [1, 64] [1, 4096] 266,240 -- | |
│ │ └─AttnBlock (20) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Sequential (kv_mapper) [2, 308, 2048] [2, 308, 2048] -- -- | |
│ │ │ │ └─SiLU (0) [2, 308, 2048] [2, 308, 2048] -- -- | |
│ │ │ │ └─LoRACompatibleLinear (1) [2, 308, 2048] [2, 308, 2048] 4,196,352 -- | |
│ │ │ └─WuerstchenLayerNorm (norm) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Attention (attention) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ │ └─LoRACompatibleLinear (to_q) [2, 576, 2048] [2, 576, 2048] 4,196,352 -- | |
│ │ │ │ └─LoRACompatibleLinear (to_k) [2, 884, 2048] [2, 884, 2048] 4,196,352 -- | |
│ │ │ │ └─LoRACompatibleLinear (to_v) [2, 884, 2048] [2, 884, 2048] 4,196,352 -- | |
│ │ │ │ └─ModuleList (to_out) -- -- -- -- | |
│ │ │ │ │ └─LoRACompatibleLinear (0) [2, 576, 2048] [2, 576, 2048] 4,196,352 -- | |
│ │ │ │ │ └─Dropout (1) [2, 576, 2048] [2, 576, 2048] -- -- | |
│ │ └─ResBlockStageB (21) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Conv2d (depthwise) [2, 2048, 24, 24] [2, 2048, 24, 24] 20,480 [3, 3] | |
│ │ │ └─WuerstchenLayerNorm (norm) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Sequential (channelwise) [2, 24, 24, 2048] [2, 24, 24, 2048] -- -- | |
│ │ │ │ └─Linear (0) [2, 24, 24, 2048] [2, 24, 24, 8192] 16,785,408 -- | |
│ │ │ │ └─GELU (1) [2, 24, 24, 8192] [2, 24, 24, 8192] -- -- | |
│ │ │ │ └─GlobalResponseNorm (2) [2, 24, 24, 8192] [2, 24, 24, 8192] 16,384 -- | |
│ │ │ │ └─Dropout (3) [2, 24, 24, 8192] [2, 24, 24, 8192] -- -- | |
│ │ │ │ └─Linear (4) [2, 24, 24, 8192] [2, 24, 24, 2048] 16,779,264 -- | |
│ │ └─TimestepBlock (22) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─LoRACompatibleLinear (mapper) [1, 64] [1, 4096] 266,240 -- | |
│ │ │ └─LoRACompatibleLinear (mapper_sca) [1, 64] [1, 4096] 266,240 -- | |
│ │ │ └─LoRACompatibleLinear (mapper_crp) [1, 64] [1, 4096] 266,240 -- | |
│ │ └─AttnBlock (23) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Sequential (kv_mapper) [2, 308, 2048] [2, 308, 2048] -- -- | |
│ │ │ │ └─SiLU (0) [2, 308, 2048] [2, 308, 2048] -- -- | |
│ │ │ │ └─LoRACompatibleLinear (1) [2, 308, 2048] [2, 308, 2048] 4,196,352 -- | |
│ │ │ └─WuerstchenLayerNorm (norm) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Attention (attention) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ │ └─LoRACompatibleLinear (to_q) [2, 576, 2048] [2, 576, 2048] 4,196,352 -- | |
│ │ │ │ └─LoRACompatibleLinear (to_k) [2, 884, 2048] [2, 884, 2048] 4,196,352 -- | |
│ │ │ │ └─LoRACompatibleLinear (to_v) [2, 884, 2048] [2, 884, 2048] 4,196,352 -- | |
│ │ │ │ └─ModuleList (to_out) -- -- -- -- | |
│ │ │ │ │ └─LoRACompatibleLinear (0) [2, 576, 2048] [2, 576, 2048] 4,196,352 -- | |
│ │ │ │ │ └─Dropout (1) [2, 576, 2048] [2, 576, 2048] -- -- | |
├─ModuleList (down_downscalers) -- -- (recursive) -- | |
│ └─Sequential (1) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ └─WuerstchenLayerNorm (0) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ └─UpDownBlock2d (1) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─ModuleList (blocks) -- -- -- -- | |
│ │ │ │ └─Conv2d (0) [2, 2048, 24, 24] [2, 2048, 24, 24] 4,196,352 [1, 1] | |
│ │ │ │ └─Identity (1) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
├─ModuleList (down_blocks) -- -- (recursive) -- | |
│ └─ModuleList (1) -- -- -- -- | |
│ │ └─ResBlockStageB (0) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Conv2d (depthwise) [2, 2048, 24, 24] [2, 2048, 24, 24] 20,480 [3, 3] | |
│ │ │ └─WuerstchenLayerNorm (norm) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Sequential (channelwise) [2, 24, 24, 2048] [2, 24, 24, 2048] -- -- | |
│ │ │ │ └─Linear (0) [2, 24, 24, 2048] [2, 24, 24, 8192] 16,785,408 -- | |
│ │ │ │ └─GELU (1) [2, 24, 24, 8192] [2, 24, 24, 8192] -- -- | |
│ │ │ │ └─GlobalResponseNorm (2) [2, 24, 24, 8192] [2, 24, 24, 8192] 16,384 -- | |
│ │ │ │ └─Dropout (3) [2, 24, 24, 8192] [2, 24, 24, 8192] -- -- | |
│ │ │ │ └─Linear (4) [2, 24, 24, 8192] [2, 24, 24, 2048] 16,779,264 -- | |
│ │ └─TimestepBlock (1) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─LoRACompatibleLinear (mapper) [1, 64] [1, 4096] 266,240 -- | |
│ │ │ └─LoRACompatibleLinear (mapper_sca) [1, 64] [1, 4096] 266,240 -- | |
│ │ │ └─LoRACompatibleLinear (mapper_crp) [1, 64] [1, 4096] 266,240 -- | |
│ │ └─AttnBlock (2) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Sequential (kv_mapper) [2, 308, 2048] [2, 308, 2048] -- -- | |
│ │ │ │ └─SiLU (0) [2, 308, 2048] [2, 308, 2048] -- -- | |
│ │ │ │ └─LoRACompatibleLinear (1) [2, 308, 2048] [2, 308, 2048] 4,196,352 -- | |
│ │ │ └─WuerstchenLayerNorm (norm) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Attention (attention) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ │ └─LoRACompatibleLinear (to_q) [2, 576, 2048] [2, 576, 2048] 4,196,352 -- | |
│ │ │ │ └─LoRACompatibleLinear (to_k) [2, 884, 2048] [2, 884, 2048] 4,196,352 -- | |
│ │ │ │ └─LoRACompatibleLinear (to_v) [2, 884, 2048] [2, 884, 2048] 4,196,352 -- | |
│ │ │ │ └─ModuleList (to_out) -- -- -- -- | |
│ │ │ │ │ └─LoRACompatibleLinear (0) [2, 576, 2048] [2, 576, 2048] 4,196,352 -- | |
│ │ │ │ │ └─Dropout (1) [2, 576, 2048] [2, 576, 2048] -- -- | |
│ │ └─ResBlockStageB (3) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Conv2d (depthwise) [2, 2048, 24, 24] [2, 2048, 24, 24] 20,480 [3, 3] | |
│ │ │ └─WuerstchenLayerNorm (norm) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Sequential (channelwise) [2, 24, 24, 2048] [2, 24, 24, 2048] -- -- | |
│ │ │ │ └─Linear (0) [2, 24, 24, 2048] [2, 24, 24, 8192] 16,785,408 -- | |
│ │ │ │ └─GELU (1) [2, 24, 24, 8192] [2, 24, 24, 8192] -- -- | |
│ │ │ │ └─GlobalResponseNorm (2) [2, 24, 24, 8192] [2, 24, 24, 8192] 16,384 -- | |
│ │ │ │ └─Dropout (3) [2, 24, 24, 8192] [2, 24, 24, 8192] -- -- | |
│ │ │ │ └─Linear (4) [2, 24, 24, 8192] [2, 24, 24, 2048] 16,779,264 -- | |
│ │ └─TimestepBlock (4) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─LoRACompatibleLinear (mapper) [1, 64] [1, 4096] 266,240 -- | |
│ │ │ └─LoRACompatibleLinear (mapper_sca) [1, 64] [1, 4096] 266,240 -- | |
│ │ │ └─LoRACompatibleLinear (mapper_crp) [1, 64] [1, 4096] 266,240 -- | |
│ │ └─AttnBlock (5) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Sequential (kv_mapper) [2, 308, 2048] [2, 308, 2048] -- -- | |
│ │ │ │ └─SiLU (0) [2, 308, 2048] [2, 308, 2048] -- -- | |
│ │ │ │ └─LoRACompatibleLinear (1) [2, 308, 2048] [2, 308, 2048] 4,196,352 -- | |
│ │ │ └─WuerstchenLayerNorm (norm) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Attention (attention) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ │ └─LoRACompatibleLinear (to_q) [2, 576, 2048] [2, 576, 2048] 4,196,352 -- | |
│ │ │ │ └─LoRACompatibleLinear (to_k) [2, 884, 2048] [2, 884, 2048] 4,196,352 -- | |
│ │ │ │ └─LoRACompatibleLinear (to_v) [2, 884, 2048] [2, 884, 2048] 4,196,352 -- | |
│ │ │ │ └─ModuleList (to_out) -- -- -- -- | |
│ │ │ │ │ └─LoRACompatibleLinear (0) [2, 576, 2048] [2, 576, 2048] 4,196,352 -- | |
│ │ │ │ │ └─Dropout (1) [2, 576, 2048] [2, 576, 2048] -- -- | |
│ │ └─ResBlockStageB (6) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Conv2d (depthwise) [2, 2048, 24, 24] [2, 2048, 24, 24] 20,480 [3, 3] | |
│ │ │ └─WuerstchenLayerNorm (norm) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Sequential (channelwise) [2, 24, 24, 2048] [2, 24, 24, 2048] -- -- | |
│ │ │ │ └─Linear (0) [2, 24, 24, 2048] [2, 24, 24, 8192] 16,785,408 -- | |
│ │ │ │ └─GELU (1) [2, 24, 24, 8192] [2, 24, 24, 8192] -- -- | |
│ │ │ │ └─GlobalResponseNorm (2) [2, 24, 24, 8192] [2, 24, 24, 8192] 16,384 -- | |
│ │ │ │ └─Dropout (3) [2, 24, 24, 8192] [2, 24, 24, 8192] -- -- | |
│ │ │ │ └─Linear (4) [2, 24, 24, 8192] [2, 24, 24, 2048] 16,779,264 -- | |
│ │ └─TimestepBlock (7) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─LoRACompatibleLinear (mapper) [1, 64] [1, 4096] 266,240 -- | |
│ │ │ └─LoRACompatibleLinear (mapper_sca) [1, 64] [1, 4096] 266,240 -- | |
│ │ │ └─LoRACompatibleLinear (mapper_crp) [1, 64] [1, 4096] 266,240 -- | |
│ │ └─AttnBlock (8) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Sequential (kv_mapper) [2, 308, 2048] [2, 308, 2048] -- -- | |
│ │ │ │ └─SiLU (0) [2, 308, 2048] [2, 308, 2048] -- -- | |
│ │ │ │ └─LoRACompatibleLinear (1) [2, 308, 2048] [2, 308, 2048] 4,196,352 -- | |
│ │ │ └─WuerstchenLayerNorm (norm) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Attention (attention) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ │ └─LoRACompatibleLinear (to_q) [2, 576, 2048] [2, 576, 2048] 4,196,352 -- | |
│ │ │ │ └─LoRACompatibleLinear (to_k) [2, 884, 2048] [2, 884, 2048] 4,196,352 -- | |
│ │ │ │ └─LoRACompatibleLinear (to_v) [2, 884, 2048] [2, 884, 2048] 4,196,352 -- | |
│ │ │ │ └─ModuleList (to_out) -- -- -- -- | |
│ │ │ │ │ └─LoRACompatibleLinear (0) [2, 576, 2048] [2, 576, 2048] 4,196,352 -- | |
│ │ │ │ │ └─Dropout (1) [2, 576, 2048] [2, 576, 2048] -- -- | |
│ │ └─ResBlockStageB (9) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Conv2d (depthwise) [2, 2048, 24, 24] [2, 2048, 24, 24] 20,480 [3, 3] | |
│ │ │ └─WuerstchenLayerNorm (norm) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Sequential (channelwise) [2, 24, 24, 2048] [2, 24, 24, 2048] -- -- | |
│ │ │ │ └─Linear (0) [2, 24, 24, 2048] [2, 24, 24, 8192] 16,785,408 -- | |
│ │ │ │ └─GELU (1) [2, 24, 24, 8192] [2, 24, 24, 8192] -- -- | |
│ │ │ │ └─GlobalResponseNorm (2) [2, 24, 24, 8192] [2, 24, 24, 8192] 16,384 -- | |
│ │ │ │ └─Dropout (3) [2, 24, 24, 8192] [2, 24, 24, 8192] -- -- | |
│ │ │ │ └─Linear (4) [2, 24, 24, 8192] [2, 24, 24, 2048] 16,779,264 -- | |
│ │ └─TimestepBlock (10) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─LoRACompatibleLinear (mapper) [1, 64] [1, 4096] 266,240 -- | |
│ │ │ └─LoRACompatibleLinear (mapper_sca) [1, 64] [1, 4096] 266,240 -- | |
│ │ │ └─LoRACompatibleLinear (mapper_crp) [1, 64] [1, 4096] 266,240 -- | |
│ │ └─AttnBlock (11) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Sequential (kv_mapper) [2, 308, 2048] [2, 308, 2048] -- -- | |
│ │ │ │ └─SiLU (0) [2, 308, 2048] [2, 308, 2048] -- -- | |
│ │ │ │ └─LoRACompatibleLinear (1) [2, 308, 2048] [2, 308, 2048] 4,196,352 -- | |
│ │ │ └─WuerstchenLayerNorm (norm) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Attention (attention) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ │ └─LoRACompatibleLinear (to_q) [2, 576, 2048] [2, 576, 2048] 4,196,352 -- | |
│ │ │ │ └─LoRACompatibleLinear (to_k) [2, 884, 2048] [2, 884, 2048] 4,196,352 -- | |
│ │ │ │ └─LoRACompatibleLinear (to_v) [2, 884, 2048] [2, 884, 2048] 4,196,352 -- | |
│ │ │ │ └─ModuleList (to_out) -- -- -- -- | |
│ │ │ │ │ └─LoRACompatibleLinear (0) [2, 576, 2048] [2, 576, 2048] 4,196,352 -- | |
│ │ │ │ │ └─Dropout (1) [2, 576, 2048] [2, 576, 2048] -- -- | |
│ │ └─ResBlockStageB (12) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Conv2d (depthwise) [2, 2048, 24, 24] [2, 2048, 24, 24] 20,480 [3, 3] | |
│ │ │ └─WuerstchenLayerNorm (norm) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Sequential (channelwise) [2, 24, 24, 2048] [2, 24, 24, 2048] -- -- | |
│ │ │ │ └─Linear (0) [2, 24, 24, 2048] [2, 24, 24, 8192] 16,785,408 -- | |
│ │ │ │ └─GELU (1) [2, 24, 24, 8192] [2, 24, 24, 8192] -- -- | |
│ │ │ │ └─GlobalResponseNorm (2) [2, 24, 24, 8192] [2, 24, 24, 8192] 16,384 -- | |
│ │ │ │ └─Dropout (3) [2, 24, 24, 8192] [2, 24, 24, 8192] -- -- | |
│ │ │ │ └─Linear (4) [2, 24, 24, 8192] [2, 24, 24, 2048] 16,779,264 -- | |
│ │ └─TimestepBlock (13) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─LoRACompatibleLinear (mapper) [1, 64] [1, 4096] 266,240 -- | |
│ │ │ └─LoRACompatibleLinear (mapper_sca) [1, 64] [1, 4096] 266,240 -- | |
│ │ │ └─LoRACompatibleLinear (mapper_crp) [1, 64] [1, 4096] 266,240 -- | |
│ │ └─AttnBlock (14) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Sequential (kv_mapper) [2, 308, 2048] [2, 308, 2048] -- -- | |
│ │ │ │ └─SiLU (0) [2, 308, 2048] [2, 308, 2048] -- -- | |
│ │ │ │ └─LoRACompatibleLinear (1) [2, 308, 2048] [2, 308, 2048] 4,196,352 -- | |
│ │ │ └─WuerstchenLayerNorm (norm) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Attention (attention) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ │ └─LoRACompatibleLinear (to_q) [2, 576, 2048] [2, 576, 2048] 4,196,352 -- | |
│ │ │ │ └─LoRACompatibleLinear (to_k) [2, 884, 2048] [2, 884, 2048] 4,196,352 -- | |
│ │ │ │ └─LoRACompatibleLinear (to_v) [2, 884, 2048] [2, 884, 2048] 4,196,352 -- | |
│ │ │ │ └─ModuleList (to_out) -- -- -- -- | |
│ │ │ │ │ └─LoRACompatibleLinear (0) [2, 576, 2048] [2, 576, 2048] 4,196,352 -- | |
│ │ │ │ │ └─Dropout (1) [2, 576, 2048] [2, 576, 2048] -- -- | |
│ │ └─ResBlockStageB (15) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Conv2d (depthwise) [2, 2048, 24, 24] [2, 2048, 24, 24] 20,480 [3, 3] | |
│ │ │ └─WuerstchenLayerNorm (norm) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Sequential (channelwise) [2, 24, 24, 2048] [2, 24, 24, 2048] -- -- | |
│ │ │ │ └─Linear (0) [2, 24, 24, 2048] [2, 24, 24, 8192] 16,785,408 -- | |
│ │ │ │ └─GELU (1) [2, 24, 24, 8192] [2, 24, 24, 8192] -- -- | |
│ │ │ │ └─GlobalResponseNorm (2) [2, 24, 24, 8192] [2, 24, 24, 8192] 16,384 -- | |
│ │ │ │ └─Dropout (3) [2, 24, 24, 8192] [2, 24, 24, 8192] -- -- | |
│ │ │ │ └─Linear (4) [2, 24, 24, 8192] [2, 24, 24, 2048] 16,779,264 -- | |
│ │ └─TimestepBlock (16) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─LoRACompatibleLinear (mapper) [1, 64] [1, 4096] 266,240 -- | |
│ │ │ └─LoRACompatibleLinear (mapper_sca) [1, 64] [1, 4096] 266,240 -- | |
│ │ │ └─LoRACompatibleLinear (mapper_crp) [1, 64] [1, 4096] 266,240 -- | |
│ │ └─AttnBlock (17) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Sequential (kv_mapper) [2, 308, 2048] [2, 308, 2048] -- -- | |
│ │ │ │ └─SiLU (0) [2, 308, 2048] [2, 308, 2048] -- -- | |
│ │ │ │ └─LoRACompatibleLinear (1) [2, 308, 2048] [2, 308, 2048] 4,196,352 -- | |
│ │ │ └─WuerstchenLayerNorm (norm) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Attention (attention) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ │ └─LoRACompatibleLinear (to_q) [2, 576, 2048] [2, 576, 2048] 4,196,352 -- | |
│ │ │ │ └─LoRACompatibleLinear (to_k) [2, 884, 2048] [2, 884, 2048] 4,196,352 -- | |
│ │ │ │ └─LoRACompatibleLinear (to_v) [2, 884, 2048] [2, 884, 2048] 4,196,352 -- | |
│ │ │ │ └─ModuleList (to_out) -- -- -- -- | |
│ │ │ │ │ └─LoRACompatibleLinear (0) [2, 576, 2048] [2, 576, 2048] 4,196,352 -- | |
│ │ │ │ │ └─Dropout (1) [2, 576, 2048] [2, 576, 2048] -- -- | |
│ │ └─ResBlockStageB (18) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Conv2d (depthwise) [2, 2048, 24, 24] [2, 2048, 24, 24] 20,480 [3, 3] | |
│ │ │ └─WuerstchenLayerNorm (norm) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Sequential (channelwise) [2, 24, 24, 2048] [2, 24, 24, 2048] -- -- | |
│ │ │ │ └─Linear (0) [2, 24, 24, 2048] [2, 24, 24, 8192] 16,785,408 -- | |
│ │ │ │ └─GELU (1) [2, 24, 24, 8192] [2, 24, 24, 8192] -- -- | |
│ │ │ │ └─GlobalResponseNorm (2) [2, 24, 24, 8192] [2, 24, 24, 8192] 16,384 -- | |
│ │ │ │ └─Dropout (3) [2, 24, 24, 8192] [2, 24, 24, 8192] -- -- | |
│ │ │ │ └─Linear (4) [2, 24, 24, 8192] [2, 24, 24, 2048] 16,779,264 -- | |
│ │ └─TimestepBlock (19) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─LoRACompatibleLinear (mapper) [1, 64] [1, 4096] 266,240 -- | |
│ │ │ └─LoRACompatibleLinear (mapper_sca) [1, 64] [1, 4096] 266,240 -- | |
│ │ │ └─LoRACompatibleLinear (mapper_crp) [1, 64] [1, 4096] 266,240 -- | |
│ │ └─AttnBlock (20) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Sequential (kv_mapper) [2, 308, 2048] [2, 308, 2048] -- -- | |
│ │ │ │ └─SiLU (0) [2, 308, 2048] [2, 308, 2048] -- -- | |
│ │ │ │ └─LoRACompatibleLinear (1) [2, 308, 2048] [2, 308, 2048] 4,196,352 -- | |
│ │ │ └─WuerstchenLayerNorm (norm) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Attention (attention) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ │ └─LoRACompatibleLinear (to_q) [2, 576, 2048] [2, 576, 2048] 4,196,352 -- | |
│ │ │ │ └─LoRACompatibleLinear (to_k) [2, 884, 2048] [2, 884, 2048] 4,196,352 -- | |
│ │ │ │ └─LoRACompatibleLinear (to_v) [2, 884, 2048] [2, 884, 2048] 4,196,352 -- | |
│ │ │ │ └─ModuleList (to_out) -- -- -- -- | |
│ │ │ │ │ └─LoRACompatibleLinear (0) [2, 576, 2048] [2, 576, 2048] 4,196,352 -- | |
│ │ │ │ │ └─Dropout (1) [2, 576, 2048] [2, 576, 2048] -- -- | |
│ │ └─ResBlockStageB (21) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Conv2d (depthwise) [2, 2048, 24, 24] [2, 2048, 24, 24] 20,480 [3, 3] | |
│ │ │ └─WuerstchenLayerNorm (norm) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Sequential (channelwise) [2, 24, 24, 2048] [2, 24, 24, 2048] -- -- | |
│ │ │ │ └─Linear (0) [2, 24, 24, 2048] [2, 24, 24, 8192] 16,785,408 -- | |
│ │ │ │ └─GELU (1) [2, 24, 24, 8192] [2, 24, 24, 8192] -- -- | |
│ │ │ │ └─GlobalResponseNorm (2) [2, 24, 24, 8192] [2, 24, 24, 8192] 16,384 -- | |
│ │ │ │ └─Dropout (3) [2, 24, 24, 8192] [2, 24, 24, 8192] -- -- | |
│ │ │ │ └─Linear (4) [2, 24, 24, 8192] [2, 24, 24, 2048] 16,779,264 -- | |
│ │ └─TimestepBlock (22) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─LoRACompatibleLinear (mapper) [1, 64] [1, 4096] 266,240 -- | |
│ │ │ └─LoRACompatibleLinear (mapper_sca) [1, 64] [1, 4096] 266,240 -- | |
│ │ │ └─LoRACompatibleLinear (mapper_crp) [1, 64] [1, 4096] 266,240 -- | |
│ │ └─AttnBlock (23) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Sequential (kv_mapper) [2, 308, 2048] [2, 308, 2048] -- -- | |
│ │ │ │ └─SiLU (0) [2, 308, 2048] [2, 308, 2048] -- -- | |
│ │ │ │ └─LoRACompatibleLinear (1) [2, 308, 2048] [2, 308, 2048] 4,196,352 -- | |
│ │ │ └─WuerstchenLayerNorm (norm) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Attention (attention) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ │ └─LoRACompatibleLinear (to_q) [2, 576, 2048] [2, 576, 2048] 4,196,352 -- | |
│ │ │ │ └─LoRACompatibleLinear (to_k) [2, 884, 2048] [2, 884, 2048] 4,196,352 -- | |
│ │ │ │ └─LoRACompatibleLinear (to_v) [2, 884, 2048] [2, 884, 2048] 4,196,352 -- | |
│ │ │ │ └─ModuleList (to_out) -- -- -- -- | |
│ │ │ │ │ └─LoRACompatibleLinear (0) [2, 576, 2048] [2, 576, 2048] 4,196,352 -- | |
│ │ │ │ │ └─Dropout (1) [2, 576, 2048] [2, 576, 2048] -- -- | |
│ │ └─ResBlockStageB (24) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Conv2d (depthwise) [2, 2048, 24, 24] [2, 2048, 24, 24] 20,480 [3, 3] | |
│ │ │ └─WuerstchenLayerNorm (norm) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Sequential (channelwise) [2, 24, 24, 2048] [2, 24, 24, 2048] -- -- | |
│ │ │ │ └─Linear (0) [2, 24, 24, 2048] [2, 24, 24, 8192] 16,785,408 -- | |
│ │ │ │ └─GELU (1) [2, 24, 24, 8192] [2, 24, 24, 8192] -- -- | |
│ │ │ │ └─GlobalResponseNorm (2) [2, 24, 24, 8192] [2, 24, 24, 8192] 16,384 -- | |
│ │ │ │ └─Dropout (3) [2, 24, 24, 8192] [2, 24, 24, 8192] -- -- | |
│ │ │ │ └─Linear (4) [2, 24, 24, 8192] [2, 24, 24, 2048] 16,779,264 -- | |
│ │ └─TimestepBlock (25) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─LoRACompatibleLinear (mapper) [1, 64] [1, 4096] 266,240 -- | |
│ │ │ └─LoRACompatibleLinear (mapper_sca) [1, 64] [1, 4096] 266,240 -- | |
│ │ │ └─LoRACompatibleLinear (mapper_crp) [1, 64] [1, 4096] 266,240 -- | |
│ │ └─AttnBlock (26) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Sequential (kv_mapper) [2, 308, 2048] [2, 308, 2048] -- -- | |
│ │ │ │ └─SiLU (0) [2, 308, 2048] [2, 308, 2048] -- -- | |
│ │ │ │ └─LoRACompatibleLinear (1) [2, 308, 2048] [2, 308, 2048] 4,196,352 -- | |
│ │ │ └─WuerstchenLayerNorm (norm) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Attention (attention) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ │ └─LoRACompatibleLinear (to_q) [2, 576, 2048] [2, 576, 2048] 4,196,352 -- | |
│ │ │ │ └─LoRACompatibleLinear (to_k) [2, 884, 2048] [2, 884, 2048] 4,196,352 -- | |
│ │ │ │ └─LoRACompatibleLinear (to_v) [2, 884, 2048] [2, 884, 2048] 4,196,352 -- | |
│ │ │ │ └─ModuleList (to_out) -- -- -- -- | |
│ │ │ │ │ └─LoRACompatibleLinear (0) [2, 576, 2048] [2, 576, 2048] 4,196,352 -- | |
│ │ │ │ │ └─Dropout (1) [2, 576, 2048] [2, 576, 2048] -- -- | |
│ │ └─ResBlockStageB (27) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Conv2d (depthwise) [2, 2048, 24, 24] [2, 2048, 24, 24] 20,480 [3, 3] | |
│ │ │ └─WuerstchenLayerNorm (norm) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Sequential (channelwise) [2, 24, 24, 2048] [2, 24, 24, 2048] -- -- | |
│ │ │ │ └─Linear (0) [2, 24, 24, 2048] [2, 24, 24, 8192] 16,785,408 -- | |
│ │ │ │ └─GELU (1) [2, 24, 24, 8192] [2, 24, 24, 8192] -- -- | |
│ │ │ │ └─GlobalResponseNorm (2) [2, 24, 24, 8192] [2, 24, 24, 8192] 16,384 -- | |
│ │ │ │ └─Dropout (3) [2, 24, 24, 8192] [2, 24, 24, 8192] -- -- | |
│ │ │ │ └─Linear (4) [2, 24, 24, 8192] [2, 24, 24, 2048] 16,779,264 -- | |
│ │ └─TimestepBlock (28) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─LoRACompatibleLinear (mapper) [1, 64] [1, 4096] 266,240 -- | |
│ │ │ └─LoRACompatibleLinear (mapper_sca) [1, 64] [1, 4096] 266,240 -- | |
│ │ │ └─LoRACompatibleLinear (mapper_crp) [1, 64] [1, 4096] 266,240 -- | |
│ │ └─AttnBlock (29) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Sequential (kv_mapper) [2, 308, 2048] [2, 308, 2048] -- -- | |
│ │ │ │ └─SiLU (0) [2, 308, 2048] [2, 308, 2048] -- -- | |
│ │ │ │ └─LoRACompatibleLinear (1) [2, 308, 2048] [2, 308, 2048] 4,196,352 -- | |
│ │ │ └─WuerstchenLayerNorm (norm) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Attention (attention) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ │ └─LoRACompatibleLinear (to_q) [2, 576, 2048] [2, 576, 2048] 4,196,352 -- | |
│ │ │ │ └─LoRACompatibleLinear (to_k) [2, 884, 2048] [2, 884, 2048] 4,196,352 -- | |
│ │ │ │ └─LoRACompatibleLinear (to_v) [2, 884, 2048] [2, 884, 2048] 4,196,352 -- | |
│ │ │ │ └─ModuleList (to_out) -- -- -- -- | |
│ │ │ │ │ └─LoRACompatibleLinear (0) [2, 576, 2048] [2, 576, 2048] 4,196,352 -- | |
│ │ │ │ │ └─Dropout (1) [2, 576, 2048] [2, 576, 2048] -- -- | |
│ │ └─ResBlockStageB (30) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Conv2d (depthwise) [2, 2048, 24, 24] [2, 2048, 24, 24] 20,480 [3, 3] | |
│ │ │ └─WuerstchenLayerNorm (norm) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Sequential (channelwise) [2, 24, 24, 2048] [2, 24, 24, 2048] -- -- | |
│ │ │ │ └─Linear (0) [2, 24, 24, 2048] [2, 24, 24, 8192] 16,785,408 -- | |
│ │ │ │ └─GELU (1) [2, 24, 24, 8192] [2, 24, 24, 8192] -- -- | |
│ │ │ │ └─GlobalResponseNorm (2) [2, 24, 24, 8192] [2, 24, 24, 8192] 16,384 -- | |
│ │ │ │ └─Dropout (3) [2, 24, 24, 8192] [2, 24, 24, 8192] -- -- | |
│ │ │ │ └─Linear (4) [2, 24, 24, 8192] [2, 24, 24, 2048] 16,779,264 -- | |
│ │ └─TimestepBlock (31) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─LoRACompatibleLinear (mapper) [1, 64] [1, 4096] 266,240 -- | |
│ │ │ └─LoRACompatibleLinear (mapper_sca) [1, 64] [1, 4096] 266,240 -- | |
│ │ │ └─LoRACompatibleLinear (mapper_crp) [1, 64] [1, 4096] 266,240 -- | |
│ │ └─AttnBlock (32) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Sequential (kv_mapper) [2, 308, 2048] [2, 308, 2048] -- -- | |
│ │ │ │ └─SiLU (0) [2, 308, 2048] [2, 308, 2048] -- -- | |
│ │ │ │ └─LoRACompatibleLinear (1) [2, 308, 2048] [2, 308, 2048] 4,196,352 -- | |
│ │ │ └─WuerstchenLayerNorm (norm) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Attention (attention) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ │ └─LoRACompatibleLinear (to_q) [2, 576, 2048] [2, 576, 2048] 4,196,352 -- | |
│ │ │ │ └─LoRACompatibleLinear (to_k) [2, 884, 2048] [2, 884, 2048] 4,196,352 -- | |
│ │ │ │ └─LoRACompatibleLinear (to_v) [2, 884, 2048] [2, 884, 2048] 4,196,352 -- | |
│ │ │ │ └─ModuleList (to_out) -- -- -- -- | |
│ │ │ │ │ └─LoRACompatibleLinear (0) [2, 576, 2048] [2, 576, 2048] 4,196,352 -- | |
│ │ │ │ │ └─Dropout (1) [2, 576, 2048] [2, 576, 2048] -- -- | |
│ │ └─ResBlockStageB (33) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Conv2d (depthwise) [2, 2048, 24, 24] [2, 2048, 24, 24] 20,480 [3, 3] | |
│ │ │ └─WuerstchenLayerNorm (norm) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Sequential (channelwise) [2, 24, 24, 2048] [2, 24, 24, 2048] -- -- | |
│ │ │ │ └─Linear (0) [2, 24, 24, 2048] [2, 24, 24, 8192] 16,785,408 -- | |
│ │ │ │ └─GELU (1) [2, 24, 24, 8192] [2, 24, 24, 8192] -- -- | |
│ │ │ │ └─GlobalResponseNorm (2) [2, 24, 24, 8192] [2, 24, 24, 8192] 16,384 -- | |
│ │ │ │ └─Dropout (3) [2, 24, 24, 8192] [2, 24, 24, 8192] -- -- | |
│ │ │ │ └─Linear (4) [2, 24, 24, 8192] [2, 24, 24, 2048] 16,779,264 -- | |
│ │ └─TimestepBlock (34) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─LoRACompatibleLinear (mapper) [1, 64] [1, 4096] 266,240 -- | |
│ │ │ └─LoRACompatibleLinear (mapper_sca) [1, 64] [1, 4096] 266,240 -- | |
│ │ │ └─LoRACompatibleLinear (mapper_crp) [1, 64] [1, 4096] 266,240 -- | |
│ │ └─AttnBlock (35) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Sequential (kv_mapper) [2, 308, 2048] [2, 308, 2048] -- -- | |
│ │ │ │ └─SiLU (0) [2, 308, 2048] [2, 308, 2048] -- -- | |
│ │ │ │ └─LoRACompatibleLinear (1) [2, 308, 2048] [2, 308, 2048] 4,196,352 -- | |
│ │ │ └─WuerstchenLayerNorm (norm) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Attention (attention) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ │ └─LoRACompatibleLinear (to_q) [2, 576, 2048] [2, 576, 2048] 4,196,352 -- | |
│ │ │ │ └─LoRACompatibleLinear (to_k) [2, 884, 2048] [2, 884, 2048] 4,196,352 -- | |
│ │ │ │ └─LoRACompatibleLinear (to_v) [2, 884, 2048] [2, 884, 2048] 4,196,352 -- | |
│ │ │ │ └─ModuleList (to_out) -- -- -- -- | |
│ │ │ │ │ └─LoRACompatibleLinear (0) [2, 576, 2048] [2, 576, 2048] 4,196,352 -- | |
│ │ │ │ │ └─Dropout (1) [2, 576, 2048] [2, 576, 2048] -- -- | |
│ │ └─ResBlockStageB (36) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Conv2d (depthwise) [2, 2048, 24, 24] [2, 2048, 24, 24] 20,480 [3, 3] | |
│ │ │ └─WuerstchenLayerNorm (norm) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Sequential (channelwise) [2, 24, 24, 2048] [2, 24, 24, 2048] -- -- | |
│ │ │ │ └─Linear (0) [2, 24, 24, 2048] [2, 24, 24, 8192] 16,785,408 -- | |
│ │ │ │ └─GELU (1) [2, 24, 24, 8192] [2, 24, 24, 8192] -- -- | |
│ │ │ │ └─GlobalResponseNorm (2) [2, 24, 24, 8192] [2, 24, 24, 8192] 16,384 -- | |
│ │ │ │ └─Dropout (3) [2, 24, 24, 8192] [2, 24, 24, 8192] -- -- | |
│ │ │ │ └─Linear (4) [2, 24, 24, 8192] [2, 24, 24, 2048] 16,779,264 -- | |
│ │ └─TimestepBlock (37) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─LoRACompatibleLinear (mapper) [1, 64] [1, 4096] 266,240 -- | |
│ │ │ └─LoRACompatibleLinear (mapper_sca) [1, 64] [1, 4096] 266,240 -- | |
│ │ │ └─LoRACompatibleLinear (mapper_crp) [1, 64] [1, 4096] 266,240 -- | |
│ │ └─AttnBlock (38) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Sequential (kv_mapper) [2, 308, 2048] [2, 308, 2048] -- -- | |
│ │ │ │ └─SiLU (0) [2, 308, 2048] [2, 308, 2048] -- -- | |
│ │ │ │ └─LoRACompatibleLinear (1) [2, 308, 2048] [2, 308, 2048] 4,196,352 -- | |
│ │ │ └─WuerstchenLayerNorm (norm) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Attention (attention) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ │ └─LoRACompatibleLinear (to_q) [2, 576, 2048] [2, 576, 2048] 4,196,352 -- | |
│ │ │ │ └─LoRACompatibleLinear (to_k) [2, 884, 2048] [2, 884, 2048] 4,196,352 -- | |
│ │ │ │ └─LoRACompatibleLinear (to_v) [2, 884, 2048] [2, 884, 2048] 4,196,352 -- | |
│ │ │ │ └─ModuleList (to_out) -- -- -- -- | |
│ │ │ │ │ └─LoRACompatibleLinear (0) [2, 576, 2048] [2, 576, 2048] 4,196,352 -- | |
│ │ │ │ │ └─Dropout (1) [2, 576, 2048] [2, 576, 2048] -- -- | |
│ │ └─ResBlockStageB (39) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Conv2d (depthwise) [2, 2048, 24, 24] [2, 2048, 24, 24] 20,480 [3, 3] | |
│ │ │ └─WuerstchenLayerNorm (norm) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Sequential (channelwise) [2, 24, 24, 2048] [2, 24, 24, 2048] -- -- | |
│ │ │ │ └─Linear (0) [2, 24, 24, 2048] [2, 24, 24, 8192] 16,785,408 -- | |
│ │ │ │ └─GELU (1) [2, 24, 24, 8192] [2, 24, 24, 8192] -- -- | |
│ │ │ │ └─GlobalResponseNorm (2) [2, 24, 24, 8192] [2, 24, 24, 8192] 16,384 -- | |
│ │ │ │ └─Dropout (3) [2, 24, 24, 8192] [2, 24, 24, 8192] -- -- | |
│ │ │ │ └─Linear (4) [2, 24, 24, 8192] [2, 24, 24, 2048] 16,779,264 -- | |
│ │ └─TimestepBlock (40) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─LoRACompatibleLinear (mapper) [1, 64] [1, 4096] 266,240 -- | |
│ │ │ └─LoRACompatibleLinear (mapper_sca) [1, 64] [1, 4096] 266,240 -- | |
│ │ │ └─LoRACompatibleLinear (mapper_crp) [1, 64] [1, 4096] 266,240 -- | |
│ │ └─AttnBlock (41) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Sequential (kv_mapper) [2, 308, 2048] [2, 308, 2048] -- -- | |
│ │ │ │ └─SiLU (0) [2, 308, 2048] [2, 308, 2048] -- -- | |
│ │ │ │ └─LoRACompatibleLinear (1) [2, 308, 2048] [2, 308, 2048] 4,196,352 -- | |
│ │ │ └─WuerstchenLayerNorm (norm) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Attention (attention) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ │ └─LoRACompatibleLinear (to_q) [2, 576, 2048] [2, 576, 2048] 4,196,352 -- | |
│ │ │ │ └─LoRACompatibleLinear (to_k) [2, 884, 2048] [2, 884, 2048] 4,196,352 -- | |
│ │ │ │ └─LoRACompatibleLinear (to_v) [2, 884, 2048] [2, 884, 2048] 4,196,352 -- | |
│ │ │ │ └─ModuleList (to_out) -- -- -- -- | |
│ │ │ │ │ └─LoRACompatibleLinear (0) [2, 576, 2048] [2, 576, 2048] 4,196,352 -- | |
│ │ │ │ │ └─Dropout (1) [2, 576, 2048] [2, 576, 2048] -- -- | |
│ │ └─ResBlockStageB (42) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Conv2d (depthwise) [2, 2048, 24, 24] [2, 2048, 24, 24] 20,480 [3, 3] | |
│ │ │ └─WuerstchenLayerNorm (norm) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Sequential (channelwise) [2, 24, 24, 2048] [2, 24, 24, 2048] -- -- | |
│ │ │ │ └─Linear (0) [2, 24, 24, 2048] [2, 24, 24, 8192] 16,785,408 -- | |
│ │ │ │ └─GELU (1) [2, 24, 24, 8192] [2, 24, 24, 8192] -- -- | |
│ │ │ │ └─GlobalResponseNorm (2) [2, 24, 24, 8192] [2, 24, 24, 8192] 16,384 -- | |
│ │ │ │ └─Dropout (3) [2, 24, 24, 8192] [2, 24, 24, 8192] -- -- | |
│ │ │ │ └─Linear (4) [2, 24, 24, 8192] [2, 24, 24, 2048] 16,779,264 -- | |
│ │ └─TimestepBlock (43) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─LoRACompatibleLinear (mapper) [1, 64] [1, 4096] 266,240 -- | |
│ │ │ └─LoRACompatibleLinear (mapper_sca) [1, 64] [1, 4096] 266,240 -- | |
│ │ │ └─LoRACompatibleLinear (mapper_crp) [1, 64] [1, 4096] 266,240 -- | |
│ │ └─AttnBlock (44) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Sequential (kv_mapper) [2, 308, 2048] [2, 308, 2048] -- -- | |
│ │ │ │ └─SiLU (0) [2, 308, 2048] [2, 308, 2048] -- -- | |
│ │ │ │ └─LoRACompatibleLinear (1) [2, 308, 2048] [2, 308, 2048] 4,196,352 -- | |
│ │ │ └─WuerstchenLayerNorm (norm) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Attention (attention) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ │ └─LoRACompatibleLinear (to_q) [2, 576, 2048] [2, 576, 2048] 4,196,352 -- | |
│ │ │ │ └─LoRACompatibleLinear (to_k) [2, 884, 2048] [2, 884, 2048] 4,196,352 -- | |
│ │ │ │ └─LoRACompatibleLinear (to_v) [2, 884, 2048] [2, 884, 2048] 4,196,352 -- | |
│ │ │ │ └─ModuleList (to_out) -- -- -- -- | |
│ │ │ │ │ └─LoRACompatibleLinear (0) [2, 576, 2048] [2, 576, 2048] 4,196,352 -- | |
│ │ │ │ │ └─Dropout (1) [2, 576, 2048] [2, 576, 2048] -- -- | |
│ │ └─ResBlockStageB (45) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Conv2d (depthwise) [2, 2048, 24, 24] [2, 2048, 24, 24] 20,480 [3, 3] | |
│ │ │ └─WuerstchenLayerNorm (norm) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Sequential (channelwise) [2, 24, 24, 2048] [2, 24, 24, 2048] -- -- | |
│ │ │ │ └─Linear (0) [2, 24, 24, 2048] [2, 24, 24, 8192] 16,785,408 -- | |
│ │ │ │ └─GELU (1) [2, 24, 24, 8192] [2, 24, 24, 8192] -- -- | |
│ │ │ │ └─GlobalResponseNorm (2) [2, 24, 24, 8192] [2, 24, 24, 8192] 16,384 -- | |
│ │ │ │ └─Dropout (3) [2, 24, 24, 8192] [2, 24, 24, 8192] -- -- | |
│ │ │ │ └─Linear (4) [2, 24, 24, 8192] [2, 24, 24, 2048] 16,779,264 -- | |
│ │ └─TimestepBlock (46) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─LoRACompatibleLinear (mapper) [1, 64] [1, 4096] 266,240 -- | |
│ │ │ └─LoRACompatibleLinear (mapper_sca) [1, 64] [1, 4096] 266,240 -- | |
│ │ │ └─LoRACompatibleLinear (mapper_crp) [1, 64] [1, 4096] 266,240 -- | |
│ │ └─AttnBlock (47) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Sequential (kv_mapper) [2, 308, 2048] [2, 308, 2048] -- -- | |
│ │ │ │ └─SiLU (0) [2, 308, 2048] [2, 308, 2048] -- -- | |
│ │ │ │ └─LoRACompatibleLinear (1) [2, 308, 2048] [2, 308, 2048] 4,196,352 -- | |
│ │ │ └─WuerstchenLayerNorm (norm) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Attention (attention) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ │ └─LoRACompatibleLinear (to_q) [2, 576, 2048] [2, 576, 2048] 4,196,352 -- | |
│ │ │ │ └─LoRACompatibleLinear (to_k) [2, 884, 2048] [2, 884, 2048] 4,196,352 -- | |
│ │ │ │ └─LoRACompatibleLinear (to_v) [2, 884, 2048] [2, 884, 2048] 4,196,352 -- | |
│ │ │ │ └─ModuleList (to_out) -- -- -- -- | |
│ │ │ │ │ └─LoRACompatibleLinear (0) [2, 576, 2048] [2, 576, 2048] 4,196,352 -- | |
│ │ │ │ │ └─Dropout (1) [2, 576, 2048] [2, 576, 2048] -- -- | |
│ │ └─ResBlockStageB (48) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Conv2d (depthwise) [2, 2048, 24, 24] [2, 2048, 24, 24] 20,480 [3, 3] | |
│ │ │ └─WuerstchenLayerNorm (norm) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Sequential (channelwise) [2, 24, 24, 2048] [2, 24, 24, 2048] -- -- | |
│ │ │ │ └─Linear (0) [2, 24, 24, 2048] [2, 24, 24, 8192] 16,785,408 -- | |
│ │ │ │ └─GELU (1) [2, 24, 24, 8192] [2, 24, 24, 8192] -- -- | |
│ │ │ │ └─GlobalResponseNorm (2) [2, 24, 24, 8192] [2, 24, 24, 8192] 16,384 -- | |
│ │ │ │ └─Dropout (3) [2, 24, 24, 8192] [2, 24, 24, 8192] -- -- | |
│ │ │ │ └─Linear (4) [2, 24, 24, 8192] [2, 24, 24, 2048] 16,779,264 -- | |
│ │ └─TimestepBlock (49) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─LoRACompatibleLinear (mapper) [1, 64] [1, 4096] 266,240 -- | |
│ │ │ └─LoRACompatibleLinear (mapper_sca) [1, 64] [1, 4096] 266,240 -- | |
│ │ │ └─LoRACompatibleLinear (mapper_crp) [1, 64] [1, 4096] 266,240 -- | |
│ │ └─AttnBlock (50) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Sequential (kv_mapper) [2, 308, 2048] [2, 308, 2048] -- -- | |
│ │ │ │ └─SiLU (0) [2, 308, 2048] [2, 308, 2048] -- -- | |
│ │ │ │ └─LoRACompatibleLinear (1) [2, 308, 2048] [2, 308, 2048] 4,196,352 -- | |
│ │ │ └─WuerstchenLayerNorm (norm) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Attention (attention) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ │ └─LoRACompatibleLinear (to_q) [2, 576, 2048] [2, 576, 2048] 4,196,352 -- | |
│ │ │ │ └─LoRACompatibleLinear (to_k) [2, 884, 2048] [2, 884, 2048] 4,196,352 -- | |
│ │ │ │ └─LoRACompatibleLinear (to_v) [2, 884, 2048] [2, 884, 2048] 4,196,352 -- | |
│ │ │ │ └─ModuleList (to_out) -- -- -- -- | |
│ │ │ │ │ └─LoRACompatibleLinear (0) [2, 576, 2048] [2, 576, 2048] 4,196,352 -- | |
│ │ │ │ │ └─Dropout (1) [2, 576, 2048] [2, 576, 2048] -- -- | |
│ │ └─ResBlockStageB (51) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Conv2d (depthwise) [2, 2048, 24, 24] [2, 2048, 24, 24] 20,480 [3, 3] | |
│ │ │ └─WuerstchenLayerNorm (norm) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Sequential (channelwise) [2, 24, 24, 2048] [2, 24, 24, 2048] -- -- | |
│ │ │ │ └─Linear (0) [2, 24, 24, 2048] [2, 24, 24, 8192] 16,785,408 -- | |
│ │ │ │ └─GELU (1) [2, 24, 24, 8192] [2, 24, 24, 8192] -- -- | |
│ │ │ │ └─GlobalResponseNorm (2) [2, 24, 24, 8192] [2, 24, 24, 8192] 16,384 -- | |
│ │ │ │ └─Dropout (3) [2, 24, 24, 8192] [2, 24, 24, 8192] -- -- | |
│ │ │ │ └─Linear (4) [2, 24, 24, 8192] [2, 24, 24, 2048] 16,779,264 -- | |
│ │ └─TimestepBlock (52) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─LoRACompatibleLinear (mapper) [1, 64] [1, 4096] 266,240 -- | |
│ │ │ └─LoRACompatibleLinear (mapper_sca) [1, 64] [1, 4096] 266,240 -- | |
│ │ │ └─LoRACompatibleLinear (mapper_crp) [1, 64] [1, 4096] 266,240 -- | |
│ │ └─AttnBlock (53) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Sequential (kv_mapper) [2, 308, 2048] [2, 308, 2048] -- -- | |
│ │ │ │ └─SiLU (0) [2, 308, 2048] [2, 308, 2048] -- -- | |
│ │ │ │ └─LoRACompatibleLinear (1) [2, 308, 2048] [2, 308, 2048] 4,196,352 -- | |
│ │ │ └─WuerstchenLayerNorm (norm) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Attention (attention) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ │ └─LoRACompatibleLinear (to_q) [2, 576, 2048] [2, 576, 2048] 4,196,352 -- | |
│ │ │ │ └─LoRACompatibleLinear (to_k) [2, 884, 2048] [2, 884, 2048] 4,196,352 -- | |
│ │ │ │ └─LoRACompatibleLinear (to_v) [2, 884, 2048] [2, 884, 2048] 4,196,352 -- | |
│ │ │ │ └─ModuleList (to_out) -- -- -- -- | |
│ │ │ │ │ └─LoRACompatibleLinear (0) [2, 576, 2048] [2, 576, 2048] 4,196,352 -- | |
│ │ │ │ │ └─Dropout (1) [2, 576, 2048] [2, 576, 2048] -- -- | |
│ │ └─ResBlockStageB (54) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Conv2d (depthwise) [2, 2048, 24, 24] [2, 2048, 24, 24] 20,480 [3, 3] | |
│ │ │ └─WuerstchenLayerNorm (norm) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Sequential (channelwise) [2, 24, 24, 2048] [2, 24, 24, 2048] -- -- | |
│ │ │ │ └─Linear (0) [2, 24, 24, 2048] [2, 24, 24, 8192] 16,785,408 -- | |
│ │ │ │ └─GELU (1) [2, 24, 24, 8192] [2, 24, 24, 8192] -- -- | |
│ │ │ │ └─GlobalResponseNorm (2) [2, 24, 24, 8192] [2, 24, 24, 8192] 16,384 -- | |
│ │ │ │ └─Dropout (3) [2, 24, 24, 8192] [2, 24, 24, 8192] -- -- | |
│ │ │ │ └─Linear (4) [2, 24, 24, 8192] [2, 24, 24, 2048] 16,779,264 -- | |
│ │ └─TimestepBlock (55) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─LoRACompatibleLinear (mapper) [1, 64] [1, 4096] 266,240 -- | |
│ │ │ └─LoRACompatibleLinear (mapper_sca) [1, 64] [1, 4096] 266,240 -- | |
│ │ │ └─LoRACompatibleLinear (mapper_crp) [1, 64] [1, 4096] 266,240 -- | |
│ │ └─AttnBlock (56) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Sequential (kv_mapper) [2, 308, 2048] [2, 308, 2048] -- -- | |
│ │ │ │ └─SiLU (0) [2, 308, 2048] [2, 308, 2048] -- -- | |
│ │ │ │ └─LoRACompatibleLinear (1) [2, 308, 2048] [2, 308, 2048] 4,196,352 -- | |
│ │ │ └─WuerstchenLayerNorm (norm) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Attention (attention) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ │ └─LoRACompatibleLinear (to_q) [2, 576, 2048] [2, 576, 2048] 4,196,352 -- | |
│ │ │ │ └─LoRACompatibleLinear (to_k) [2, 884, 2048] [2, 884, 2048] 4,196,352 -- | |
│ │ │ │ └─LoRACompatibleLinear (to_v) [2, 884, 2048] [2, 884, 2048] 4,196,352 -- | |
│ │ │ │ └─ModuleList (to_out) -- -- -- -- | |
│ │ │ │ │ └─LoRACompatibleLinear (0) [2, 576, 2048] [2, 576, 2048] 4,196,352 -- | |
│ │ │ │ │ └─Dropout (1) [2, 576, 2048] [2, 576, 2048] -- -- | |
│ │ └─ResBlockStageB (57) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Conv2d (depthwise) [2, 2048, 24, 24] [2, 2048, 24, 24] 20,480 [3, 3] | |
│ │ │ └─WuerstchenLayerNorm (norm) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Sequential (channelwise) [2, 24, 24, 2048] [2, 24, 24, 2048] -- -- | |
│ │ │ │ └─Linear (0) [2, 24, 24, 2048] [2, 24, 24, 8192] 16,785,408 -- | |
│ │ │ │ └─GELU (1) [2, 24, 24, 8192] [2, 24, 24, 8192] -- -- | |
│ │ │ │ └─GlobalResponseNorm (2) [2, 24, 24, 8192] [2, 24, 24, 8192] 16,384 -- | |
│ │ │ │ └─Dropout (3) [2, 24, 24, 8192] [2, 24, 24, 8192] -- -- | |
│ │ │ │ └─Linear (4) [2, 24, 24, 8192] [2, 24, 24, 2048] 16,779,264 -- | |
│ │ └─TimestepBlock (58) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─LoRACompatibleLinear (mapper) [1, 64] [1, 4096] 266,240 -- | |
│ │ │ └─LoRACompatibleLinear (mapper_sca) [1, 64] [1, 4096] 266,240 -- | |
│ │ │ └─LoRACompatibleLinear (mapper_crp) [1, 64] [1, 4096] 266,240 -- | |
│ │ └─AttnBlock (59) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Sequential (kv_mapper) [2, 308, 2048] [2, 308, 2048] -- -- | |
│ │ │ │ └─SiLU (0) [2, 308, 2048] [2, 308, 2048] -- -- | |
│ │ │ │ └─LoRACompatibleLinear (1) [2, 308, 2048] [2, 308, 2048] 4,196,352 -- | |
│ │ │ └─WuerstchenLayerNorm (norm) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Attention (attention) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ │ └─LoRACompatibleLinear (to_q) [2, 576, 2048] [2, 576, 2048] 4,196,352 -- | |
│ │ │ │ └─LoRACompatibleLinear (to_k) [2, 884, 2048] [2, 884, 2048] 4,196,352 -- | |
│ │ │ │ └─LoRACompatibleLinear (to_v) [2, 884, 2048] [2, 884, 2048] 4,196,352 -- | |
│ │ │ │ └─ModuleList (to_out) -- -- -- -- | |
│ │ │ │ │ └─LoRACompatibleLinear (0) [2, 576, 2048] [2, 576, 2048] 4,196,352 -- | |
│ │ │ │ │ └─Dropout (1) [2, 576, 2048] [2, 576, 2048] -- -- | |
│ │ └─ResBlockStageB (60) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Conv2d (depthwise) [2, 2048, 24, 24] [2, 2048, 24, 24] 20,480 [3, 3] | |
│ │ │ └─WuerstchenLayerNorm (norm) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Sequential (channelwise) [2, 24, 24, 2048] [2, 24, 24, 2048] -- -- | |
│ │ │ │ └─Linear (0) [2, 24, 24, 2048] [2, 24, 24, 8192] 16,785,408 -- | |
│ │ │ │ └─GELU (1) [2, 24, 24, 8192] [2, 24, 24, 8192] -- -- | |
│ │ │ │ └─GlobalResponseNorm (2) [2, 24, 24, 8192] [2, 24, 24, 8192] 16,384 -- | |
│ │ │ │ └─Dropout (3) [2, 24, 24, 8192] [2, 24, 24, 8192] -- -- | |
│ │ │ │ └─Linear (4) [2, 24, 24, 8192] [2, 24, 24, 2048] 16,779,264 -- | |
│ │ └─TimestepBlock (61) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─LoRACompatibleLinear (mapper) [1, 64] [1, 4096] 266,240 -- | |
│ │ │ └─LoRACompatibleLinear (mapper_sca) [1, 64] [1, 4096] 266,240 -- | |
│ │ │ └─LoRACompatibleLinear (mapper_crp) [1, 64] [1, 4096] 266,240 -- | |
│ │ └─AttnBlock (62) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Sequential (kv_mapper) [2, 308, 2048] [2, 308, 2048] -- -- | |
│ │ │ │ └─SiLU (0) [2, 308, 2048] [2, 308, 2048] -- -- | |
│ │ │ │ └─LoRACompatibleLinear (1) [2, 308, 2048] [2, 308, 2048] 4,196,352 -- | |
│ │ │ └─WuerstchenLayerNorm (norm) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Attention (attention) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ │ └─LoRACompatibleLinear (to_q) [2, 576, 2048] [2, 576, 2048] 4,196,352 -- | |
│ │ │ │ └─LoRACompatibleLinear (to_k) [2, 884, 2048] [2, 884, 2048] 4,196,352 -- | |
│ │ │ │ └─LoRACompatibleLinear (to_v) [2, 884, 2048] [2, 884, 2048] 4,196,352 -- | |
│ │ │ │ └─ModuleList (to_out) -- -- -- -- | |
│ │ │ │ │ └─LoRACompatibleLinear (0) [2, 576, 2048] [2, 576, 2048] 4,196,352 -- | |
│ │ │ │ │ └─Dropout (1) [2, 576, 2048] [2, 576, 2048] -- -- | |
│ │ └─ResBlockStageB (63) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Conv2d (depthwise) [2, 2048, 24, 24] [2, 2048, 24, 24] 20,480 [3, 3] | |
│ │ │ └─WuerstchenLayerNorm (norm) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Sequential (channelwise) [2, 24, 24, 2048] [2, 24, 24, 2048] -- -- | |
│ │ │ │ └─Linear (0) [2, 24, 24, 2048] [2, 24, 24, 8192] 16,785,408 -- | |
│ │ │ │ └─GELU (1) [2, 24, 24, 8192] [2, 24, 24, 8192] -- -- | |
│ │ │ │ └─GlobalResponseNorm (2) [2, 24, 24, 8192] [2, 24, 24, 8192] 16,384 -- | |
│ │ │ │ └─Dropout (3) [2, 24, 24, 8192] [2, 24, 24, 8192] -- -- | |
│ │ │ │ └─Linear (4) [2, 24, 24, 8192] [2, 24, 24, 2048] 16,779,264 -- | |
│ │ └─TimestepBlock (64) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─LoRACompatibleLinear (mapper) [1, 64] [1, 4096] 266,240 -- | |
│ │ │ └─LoRACompatibleLinear (mapper_sca) [1, 64] [1, 4096] 266,240 -- | |
│ │ │ └─LoRACompatibleLinear (mapper_crp) [1, 64] [1, 4096] 266,240 -- | |
│ │ └─AttnBlock (65) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Sequential (kv_mapper) [2, 308, 2048] [2, 308, 2048] -- -- | |
│ │ │ │ └─SiLU (0) [2, 308, 2048] [2, 308, 2048] -- -- | |
│ │ │ │ └─LoRACompatibleLinear (1) [2, 308, 2048] [2, 308, 2048] 4,196,352 -- | |
│ │ │ └─WuerstchenLayerNorm (norm) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Attention (attention) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ │ └─LoRACompatibleLinear (to_q) [2, 576, 2048] [2, 576, 2048] 4,196,352 -- | |
│ │ │ │ └─LoRACompatibleLinear (to_k) [2, 884, 2048] [2, 884, 2048] 4,196,352 -- | |
│ │ │ │ └─LoRACompatibleLinear (to_v) [2, 884, 2048] [2, 884, 2048] 4,196,352 -- | |
│ │ │ │ └─ModuleList (to_out) -- -- -- -- | |
│ │ │ │ │ └─LoRACompatibleLinear (0) [2, 576, 2048] [2, 576, 2048] 4,196,352 -- | |
│ │ │ │ │ └─Dropout (1) [2, 576, 2048] [2, 576, 2048] -- -- | |
│ │ └─ResBlockStageB (66) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Conv2d (depthwise) [2, 2048, 24, 24] [2, 2048, 24, 24] 20,480 [3, 3] | |
│ │ │ └─WuerstchenLayerNorm (norm) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Sequential (channelwise) [2, 24, 24, 2048] [2, 24, 24, 2048] -- -- | |
│ │ │ │ └─Linear (0) [2, 24, 24, 2048] [2, 24, 24, 8192] 16,785,408 -- | |
│ │ │ │ └─GELU (1) [2, 24, 24, 8192] [2, 24, 24, 8192] -- -- | |
│ │ │ │ └─GlobalResponseNorm (2) [2, 24, 24, 8192] [2, 24, 24, 8192] 16,384 -- | |
│ │ │ │ └─Dropout (3) [2, 24, 24, 8192] [2, 24, 24, 8192] -- -- | |
│ │ │ │ └─Linear (4) [2, 24, 24, 8192] [2, 24, 24, 2048] 16,779,264 -- | |
│ │ └─TimestepBlock (67) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─LoRACompatibleLinear (mapper) [1, 64] [1, 4096] 266,240 -- | |
│ │ │ └─LoRACompatibleLinear (mapper_sca) [1, 64] [1, 4096] 266,240 -- | |
│ │ │ └─LoRACompatibleLinear (mapper_crp) [1, 64] [1, 4096] 266,240 -- | |
│ │ └─AttnBlock (68) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Sequential (kv_mapper) [2, 308, 2048] [2, 308, 2048] -- -- | |
│ │ │ │ └─SiLU (0) [2, 308, 2048] [2, 308, 2048] -- -- | |
│ │ │ │ └─LoRACompatibleLinear (1) [2, 308, 2048] [2, 308, 2048] 4,196,352 -- | |
│ │ │ └─WuerstchenLayerNorm (norm) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Attention (attention) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ │ └─LoRACompatibleLinear (to_q) [2, 576, 2048] [2, 576, 2048] 4,196,352 -- | |
│ │ │ │ └─LoRACompatibleLinear (to_k) [2, 884, 2048] [2, 884, 2048] 4,196,352 -- | |
│ │ │ │ └─LoRACompatibleLinear (to_v) [2, 884, 2048] [2, 884, 2048] 4,196,352 -- | |
│ │ │ │ └─ModuleList (to_out) -- -- -- -- | |
│ │ │ │ │ └─LoRACompatibleLinear (0) [2, 576, 2048] [2, 576, 2048] 4,196,352 -- | |
│ │ │ │ │ └─Dropout (1) [2, 576, 2048] [2, 576, 2048] -- -- | |
│ │ └─ResBlockStageB (69) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Conv2d (depthwise) [2, 2048, 24, 24] [2, 2048, 24, 24] 20,480 [3, 3] | |
│ │ │ └─WuerstchenLayerNorm (norm) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Sequential (channelwise) [2, 24, 24, 2048] [2, 24, 24, 2048] -- -- | |
│ │ │ │ └─Linear (0) [2, 24, 24, 2048] [2, 24, 24, 8192] 16,785,408 -- | |
│ │ │ │ └─GELU (1) [2, 24, 24, 8192] [2, 24, 24, 8192] -- -- | |
│ │ │ │ └─GlobalResponseNorm (2) [2, 24, 24, 8192] [2, 24, 24, 8192] 16,384 -- | |
│ │ │ │ └─Dropout (3) [2, 24, 24, 8192] [2, 24, 24, 8192] -- -- | |
│ │ │ │ └─Linear (4) [2, 24, 24, 8192] [2, 24, 24, 2048] 16,779,264 -- | |
│ │ └─TimestepBlock (70) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─LoRACompatibleLinear (mapper) [1, 64] [1, 4096] 266,240 -- | |
│ │ │ └─LoRACompatibleLinear (mapper_sca) [1, 64] [1, 4096] 266,240 -- | |
│ │ │ └─LoRACompatibleLinear (mapper_crp) [1, 64] [1, 4096] 266,240 -- | |
│ │ └─AttnBlock (71) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Sequential (kv_mapper) [2, 308, 2048] [2, 308, 2048] -- -- | |
│ │ │ │ └─SiLU (0) [2, 308, 2048] [2, 308, 2048] -- -- | |
│ │ │ │ └─LoRACompatibleLinear (1) [2, 308, 2048] [2, 308, 2048] 4,196,352 -- | |
│ │ │ └─WuerstchenLayerNorm (norm) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Attention (attention) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ │ └─LoRACompatibleLinear (to_q) [2, 576, 2048] [2, 576, 2048] 4,196,352 -- | |
│ │ │ │ └─LoRACompatibleLinear (to_k) [2, 884, 2048] [2, 884, 2048] 4,196,352 -- | |
│ │ │ │ └─LoRACompatibleLinear (to_v) [2, 884, 2048] [2, 884, 2048] 4,196,352 -- | |
│ │ │ │ └─ModuleList (to_out) -- -- -- -- | |
│ │ │ │ │ └─LoRACompatibleLinear (0) [2, 576, 2048] [2, 576, 2048] 4,196,352 -- | |
│ │ │ │ │ └─Dropout (1) [2, 576, 2048] [2, 576, 2048] -- -- | |
├─ModuleList (up_blocks) -- -- (recursive) -- | |
│ └─ModuleList (0) -- -- -- -- | |
│ │ └─ResBlockStageB (0) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Conv2d (depthwise) [2, 2048, 24, 24] [2, 2048, 24, 24] 20,480 [3, 3] | |
│ │ │ └─WuerstchenLayerNorm (norm) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Sequential (channelwise) [2, 24, 24, 2048] [2, 24, 24, 2048] -- -- | |
│ │ │ │ └─Linear (0) [2, 24, 24, 2048] [2, 24, 24, 8192] 16,785,408 -- | |
│ │ │ │ └─GELU (1) [2, 24, 24, 8192] [2, 24, 24, 8192] -- -- | |
│ │ │ │ └─GlobalResponseNorm (2) [2, 24, 24, 8192] [2, 24, 24, 8192] 16,384 -- | |
│ │ │ │ └─Dropout (3) [2, 24, 24, 8192] [2, 24, 24, 8192] -- -- | |
│ │ │ │ └─Linear (4) [2, 24, 24, 8192] [2, 24, 24, 2048] 16,779,264 -- | |
│ │ └─TimestepBlock (1) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─LoRACompatibleLinear (mapper) [1, 64] [1, 4096] 266,240 -- | |
│ │ │ └─LoRACompatibleLinear (mapper_sca) [1, 64] [1, 4096] 266,240 -- | |
│ │ │ └─LoRACompatibleLinear (mapper_crp) [1, 64] [1, 4096] 266,240 -- | |
│ │ └─AttnBlock (2) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Sequential (kv_mapper) [2, 308, 2048] [2, 308, 2048] -- -- | |
│ │ │ │ └─SiLU (0) [2, 308, 2048] [2, 308, 2048] -- -- | |
│ │ │ │ └─LoRACompatibleLinear (1) [2, 308, 2048] [2, 308, 2048] 4,196,352 -- | |
│ │ │ └─WuerstchenLayerNorm (norm) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Attention (attention) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ │ └─LoRACompatibleLinear (to_q) [2, 576, 2048] [2, 576, 2048] 4,196,352 -- | |
│ │ │ │ └─LoRACompatibleLinear (to_k) [2, 884, 2048] [2, 884, 2048] 4,196,352 -- | |
│ │ │ │ └─LoRACompatibleLinear (to_v) [2, 884, 2048] [2, 884, 2048] 4,196,352 -- | |
│ │ │ │ └─ModuleList (to_out) -- -- -- -- | |
│ │ │ │ │ └─LoRACompatibleLinear (0) [2, 576, 2048] [2, 576, 2048] 4,196,352 -- | |
│ │ │ │ │ └─Dropout (1) [2, 576, 2048] [2, 576, 2048] -- -- | |
│ │ └─ResBlockStageB (3) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Conv2d (depthwise) [2, 2048, 24, 24] [2, 2048, 24, 24] 20,480 [3, 3] | |
│ │ │ └─WuerstchenLayerNorm (norm) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Sequential (channelwise) [2, 24, 24, 2048] [2, 24, 24, 2048] -- -- | |
│ │ │ │ └─Linear (0) [2, 24, 24, 2048] [2, 24, 24, 8192] 16,785,408 -- | |
│ │ │ │ └─GELU (1) [2, 24, 24, 8192] [2, 24, 24, 8192] -- -- | |
│ │ │ │ └─GlobalResponseNorm (2) [2, 24, 24, 8192] [2, 24, 24, 8192] 16,384 -- | |
│ │ │ │ └─Dropout (3) [2, 24, 24, 8192] [2, 24, 24, 8192] -- -- | |
│ │ │ │ └─Linear (4) [2, 24, 24, 8192] [2, 24, 24, 2048] 16,779,264 -- | |
│ │ └─TimestepBlock (4) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─LoRACompatibleLinear (mapper) [1, 64] [1, 4096] 266,240 -- | |
│ │ │ └─LoRACompatibleLinear (mapper_sca) [1, 64] [1, 4096] 266,240 -- | |
│ │ │ └─LoRACompatibleLinear (mapper_crp) [1, 64] [1, 4096] 266,240 -- | |
│ │ └─AttnBlock (5) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Sequential (kv_mapper) [2, 308, 2048] [2, 308, 2048] -- -- | |
│ │ │ │ └─SiLU (0) [2, 308, 2048] [2, 308, 2048] -- -- | |
│ │ │ │ └─LoRACompatibleLinear (1) [2, 308, 2048] [2, 308, 2048] 4,196,352 -- | |
│ │ │ └─WuerstchenLayerNorm (norm) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Attention (attention) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ │ └─LoRACompatibleLinear (to_q) [2, 576, 2048] [2, 576, 2048] 4,196,352 -- | |
│ │ │ │ └─LoRACompatibleLinear (to_k) [2, 884, 2048] [2, 884, 2048] 4,196,352 -- | |
│ │ │ │ └─LoRACompatibleLinear (to_v) [2, 884, 2048] [2, 884, 2048] 4,196,352 -- | |
│ │ │ │ └─ModuleList (to_out) -- -- -- -- | |
│ │ │ │ │ └─LoRACompatibleLinear (0) [2, 576, 2048] [2, 576, 2048] 4,196,352 -- | |
│ │ │ │ │ └─Dropout (1) [2, 576, 2048] [2, 576, 2048] -- -- | |
│ │ └─ResBlockStageB (6) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Conv2d (depthwise) [2, 2048, 24, 24] [2, 2048, 24, 24] 20,480 [3, 3] | |
│ │ │ └─WuerstchenLayerNorm (norm) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Sequential (channelwise) [2, 24, 24, 2048] [2, 24, 24, 2048] -- -- | |
│ │ │ │ └─Linear (0) [2, 24, 24, 2048] [2, 24, 24, 8192] 16,785,408 -- | |
│ │ │ │ └─GELU (1) [2, 24, 24, 8192] [2, 24, 24, 8192] -- -- | |
│ │ │ │ └─GlobalResponseNorm (2) [2, 24, 24, 8192] [2, 24, 24, 8192] 16,384 -- | |
│ │ │ │ └─Dropout (3) [2, 24, 24, 8192] [2, 24, 24, 8192] -- -- | |
│ │ │ │ └─Linear (4) [2, 24, 24, 8192] [2, 24, 24, 2048] 16,779,264 -- | |
│ │ └─TimestepBlock (7) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─LoRACompatibleLinear (mapper) [1, 64] [1, 4096] 266,240 -- | |
│ │ │ └─LoRACompatibleLinear (mapper_sca) [1, 64] [1, 4096] 266,240 -- | |
│ │ │ └─LoRACompatibleLinear (mapper_crp) [1, 64] [1, 4096] 266,240 -- | |
│ │ └─AttnBlock (8) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Sequential (kv_mapper) [2, 308, 2048] [2, 308, 2048] -- -- | |
│ │ │ │ └─SiLU (0) [2, 308, 2048] [2, 308, 2048] -- -- | |
│ │ │ │ └─LoRACompatibleLinear (1) [2, 308, 2048] [2, 308, 2048] 4,196,352 -- | |
│ │ │ └─WuerstchenLayerNorm (norm) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Attention (attention) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ │ └─LoRACompatibleLinear (to_q) [2, 576, 2048] [2, 576, 2048] 4,196,352 -- | |
│ │ │ │ └─LoRACompatibleLinear (to_k) [2, 884, 2048] [2, 884, 2048] 4,196,352 -- | |
│ │ │ │ └─LoRACompatibleLinear (to_v) [2, 884, 2048] [2, 884, 2048] 4,196,352 -- | |
│ │ │ │ └─ModuleList (to_out) -- -- -- -- | |
│ │ │ │ │ └─LoRACompatibleLinear (0) [2, 576, 2048] [2, 576, 2048] 4,196,352 -- | |
│ │ │ │ │ └─Dropout (1) [2, 576, 2048] [2, 576, 2048] -- -- | |
│ │ └─ResBlockStageB (9) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Conv2d (depthwise) [2, 2048, 24, 24] [2, 2048, 24, 24] 20,480 [3, 3] | |
│ │ │ └─WuerstchenLayerNorm (norm) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Sequential (channelwise) [2, 24, 24, 2048] [2, 24, 24, 2048] -- -- | |
│ │ │ │ └─Linear (0) [2, 24, 24, 2048] [2, 24, 24, 8192] 16,785,408 -- | |
│ │ │ │ └─GELU (1) [2, 24, 24, 8192] [2, 24, 24, 8192] -- -- | |
│ │ │ │ └─GlobalResponseNorm (2) [2, 24, 24, 8192] [2, 24, 24, 8192] 16,384 -- | |
│ │ │ │ └─Dropout (3) [2, 24, 24, 8192] [2, 24, 24, 8192] -- -- | |
│ │ │ │ └─Linear (4) [2, 24, 24, 8192] [2, 24, 24, 2048] 16,779,264 -- | |
│ │ └─TimestepBlock (10) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─LoRACompatibleLinear (mapper) [1, 64] [1, 4096] 266,240 -- | |
│ │ │ └─LoRACompatibleLinear (mapper_sca) [1, 64] [1, 4096] 266,240 -- | |
│ │ │ └─LoRACompatibleLinear (mapper_crp) [1, 64] [1, 4096] 266,240 -- | |
│ │ └─AttnBlock (11) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Sequential (kv_mapper) [2, 308, 2048] [2, 308, 2048] -- -- | |
│ │ │ │ └─SiLU (0) [2, 308, 2048] [2, 308, 2048] -- -- | |
│ │ │ │ └─LoRACompatibleLinear (1) [2, 308, 2048] [2, 308, 2048] 4,196,352 -- | |
│ │ │ └─WuerstchenLayerNorm (norm) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Attention (attention) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ │ └─LoRACompatibleLinear (to_q) [2, 576, 2048] [2, 576, 2048] 4,196,352 -- | |
│ │ │ │ └─LoRACompatibleLinear (to_k) [2, 884, 2048] [2, 884, 2048] 4,196,352 -- | |
│ │ │ │ └─LoRACompatibleLinear (to_v) [2, 884, 2048] [2, 884, 2048] 4,196,352 -- | |
│ │ │ │ └─ModuleList (to_out) -- -- -- -- | |
│ │ │ │ │ └─LoRACompatibleLinear (0) [2, 576, 2048] [2, 576, 2048] 4,196,352 -- | |
│ │ │ │ │ └─Dropout (1) [2, 576, 2048] [2, 576, 2048] -- -- | |
│ │ └─ResBlockStageB (12) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Conv2d (depthwise) [2, 2048, 24, 24] [2, 2048, 24, 24] 20,480 [3, 3] | |
│ │ │ └─WuerstchenLayerNorm (norm) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Sequential (channelwise) [2, 24, 24, 2048] [2, 24, 24, 2048] -- -- | |
│ │ │ │ └─Linear (0) [2, 24, 24, 2048] [2, 24, 24, 8192] 16,785,408 -- | |
│ │ │ │ └─GELU (1) [2, 24, 24, 8192] [2, 24, 24, 8192] -- -- | |
│ │ │ │ └─GlobalResponseNorm (2) [2, 24, 24, 8192] [2, 24, 24, 8192] 16,384 -- | |
│ │ │ │ └─Dropout (3) [2, 24, 24, 8192] [2, 24, 24, 8192] -- -- | |
│ │ │ │ └─Linear (4) [2, 24, 24, 8192] [2, 24, 24, 2048] 16,779,264 -- | |
│ │ └─TimestepBlock (13) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─LoRACompatibleLinear (mapper) [1, 64] [1, 4096] 266,240 -- | |
│ │ │ └─LoRACompatibleLinear (mapper_sca) [1, 64] [1, 4096] 266,240 -- | |
│ │ │ └─LoRACompatibleLinear (mapper_crp) [1, 64] [1, 4096] 266,240 -- | |
│ │ └─AttnBlock (14) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Sequential (kv_mapper) [2, 308, 2048] [2, 308, 2048] -- -- | |
│ │ │ │ └─SiLU (0) [2, 308, 2048] [2, 308, 2048] -- -- | |
│ │ │ │ └─LoRACompatibleLinear (1) [2, 308, 2048] [2, 308, 2048] 4,196,352 -- | |
│ │ │ └─WuerstchenLayerNorm (norm) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Attention (attention) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ │ └─LoRACompatibleLinear (to_q) [2, 576, 2048] [2, 576, 2048] 4,196,352 -- | |
│ │ │ │ └─LoRACompatibleLinear (to_k) [2, 884, 2048] [2, 884, 2048] 4,196,352 -- | |
│ │ │ │ └─LoRACompatibleLinear (to_v) [2, 884, 2048] [2, 884, 2048] 4,196,352 -- | |
│ │ │ │ └─ModuleList (to_out) -- -- -- -- | |
│ │ │ │ │ └─LoRACompatibleLinear (0) [2, 576, 2048] [2, 576, 2048] 4,196,352 -- | |
│ │ │ │ │ └─Dropout (1) [2, 576, 2048] [2, 576, 2048] -- -- | |
│ │ └─ResBlockStageB (15) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Conv2d (depthwise) [2, 2048, 24, 24] [2, 2048, 24, 24] 20,480 [3, 3] | |
│ │ │ └─WuerstchenLayerNorm (norm) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Sequential (channelwise) [2, 24, 24, 2048] [2, 24, 24, 2048] -- -- | |
│ │ │ │ └─Linear (0) [2, 24, 24, 2048] [2, 24, 24, 8192] 16,785,408 -- | |
│ │ │ │ └─GELU (1) [2, 24, 24, 8192] [2, 24, 24, 8192] -- -- | |
│ │ │ │ └─GlobalResponseNorm (2) [2, 24, 24, 8192] [2, 24, 24, 8192] 16,384 -- | |
│ │ │ │ └─Dropout (3) [2, 24, 24, 8192] [2, 24, 24, 8192] -- -- | |
│ │ │ │ └─Linear (4) [2, 24, 24, 8192] [2, 24, 24, 2048] 16,779,264 -- | |
│ │ └─TimestepBlock (16) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─LoRACompatibleLinear (mapper) [1, 64] [1, 4096] 266,240 -- | |
│ │ │ └─LoRACompatibleLinear (mapper_sca) [1, 64] [1, 4096] 266,240 -- | |
│ │ │ └─LoRACompatibleLinear (mapper_crp) [1, 64] [1, 4096] 266,240 -- | |
│ │ └─AttnBlock (17) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Sequential (kv_mapper) [2, 308, 2048] [2, 308, 2048] -- -- | |
│ │ │ │ └─SiLU (0) [2, 308, 2048] [2, 308, 2048] -- -- | |
│ │ │ │ └─LoRACompatibleLinear (1) [2, 308, 2048] [2, 308, 2048] 4,196,352 -- | |
│ │ │ └─WuerstchenLayerNorm (norm) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Attention (attention) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ │ └─LoRACompatibleLinear (to_q) [2, 576, 2048] [2, 576, 2048] 4,196,352 -- | |
│ │ │ │ └─LoRACompatibleLinear (to_k) [2, 884, 2048] [2, 884, 2048] 4,196,352 -- | |
│ │ │ │ └─LoRACompatibleLinear (to_v) [2, 884, 2048] [2, 884, 2048] 4,196,352 -- | |
│ │ │ │ └─ModuleList (to_out) -- -- -- -- | |
│ │ │ │ │ └─LoRACompatibleLinear (0) [2, 576, 2048] [2, 576, 2048] 4,196,352 -- | |
│ │ │ │ │ └─Dropout (1) [2, 576, 2048] [2, 576, 2048] -- -- | |
│ │ └─ResBlockStageB (18) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Conv2d (depthwise) [2, 2048, 24, 24] [2, 2048, 24, 24] 20,480 [3, 3] | |
│ │ │ └─WuerstchenLayerNorm (norm) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Sequential (channelwise) [2, 24, 24, 2048] [2, 24, 24, 2048] -- -- | |
│ │ │ │ └─Linear (0) [2, 24, 24, 2048] [2, 24, 24, 8192] 16,785,408 -- | |
│ │ │ │ └─GELU (1) [2, 24, 24, 8192] [2, 24, 24, 8192] -- -- | |
│ │ │ │ └─GlobalResponseNorm (2) [2, 24, 24, 8192] [2, 24, 24, 8192] 16,384 -- | |
│ │ │ │ └─Dropout (3) [2, 24, 24, 8192] [2, 24, 24, 8192] -- -- | |
│ │ │ │ └─Linear (4) [2, 24, 24, 8192] [2, 24, 24, 2048] 16,779,264 -- | |
│ │ └─TimestepBlock (19) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─LoRACompatibleLinear (mapper) [1, 64] [1, 4096] 266,240 -- | |
│ │ │ └─LoRACompatibleLinear (mapper_sca) [1, 64] [1, 4096] 266,240 -- | |
│ │ │ └─LoRACompatibleLinear (mapper_crp) [1, 64] [1, 4096] 266,240 -- | |
│ │ └─AttnBlock (20) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Sequential (kv_mapper) [2, 308, 2048] [2, 308, 2048] -- -- | |
│ │ │ │ └─SiLU (0) [2, 308, 2048] [2, 308, 2048] -- -- | |
│ │ │ │ └─LoRACompatibleLinear (1) [2, 308, 2048] [2, 308, 2048] 4,196,352 -- | |
│ │ │ └─WuerstchenLayerNorm (norm) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Attention (attention) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ │ └─LoRACompatibleLinear (to_q) [2, 576, 2048] [2, 576, 2048] 4,196,352 -- | |
│ │ │ │ └─LoRACompatibleLinear (to_k) [2, 884, 2048] [2, 884, 2048] 4,196,352 -- | |
│ │ │ │ └─LoRACompatibleLinear (to_v) [2, 884, 2048] [2, 884, 2048] 4,196,352 -- | |
│ │ │ │ └─ModuleList (to_out) -- -- -- -- | |
│ │ │ │ │ └─LoRACompatibleLinear (0) [2, 576, 2048] [2, 576, 2048] 4,196,352 -- | |
│ │ │ │ │ └─Dropout (1) [2, 576, 2048] [2, 576, 2048] -- -- | |
│ │ └─ResBlockStageB (21) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Conv2d (depthwise) [2, 2048, 24, 24] [2, 2048, 24, 24] 20,480 [3, 3] | |
│ │ │ └─WuerstchenLayerNorm (norm) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Sequential (channelwise) [2, 24, 24, 2048] [2, 24, 24, 2048] -- -- | |
│ │ │ │ └─Linear (0) [2, 24, 24, 2048] [2, 24, 24, 8192] 16,785,408 -- | |
│ │ │ │ └─GELU (1) [2, 24, 24, 8192] [2, 24, 24, 8192] -- -- | |
│ │ │ │ └─GlobalResponseNorm (2) [2, 24, 24, 8192] [2, 24, 24, 8192] 16,384 -- | |
│ │ │ │ └─Dropout (3) [2, 24, 24, 8192] [2, 24, 24, 8192] -- -- | |
│ │ │ │ └─Linear (4) [2, 24, 24, 8192] [2, 24, 24, 2048] 16,779,264 -- | |
│ │ └─TimestepBlock (22) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─LoRACompatibleLinear (mapper) [1, 64] [1, 4096] 266,240 -- | |
│ │ │ └─LoRACompatibleLinear (mapper_sca) [1, 64] [1, 4096] 266,240 -- | |
│ │ │ └─LoRACompatibleLinear (mapper_crp) [1, 64] [1, 4096] 266,240 -- | |
│ │ └─AttnBlock (23) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Sequential (kv_mapper) [2, 308, 2048] [2, 308, 2048] -- -- | |
│ │ │ │ └─SiLU (0) [2, 308, 2048] [2, 308, 2048] -- -- | |
│ │ │ │ └─LoRACompatibleLinear (1) [2, 308, 2048] [2, 308, 2048] 4,196,352 -- | |
│ │ │ └─WuerstchenLayerNorm (norm) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Attention (attention) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ │ └─LoRACompatibleLinear (to_q) [2, 576, 2048] [2, 576, 2048] 4,196,352 -- | |
│ │ │ │ └─LoRACompatibleLinear (to_k) [2, 884, 2048] [2, 884, 2048] 4,196,352 -- | |
│ │ │ │ └─LoRACompatibleLinear (to_v) [2, 884, 2048] [2, 884, 2048] 4,196,352 -- | |
│ │ │ │ └─ModuleList (to_out) -- -- -- -- | |
│ │ │ │ │ └─LoRACompatibleLinear (0) [2, 576, 2048] [2, 576, 2048] 4,196,352 -- | |
│ │ │ │ │ └─Dropout (1) [2, 576, 2048] [2, 576, 2048] -- -- | |
│ │ └─ResBlockStageB (24) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Conv2d (depthwise) [2, 2048, 24, 24] [2, 2048, 24, 24] 20,480 [3, 3] | |
│ │ │ └─WuerstchenLayerNorm (norm) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Sequential (channelwise) [2, 24, 24, 2048] [2, 24, 24, 2048] -- -- | |
│ │ │ │ └─Linear (0) [2, 24, 24, 2048] [2, 24, 24, 8192] 16,785,408 -- | |
│ │ │ │ └─GELU (1) [2, 24, 24, 8192] [2, 24, 24, 8192] -- -- | |
│ │ │ │ └─GlobalResponseNorm (2) [2, 24, 24, 8192] [2, 24, 24, 8192] 16,384 -- | |
│ │ │ │ └─Dropout (3) [2, 24, 24, 8192] [2, 24, 24, 8192] -- -- | |
│ │ │ │ └─Linear (4) [2, 24, 24, 8192] [2, 24, 24, 2048] 16,779,264 -- | |
│ │ └─TimestepBlock (25) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─LoRACompatibleLinear (mapper) [1, 64] [1, 4096] 266,240 -- | |
│ │ │ └─LoRACompatibleLinear (mapper_sca) [1, 64] [1, 4096] 266,240 -- | |
│ │ │ └─LoRACompatibleLinear (mapper_crp) [1, 64] [1, 4096] 266,240 -- | |
│ │ └─AttnBlock (26) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Sequential (kv_mapper) [2, 308, 2048] [2, 308, 2048] -- -- | |
│ │ │ │ └─SiLU (0) [2, 308, 2048] [2, 308, 2048] -- -- | |
│ │ │ │ └─LoRACompatibleLinear (1) [2, 308, 2048] [2, 308, 2048] 4,196,352 -- | |
│ │ │ └─WuerstchenLayerNorm (norm) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Attention (attention) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ │ └─LoRACompatibleLinear (to_q) [2, 576, 2048] [2, 576, 2048] 4,196,352 -- | |
│ │ │ │ └─LoRACompatibleLinear (to_k) [2, 884, 2048] [2, 884, 2048] 4,196,352 -- | |
│ │ │ │ └─LoRACompatibleLinear (to_v) [2, 884, 2048] [2, 884, 2048] 4,196,352 -- | |
│ │ │ │ └─ModuleList (to_out) -- -- -- -- | |
│ │ │ │ │ └─LoRACompatibleLinear (0) [2, 576, 2048] [2, 576, 2048] 4,196,352 -- | |
│ │ │ │ │ └─Dropout (1) [2, 576, 2048] [2, 576, 2048] -- -- | |
│ │ └─ResBlockStageB (27) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Conv2d (depthwise) [2, 2048, 24, 24] [2, 2048, 24, 24] 20,480 [3, 3] | |
│ │ │ └─WuerstchenLayerNorm (norm) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Sequential (channelwise) [2, 24, 24, 2048] [2, 24, 24, 2048] -- -- | |
│ │ │ │ └─Linear (0) [2, 24, 24, 2048] [2, 24, 24, 8192] 16,785,408 -- | |
│ │ │ │ └─GELU (1) [2, 24, 24, 8192] [2, 24, 24, 8192] -- -- | |
│ │ │ │ └─GlobalResponseNorm (2) [2, 24, 24, 8192] [2, 24, 24, 8192] 16,384 -- | |
│ │ │ │ └─Dropout (3) [2, 24, 24, 8192] [2, 24, 24, 8192] -- -- | |
│ │ │ │ └─Linear (4) [2, 24, 24, 8192] [2, 24, 24, 2048] 16,779,264 -- | |
│ │ └─TimestepBlock (28) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─LoRACompatibleLinear (mapper) [1, 64] [1, 4096] 266,240 -- | |
│ │ │ └─LoRACompatibleLinear (mapper_sca) [1, 64] [1, 4096] 266,240 -- | |
│ │ │ └─LoRACompatibleLinear (mapper_crp) [1, 64] [1, 4096] 266,240 -- | |
│ │ └─AttnBlock (29) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Sequential (kv_mapper) [2, 308, 2048] [2, 308, 2048] -- -- | |
│ │ │ │ └─SiLU (0) [2, 308, 2048] [2, 308, 2048] -- -- | |
│ │ │ │ └─LoRACompatibleLinear (1) [2, 308, 2048] [2, 308, 2048] 4,196,352 -- | |
│ │ │ └─WuerstchenLayerNorm (norm) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Attention (attention) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ │ └─LoRACompatibleLinear (to_q) [2, 576, 2048] [2, 576, 2048] 4,196,352 -- | |
│ │ │ │ └─LoRACompatibleLinear (to_k) [2, 884, 2048] [2, 884, 2048] 4,196,352 -- | |
│ │ │ │ └─LoRACompatibleLinear (to_v) [2, 884, 2048] [2, 884, 2048] 4,196,352 -- | |
│ │ │ │ └─ModuleList (to_out) -- -- -- -- | |
│ │ │ │ │ └─LoRACompatibleLinear (0) [2, 576, 2048] [2, 576, 2048] 4,196,352 -- | |
│ │ │ │ │ └─Dropout (1) [2, 576, 2048] [2, 576, 2048] -- -- | |
│ │ └─ResBlockStageB (30) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Conv2d (depthwise) [2, 2048, 24, 24] [2, 2048, 24, 24] 20,480 [3, 3] | |
│ │ │ └─WuerstchenLayerNorm (norm) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Sequential (channelwise) [2, 24, 24, 2048] [2, 24, 24, 2048] -- -- | |
│ │ │ │ └─Linear (0) [2, 24, 24, 2048] [2, 24, 24, 8192] 16,785,408 -- | |
│ │ │ │ └─GELU (1) [2, 24, 24, 8192] [2, 24, 24, 8192] -- -- | |
│ │ │ │ └─GlobalResponseNorm (2) [2, 24, 24, 8192] [2, 24, 24, 8192] 16,384 -- | |
│ │ │ │ └─Dropout (3) [2, 24, 24, 8192] [2, 24, 24, 8192] -- -- | |
│ │ │ │ └─Linear (4) [2, 24, 24, 8192] [2, 24, 24, 2048] 16,779,264 -- | |
│ │ └─TimestepBlock (31) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─LoRACompatibleLinear (mapper) [1, 64] [1, 4096] 266,240 -- | |
│ │ │ └─LoRACompatibleLinear (mapper_sca) [1, 64] [1, 4096] 266,240 -- | |
│ │ │ └─LoRACompatibleLinear (mapper_crp) [1, 64] [1, 4096] 266,240 -- | |
│ │ └─AttnBlock (32) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Sequential (kv_mapper) [2, 308, 2048] [2, 308, 2048] -- -- | |
│ │ │ │ └─SiLU (0) [2, 308, 2048] [2, 308, 2048] -- -- | |
│ │ │ │ └─LoRACompatibleLinear (1) [2, 308, 2048] [2, 308, 2048] 4,196,352 -- | |
│ │ │ └─WuerstchenLayerNorm (norm) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Attention (attention) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ │ └─LoRACompatibleLinear (to_q) [2, 576, 2048] [2, 576, 2048] 4,196,352 -- | |
│ │ │ │ └─LoRACompatibleLinear (to_k) [2, 884, 2048] [2, 884, 2048] 4,196,352 -- | |
│ │ │ │ └─LoRACompatibleLinear (to_v) [2, 884, 2048] [2, 884, 2048] 4,196,352 -- | |
│ │ │ │ └─ModuleList (to_out) -- -- -- -- | |
│ │ │ │ │ └─LoRACompatibleLinear (0) [2, 576, 2048] [2, 576, 2048] 4,196,352 -- | |
│ │ │ │ │ └─Dropout (1) [2, 576, 2048] [2, 576, 2048] -- -- | |
│ │ └─ResBlockStageB (33) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Conv2d (depthwise) [2, 2048, 24, 24] [2, 2048, 24, 24] 20,480 [3, 3] | |
│ │ │ └─WuerstchenLayerNorm (norm) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Sequential (channelwise) [2, 24, 24, 2048] [2, 24, 24, 2048] -- -- | |
│ │ │ │ └─Linear (0) [2, 24, 24, 2048] [2, 24, 24, 8192] 16,785,408 -- | |
│ │ │ │ └─GELU (1) [2, 24, 24, 8192] [2, 24, 24, 8192] -- -- | |
│ │ │ │ └─GlobalResponseNorm (2) [2, 24, 24, 8192] [2, 24, 24, 8192] 16,384 -- | |
│ │ │ │ └─Dropout (3) [2, 24, 24, 8192] [2, 24, 24, 8192] -- -- | |
│ │ │ │ └─Linear (4) [2, 24, 24, 8192] [2, 24, 24, 2048] 16,779,264 -- | |
│ │ └─TimestepBlock (34) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─LoRACompatibleLinear (mapper) [1, 64] [1, 4096] 266,240 -- | |
│ │ │ └─LoRACompatibleLinear (mapper_sca) [1, 64] [1, 4096] 266,240 -- | |
│ │ │ └─LoRACompatibleLinear (mapper_crp) [1, 64] [1, 4096] 266,240 -- | |
│ │ └─AttnBlock (35) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Sequential (kv_mapper) [2, 308, 2048] [2, 308, 2048] -- -- | |
│ │ │ │ └─SiLU (0) [2, 308, 2048] [2, 308, 2048] -- -- | |
│ │ │ │ └─LoRACompatibleLinear (1) [2, 308, 2048] [2, 308, 2048] 4,196,352 -- | |
│ │ │ └─WuerstchenLayerNorm (norm) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Attention (attention) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ │ └─LoRACompatibleLinear (to_q) [2, 576, 2048] [2, 576, 2048] 4,196,352 -- | |
│ │ │ │ └─LoRACompatibleLinear (to_k) [2, 884, 2048] [2, 884, 2048] 4,196,352 -- | |
│ │ │ │ └─LoRACompatibleLinear (to_v) [2, 884, 2048] [2, 884, 2048] 4,196,352 -- | |
│ │ │ │ └─ModuleList (to_out) -- -- -- -- | |
│ │ │ │ │ └─LoRACompatibleLinear (0) [2, 576, 2048] [2, 576, 2048] 4,196,352 -- | |
│ │ │ │ │ └─Dropout (1) [2, 576, 2048] [2, 576, 2048] -- -- | |
│ │ └─ResBlockStageB (36) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Conv2d (depthwise) [2, 2048, 24, 24] [2, 2048, 24, 24] 20,480 [3, 3] | |
│ │ │ └─WuerstchenLayerNorm (norm) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Sequential (channelwise) [2, 24, 24, 2048] [2, 24, 24, 2048] -- -- | |
│ │ │ │ └─Linear (0) [2, 24, 24, 2048] [2, 24, 24, 8192] 16,785,408 -- | |
│ │ │ │ └─GELU (1) [2, 24, 24, 8192] [2, 24, 24, 8192] -- -- | |
│ │ │ │ └─GlobalResponseNorm (2) [2, 24, 24, 8192] [2, 24, 24, 8192] 16,384 -- | |
│ │ │ │ └─Dropout (3) [2, 24, 24, 8192] [2, 24, 24, 8192] -- -- | |
│ │ │ │ └─Linear (4) [2, 24, 24, 8192] [2, 24, 24, 2048] 16,779,264 -- | |
│ │ └─TimestepBlock (37) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─LoRACompatibleLinear (mapper) [1, 64] [1, 4096] 266,240 -- | |
│ │ │ └─LoRACompatibleLinear (mapper_sca) [1, 64] [1, 4096] 266,240 -- | |
│ │ │ └─LoRACompatibleLinear (mapper_crp) [1, 64] [1, 4096] 266,240 -- | |
│ │ └─AttnBlock (38) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Sequential (kv_mapper) [2, 308, 2048] [2, 308, 2048] -- -- | |
│ │ │ │ └─SiLU (0) [2, 308, 2048] [2, 308, 2048] -- -- | |
│ │ │ │ └─LoRACompatibleLinear (1) [2, 308, 2048] [2, 308, 2048] 4,196,352 -- | |
│ │ │ └─WuerstchenLayerNorm (norm) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Attention (attention) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ │ └─LoRACompatibleLinear (to_q) [2, 576, 2048] [2, 576, 2048] 4,196,352 -- | |
│ │ │ │ └─LoRACompatibleLinear (to_k) [2, 884, 2048] [2, 884, 2048] 4,196,352 -- | |
│ │ │ │ └─LoRACompatibleLinear (to_v) [2, 884, 2048] [2, 884, 2048] 4,196,352 -- | |
│ │ │ │ └─ModuleList (to_out) -- -- -- -- | |
│ │ │ │ │ └─LoRACompatibleLinear (0) [2, 576, 2048] [2, 576, 2048] 4,196,352 -- | |
│ │ │ │ │ └─Dropout (1) [2, 576, 2048] [2, 576, 2048] -- -- | |
│ │ └─ResBlockStageB (39) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Conv2d (depthwise) [2, 2048, 24, 24] [2, 2048, 24, 24] 20,480 [3, 3] | |
│ │ │ └─WuerstchenLayerNorm (norm) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Sequential (channelwise) [2, 24, 24, 2048] [2, 24, 24, 2048] -- -- | |
│ │ │ │ └─Linear (0) [2, 24, 24, 2048] [2, 24, 24, 8192] 16,785,408 -- | |
│ │ │ │ └─GELU (1) [2, 24, 24, 8192] [2, 24, 24, 8192] -- -- | |
│ │ │ │ └─GlobalResponseNorm (2) [2, 24, 24, 8192] [2, 24, 24, 8192] 16,384 -- | |
│ │ │ │ └─Dropout (3) [2, 24, 24, 8192] [2, 24, 24, 8192] -- -- | |
│ │ │ │ └─Linear (4) [2, 24, 24, 8192] [2, 24, 24, 2048] 16,779,264 -- | |
│ │ └─TimestepBlock (40) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─LoRACompatibleLinear (mapper) [1, 64] [1, 4096] 266,240 -- | |
│ │ │ └─LoRACompatibleLinear (mapper_sca) [1, 64] [1, 4096] 266,240 -- | |
│ │ │ └─LoRACompatibleLinear (mapper_crp) [1, 64] [1, 4096] 266,240 -- | |
│ │ └─AttnBlock (41) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Sequential (kv_mapper) [2, 308, 2048] [2, 308, 2048] -- -- | |
│ │ │ │ └─SiLU (0) [2, 308, 2048] [2, 308, 2048] -- -- | |
│ │ │ │ └─LoRACompatibleLinear (1) [2, 308, 2048] [2, 308, 2048] 4,196,352 -- | |
│ │ │ └─WuerstchenLayerNorm (norm) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Attention (attention) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ │ └─LoRACompatibleLinear (to_q) [2, 576, 2048] [2, 576, 2048] 4,196,352 -- | |
│ │ │ │ └─LoRACompatibleLinear (to_k) [2, 884, 2048] [2, 884, 2048] 4,196,352 -- | |
│ │ │ │ └─LoRACompatibleLinear (to_v) [2, 884, 2048] [2, 884, 2048] 4,196,352 -- | |
│ │ │ │ └─ModuleList (to_out) -- -- -- -- | |
│ │ │ │ │ └─LoRACompatibleLinear (0) [2, 576, 2048] [2, 576, 2048] 4,196,352 -- | |
│ │ │ │ │ └─Dropout (1) [2, 576, 2048] [2, 576, 2048] -- -- | |
│ │ └─ResBlockStageB (42) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Conv2d (depthwise) [2, 2048, 24, 24] [2, 2048, 24, 24] 20,480 [3, 3] | |
│ │ │ └─WuerstchenLayerNorm (norm) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Sequential (channelwise) [2, 24, 24, 2048] [2, 24, 24, 2048] -- -- | |
│ │ │ │ └─Linear (0) [2, 24, 24, 2048] [2, 24, 24, 8192] 16,785,408 -- | |
│ │ │ │ └─GELU (1) [2, 24, 24, 8192] [2, 24, 24, 8192] -- -- | |
│ │ │ │ └─GlobalResponseNorm (2) [2, 24, 24, 8192] [2, 24, 24, 8192] 16,384 -- | |
│ │ │ │ └─Dropout (3) [2, 24, 24, 8192] [2, 24, 24, 8192] -- -- | |
│ │ │ │ └─Linear (4) [2, 24, 24, 8192] [2, 24, 24, 2048] 16,779,264 -- | |
│ │ └─TimestepBlock (43) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─LoRACompatibleLinear (mapper) [1, 64] [1, 4096] 266,240 -- | |
│ │ │ └─LoRACompatibleLinear (mapper_sca) [1, 64] [1, 4096] 266,240 -- | |
│ │ │ └─LoRACompatibleLinear (mapper_crp) [1, 64] [1, 4096] 266,240 -- | |
│ │ └─AttnBlock (44) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Sequential (kv_mapper) [2, 308, 2048] [2, 308, 2048] -- -- | |
│ │ │ │ └─SiLU (0) [2, 308, 2048] [2, 308, 2048] -- -- | |
│ │ │ │ └─LoRACompatibleLinear (1) [2, 308, 2048] [2, 308, 2048] 4,196,352 -- | |
│ │ │ └─WuerstchenLayerNorm (norm) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Attention (attention) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ │ └─LoRACompatibleLinear (to_q) [2, 576, 2048] [2, 576, 2048] 4,196,352 -- | |
│ │ │ │ └─LoRACompatibleLinear (to_k) [2, 884, 2048] [2, 884, 2048] 4,196,352 -- | |
│ │ │ │ └─LoRACompatibleLinear (to_v) [2, 884, 2048] [2, 884, 2048] 4,196,352 -- | |
│ │ │ │ └─ModuleList (to_out) -- -- -- -- | |
│ │ │ │ │ └─LoRACompatibleLinear (0) [2, 576, 2048] [2, 576, 2048] 4,196,352 -- | |
│ │ │ │ │ └─Dropout (1) [2, 576, 2048] [2, 576, 2048] -- -- | |
│ │ └─ResBlockStageB (45) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Conv2d (depthwise) [2, 2048, 24, 24] [2, 2048, 24, 24] 20,480 [3, 3] | |
│ │ │ └─WuerstchenLayerNorm (norm) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Sequential (channelwise) [2, 24, 24, 2048] [2, 24, 24, 2048] -- -- | |
│ │ │ │ └─Linear (0) [2, 24, 24, 2048] [2, 24, 24, 8192] 16,785,408 -- | |
│ │ │ │ └─GELU (1) [2, 24, 24, 8192] [2, 24, 24, 8192] -- -- | |
│ │ │ │ └─GlobalResponseNorm (2) [2, 24, 24, 8192] [2, 24, 24, 8192] 16,384 -- | |
│ │ │ │ └─Dropout (3) [2, 24, 24, 8192] [2, 24, 24, 8192] -- -- | |
│ │ │ │ └─Linear (4) [2, 24, 24, 8192] [2, 24, 24, 2048] 16,779,264 -- | |
│ │ └─TimestepBlock (46) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─LoRACompatibleLinear (mapper) [1, 64] [1, 4096] 266,240 -- | |
│ │ │ └─LoRACompatibleLinear (mapper_sca) [1, 64] [1, 4096] 266,240 -- | |
│ │ │ └─LoRACompatibleLinear (mapper_crp) [1, 64] [1, 4096] 266,240 -- | |
│ │ └─AttnBlock (47) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Sequential (kv_mapper) [2, 308, 2048] [2, 308, 2048] -- -- | |
│ │ │ │ └─SiLU (0) [2, 308, 2048] [2, 308, 2048] -- -- | |
│ │ │ │ └─LoRACompatibleLinear (1) [2, 308, 2048] [2, 308, 2048] 4,196,352 -- | |
│ │ │ └─WuerstchenLayerNorm (norm) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Attention (attention) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ │ └─LoRACompatibleLinear (to_q) [2, 576, 2048] [2, 576, 2048] 4,196,352 -- | |
│ │ │ │ └─LoRACompatibleLinear (to_k) [2, 884, 2048] [2, 884, 2048] 4,196,352 -- | |
│ │ │ │ └─LoRACompatibleLinear (to_v) [2, 884, 2048] [2, 884, 2048] 4,196,352 -- | |
│ │ │ │ └─ModuleList (to_out) -- -- -- -- | |
│ │ │ │ │ └─LoRACompatibleLinear (0) [2, 576, 2048] [2, 576, 2048] 4,196,352 -- | |
│ │ │ │ │ └─Dropout (1) [2, 576, 2048] [2, 576, 2048] -- -- | |
│ │ └─ResBlockStageB (48) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Conv2d (depthwise) [2, 2048, 24, 24] [2, 2048, 24, 24] 20,480 [3, 3] | |
│ │ │ └─WuerstchenLayerNorm (norm) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Sequential (channelwise) [2, 24, 24, 2048] [2, 24, 24, 2048] -- -- | |
│ │ │ │ └─Linear (0) [2, 24, 24, 2048] [2, 24, 24, 8192] 16,785,408 -- | |
│ │ │ │ └─GELU (1) [2, 24, 24, 8192] [2, 24, 24, 8192] -- -- | |
│ │ │ │ └─GlobalResponseNorm (2) [2, 24, 24, 8192] [2, 24, 24, 8192] 16,384 -- | |
│ │ │ │ └─Dropout (3) [2, 24, 24, 8192] [2, 24, 24, 8192] -- -- | |
│ │ │ │ └─Linear (4) [2, 24, 24, 8192] [2, 24, 24, 2048] 16,779,264 -- | |
│ │ └─TimestepBlock (49) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─LoRACompatibleLinear (mapper) [1, 64] [1, 4096] 266,240 -- | |
│ │ │ └─LoRACompatibleLinear (mapper_sca) [1, 64] [1, 4096] 266,240 -- | |
│ │ │ └─LoRACompatibleLinear (mapper_crp) [1, 64] [1, 4096] 266,240 -- | |
│ │ └─AttnBlock (50) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Sequential (kv_mapper) [2, 308, 2048] [2, 308, 2048] -- -- | |
│ │ │ │ └─SiLU (0) [2, 308, 2048] [2, 308, 2048] -- -- | |
│ │ │ │ └─LoRACompatibleLinear (1) [2, 308, 2048] [2, 308, 2048] 4,196,352 -- | |
│ │ │ └─WuerstchenLayerNorm (norm) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Attention (attention) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ │ └─LoRACompatibleLinear (to_q) [2, 576, 2048] [2, 576, 2048] 4,196,352 -- | |
│ │ │ │ └─LoRACompatibleLinear (to_k) [2, 884, 2048] [2, 884, 2048] 4,196,352 -- | |
│ │ │ │ └─LoRACompatibleLinear (to_v) [2, 884, 2048] [2, 884, 2048] 4,196,352 -- | |
│ │ │ │ └─ModuleList (to_out) -- -- -- -- | |
│ │ │ │ │ └─LoRACompatibleLinear (0) [2, 576, 2048] [2, 576, 2048] 4,196,352 -- | |
│ │ │ │ │ └─Dropout (1) [2, 576, 2048] [2, 576, 2048] -- -- | |
│ │ └─ResBlockStageB (51) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Conv2d (depthwise) [2, 2048, 24, 24] [2, 2048, 24, 24] 20,480 [3, 3] | |
│ │ │ └─WuerstchenLayerNorm (norm) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Sequential (channelwise) [2, 24, 24, 2048] [2, 24, 24, 2048] -- -- | |
│ │ │ │ └─Linear (0) [2, 24, 24, 2048] [2, 24, 24, 8192] 16,785,408 -- | |
│ │ │ │ └─GELU (1) [2, 24, 24, 8192] [2, 24, 24, 8192] -- -- | |
│ │ │ │ └─GlobalResponseNorm (2) [2, 24, 24, 8192] [2, 24, 24, 8192] 16,384 -- | |
│ │ │ │ └─Dropout (3) [2, 24, 24, 8192] [2, 24, 24, 8192] -- -- | |
│ │ │ │ └─Linear (4) [2, 24, 24, 8192] [2, 24, 24, 2048] 16,779,264 -- | |
│ │ └─TimestepBlock (52) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─LoRACompatibleLinear (mapper) [1, 64] [1, 4096] 266,240 -- | |
│ │ │ └─LoRACompatibleLinear (mapper_sca) [1, 64] [1, 4096] 266,240 -- | |
│ │ │ └─LoRACompatibleLinear (mapper_crp) [1, 64] [1, 4096] 266,240 -- | |
│ │ └─AttnBlock (53) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Sequential (kv_mapper) [2, 308, 2048] [2, 308, 2048] -- -- | |
│ │ │ │ └─SiLU (0) [2, 308, 2048] [2, 308, 2048] -- -- | |
│ │ │ │ └─LoRACompatibleLinear (1) [2, 308, 2048] [2, 308, 2048] 4,196,352 -- | |
│ │ │ └─WuerstchenLayerNorm (norm) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Attention (attention) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ │ └─LoRACompatibleLinear (to_q) [2, 576, 2048] [2, 576, 2048] 4,196,352 -- | |
│ │ │ │ └─LoRACompatibleLinear (to_k) [2, 884, 2048] [2, 884, 2048] 4,196,352 -- | |
│ │ │ │ └─LoRACompatibleLinear (to_v) [2, 884, 2048] [2, 884, 2048] 4,196,352 -- | |
│ │ │ │ └─ModuleList (to_out) -- -- -- -- | |
│ │ │ │ │ └─LoRACompatibleLinear (0) [2, 576, 2048] [2, 576, 2048] 4,196,352 -- | |
│ │ │ │ │ └─Dropout (1) [2, 576, 2048] [2, 576, 2048] -- -- | |
│ │ └─ResBlockStageB (54) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Conv2d (depthwise) [2, 2048, 24, 24] [2, 2048, 24, 24] 20,480 [3, 3] | |
│ │ │ └─WuerstchenLayerNorm (norm) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Sequential (channelwise) [2, 24, 24, 2048] [2, 24, 24, 2048] -- -- | |
│ │ │ │ └─Linear (0) [2, 24, 24, 2048] [2, 24, 24, 8192] 16,785,408 -- | |
│ │ │ │ └─GELU (1) [2, 24, 24, 8192] [2, 24, 24, 8192] -- -- | |
│ │ │ │ └─GlobalResponseNorm (2) [2, 24, 24, 8192] [2, 24, 24, 8192] 16,384 -- | |
│ │ │ │ └─Dropout (3) [2, 24, 24, 8192] [2, 24, 24, 8192] -- -- | |
│ │ │ │ └─Linear (4) [2, 24, 24, 8192] [2, 24, 24, 2048] 16,779,264 -- | |
│ │ └─TimestepBlock (55) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─LoRACompatibleLinear (mapper) [1, 64] [1, 4096] 266,240 -- | |
│ │ │ └─LoRACompatibleLinear (mapper_sca) [1, 64] [1, 4096] 266,240 -- | |
│ │ │ └─LoRACompatibleLinear (mapper_crp) [1, 64] [1, 4096] 266,240 -- | |
│ │ └─AttnBlock (56) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Sequential (kv_mapper) [2, 308, 2048] [2, 308, 2048] -- -- | |
│ │ │ │ └─SiLU (0) [2, 308, 2048] [2, 308, 2048] -- -- | |
│ │ │ │ └─LoRACompatibleLinear (1) [2, 308, 2048] [2, 308, 2048] 4,196,352 -- | |
│ │ │ └─WuerstchenLayerNorm (norm) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Attention (attention) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ │ └─LoRACompatibleLinear (to_q) [2, 576, 2048] [2, 576, 2048] 4,196,352 -- | |
│ │ │ │ └─LoRACompatibleLinear (to_k) [2, 884, 2048] [2, 884, 2048] 4,196,352 -- | |
│ │ │ │ └─LoRACompatibleLinear (to_v) [2, 884, 2048] [2, 884, 2048] 4,196,352 -- | |
│ │ │ │ └─ModuleList (to_out) -- -- -- -- | |
│ │ │ │ │ └─LoRACompatibleLinear (0) [2, 576, 2048] [2, 576, 2048] 4,196,352 -- | |
│ │ │ │ │ └─Dropout (1) [2, 576, 2048] [2, 576, 2048] -- -- | |
│ │ └─ResBlockStageB (57) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Conv2d (depthwise) [2, 2048, 24, 24] [2, 2048, 24, 24] 20,480 [3, 3] | |
│ │ │ └─WuerstchenLayerNorm (norm) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Sequential (channelwise) [2, 24, 24, 2048] [2, 24, 24, 2048] -- -- | |
│ │ │ │ └─Linear (0) [2, 24, 24, 2048] [2, 24, 24, 8192] 16,785,408 -- | |
│ │ │ │ └─GELU (1) [2, 24, 24, 8192] [2, 24, 24, 8192] -- -- | |
│ │ │ │ └─GlobalResponseNorm (2) [2, 24, 24, 8192] [2, 24, 24, 8192] 16,384 -- | |
│ │ │ │ └─Dropout (3) [2, 24, 24, 8192] [2, 24, 24, 8192] -- -- | |
│ │ │ │ └─Linear (4) [2, 24, 24, 8192] [2, 24, 24, 2048] 16,779,264 -- | |
│ │ └─TimestepBlock (58) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─LoRACompatibleLinear (mapper) [1, 64] [1, 4096] 266,240 -- | |
│ │ │ └─LoRACompatibleLinear (mapper_sca) [1, 64] [1, 4096] 266,240 -- | |
│ │ │ └─LoRACompatibleLinear (mapper_crp) [1, 64] [1, 4096] 266,240 -- | |
│ │ └─AttnBlock (59) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Sequential (kv_mapper) [2, 308, 2048] [2, 308, 2048] -- -- | |
│ │ │ │ └─SiLU (0) [2, 308, 2048] [2, 308, 2048] -- -- | |
│ │ │ │ └─LoRACompatibleLinear (1) [2, 308, 2048] [2, 308, 2048] 4,196,352 -- | |
│ │ │ └─WuerstchenLayerNorm (norm) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Attention (attention) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ │ └─LoRACompatibleLinear (to_q) [2, 576, 2048] [2, 576, 2048] 4,196,352 -- | |
│ │ │ │ └─LoRACompatibleLinear (to_k) [2, 884, 2048] [2, 884, 2048] 4,196,352 -- | |
│ │ │ │ └─LoRACompatibleLinear (to_v) [2, 884, 2048] [2, 884, 2048] 4,196,352 -- | |
│ │ │ │ └─ModuleList (to_out) -- -- -- -- | |
│ │ │ │ │ └─LoRACompatibleLinear (0) [2, 576, 2048] [2, 576, 2048] 4,196,352 -- | |
│ │ │ │ │ └─Dropout (1) [2, 576, 2048] [2, 576, 2048] -- -- | |
│ │ └─ResBlockStageB (60) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Conv2d (depthwise) [2, 2048, 24, 24] [2, 2048, 24, 24] 20,480 [3, 3] | |
│ │ │ └─WuerstchenLayerNorm (norm) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Sequential (channelwise) [2, 24, 24, 2048] [2, 24, 24, 2048] -- -- | |
│ │ │ │ └─Linear (0) [2, 24, 24, 2048] [2, 24, 24, 8192] 16,785,408 -- | |
│ │ │ │ └─GELU (1) [2, 24, 24, 8192] [2, 24, 24, 8192] -- -- | |
│ │ │ │ └─GlobalResponseNorm (2) [2, 24, 24, 8192] [2, 24, 24, 8192] 16,384 -- | |
│ │ │ │ └─Dropout (3) [2, 24, 24, 8192] [2, 24, 24, 8192] -- -- | |
│ │ │ │ └─Linear (4) [2, 24, 24, 8192] [2, 24, 24, 2048] 16,779,264 -- | |
│ │ └─TimestepBlock (61) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─LoRACompatibleLinear (mapper) [1, 64] [1, 4096] 266,240 -- | |
│ │ │ └─LoRACompatibleLinear (mapper_sca) [1, 64] [1, 4096] 266,240 -- | |
│ │ │ └─LoRACompatibleLinear (mapper_crp) [1, 64] [1, 4096] 266,240 -- | |
│ │ └─AttnBlock (62) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Sequential (kv_mapper) [2, 308, 2048] [2, 308, 2048] -- -- | |
│ │ │ │ └─SiLU (0) [2, 308, 2048] [2, 308, 2048] -- -- | |
│ │ │ │ └─LoRACompatibleLinear (1) [2, 308, 2048] [2, 308, 2048] 4,196,352 -- | |
│ │ │ └─WuerstchenLayerNorm (norm) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Attention (attention) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ │ └─LoRACompatibleLinear (to_q) [2, 576, 2048] [2, 576, 2048] 4,196,352 -- | |
│ │ │ │ └─LoRACompatibleLinear (to_k) [2, 884, 2048] [2, 884, 2048] 4,196,352 -- | |
│ │ │ │ └─LoRACompatibleLinear (to_v) [2, 884, 2048] [2, 884, 2048] 4,196,352 -- | |
│ │ │ │ └─ModuleList (to_out) -- -- -- -- | |
│ │ │ │ │ └─LoRACompatibleLinear (0) [2, 576, 2048] [2, 576, 2048] 4,196,352 -- | |
│ │ │ │ │ └─Dropout (1) [2, 576, 2048] [2, 576, 2048] -- -- | |
│ │ └─ResBlockStageB (63) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Conv2d (depthwise) [2, 2048, 24, 24] [2, 2048, 24, 24] 20,480 [3, 3] | |
│ │ │ └─WuerstchenLayerNorm (norm) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Sequential (channelwise) [2, 24, 24, 2048] [2, 24, 24, 2048] -- -- | |
│ │ │ │ └─Linear (0) [2, 24, 24, 2048] [2, 24, 24, 8192] 16,785,408 -- | |
│ │ │ │ └─GELU (1) [2, 24, 24, 8192] [2, 24, 24, 8192] -- -- | |
│ │ │ │ └─GlobalResponseNorm (2) [2, 24, 24, 8192] [2, 24, 24, 8192] 16,384 -- | |
│ │ │ │ └─Dropout (3) [2, 24, 24, 8192] [2, 24, 24, 8192] -- -- | |
│ │ │ │ └─Linear (4) [2, 24, 24, 8192] [2, 24, 24, 2048] 16,779,264 -- | |
│ │ └─TimestepBlock (64) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─LoRACompatibleLinear (mapper) [1, 64] [1, 4096] 266,240 -- | |
│ │ │ └─LoRACompatibleLinear (mapper_sca) [1, 64] [1, 4096] 266,240 -- | |
│ │ │ └─LoRACompatibleLinear (mapper_crp) [1, 64] [1, 4096] 266,240 -- | |
│ │ └─AttnBlock (65) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Sequential (kv_mapper) [2, 308, 2048] [2, 308, 2048] -- -- | |
│ │ │ │ └─SiLU (0) [2, 308, 2048] [2, 308, 2048] -- -- | |
│ │ │ │ └─LoRACompatibleLinear (1) [2, 308, 2048] [2, 308, 2048] 4,196,352 -- | |
│ │ │ └─WuerstchenLayerNorm (norm) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Attention (attention) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ │ └─LoRACompatibleLinear (to_q) [2, 576, 2048] [2, 576, 2048] 4,196,352 -- | |
│ │ │ │ └─LoRACompatibleLinear (to_k) [2, 884, 2048] [2, 884, 2048] 4,196,352 -- | |
│ │ │ │ └─LoRACompatibleLinear (to_v) [2, 884, 2048] [2, 884, 2048] 4,196,352 -- | |
│ │ │ │ └─ModuleList (to_out) -- -- -- -- | |
│ │ │ │ │ └─LoRACompatibleLinear (0) [2, 576, 2048] [2, 576, 2048] 4,196,352 -- | |
│ │ │ │ │ └─Dropout (1) [2, 576, 2048] [2, 576, 2048] -- -- | |
│ │ └─ResBlockStageB (66) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Conv2d (depthwise) [2, 2048, 24, 24] [2, 2048, 24, 24] 20,480 [3, 3] | |
│ │ │ └─WuerstchenLayerNorm (norm) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Sequential (channelwise) [2, 24, 24, 2048] [2, 24, 24, 2048] -- -- | |
│ │ │ │ └─Linear (0) [2, 24, 24, 2048] [2, 24, 24, 8192] 16,785,408 -- | |
│ │ │ │ └─GELU (1) [2, 24, 24, 8192] [2, 24, 24, 8192] -- -- | |
│ │ │ │ └─GlobalResponseNorm (2) [2, 24, 24, 8192] [2, 24, 24, 8192] 16,384 -- | |
│ │ │ │ └─Dropout (3) [2, 24, 24, 8192] [2, 24, 24, 8192] -- -- | |
│ │ │ │ └─Linear (4) [2, 24, 24, 8192] [2, 24, 24, 2048] 16,779,264 -- | |
│ │ └─TimestepBlock (67) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─LoRACompatibleLinear (mapper) [1, 64] [1, 4096] 266,240 -- | |
│ │ │ └─LoRACompatibleLinear (mapper_sca) [1, 64] [1, 4096] 266,240 -- | |
│ │ │ └─LoRACompatibleLinear (mapper_crp) [1, 64] [1, 4096] 266,240 -- | |
│ │ └─AttnBlock (68) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Sequential (kv_mapper) [2, 308, 2048] [2, 308, 2048] -- -- | |
│ │ │ │ └─SiLU (0) [2, 308, 2048] [2, 308, 2048] -- -- | |
│ │ │ │ └─LoRACompatibleLinear (1) [2, 308, 2048] [2, 308, 2048] 4,196,352 -- | |
│ │ │ └─WuerstchenLayerNorm (norm) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Attention (attention) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ │ └─LoRACompatibleLinear (to_q) [2, 576, 2048] [2, 576, 2048] 4,196,352 -- | |
│ │ │ │ └─LoRACompatibleLinear (to_k) [2, 884, 2048] [2, 884, 2048] 4,196,352 -- | |
│ │ │ │ └─LoRACompatibleLinear (to_v) [2, 884, 2048] [2, 884, 2048] 4,196,352 -- | |
│ │ │ │ └─ModuleList (to_out) -- -- -- -- | |
│ │ │ │ │ └─LoRACompatibleLinear (0) [2, 576, 2048] [2, 576, 2048] 4,196,352 -- | |
│ │ │ │ │ └─Dropout (1) [2, 576, 2048] [2, 576, 2048] -- -- | |
│ │ └─ResBlockStageB (69) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Conv2d (depthwise) [2, 2048, 24, 24] [2, 2048, 24, 24] 20,480 [3, 3] | |
│ │ │ └─WuerstchenLayerNorm (norm) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Sequential (channelwise) [2, 24, 24, 2048] [2, 24, 24, 2048] -- -- | |
│ │ │ │ └─Linear (0) [2, 24, 24, 2048] [2, 24, 24, 8192] 16,785,408 -- | |
│ │ │ │ └─GELU (1) [2, 24, 24, 8192] [2, 24, 24, 8192] -- -- | |
│ │ │ │ └─GlobalResponseNorm (2) [2, 24, 24, 8192] [2, 24, 24, 8192] 16,384 -- | |
│ │ │ │ └─Dropout (3) [2, 24, 24, 8192] [2, 24, 24, 8192] -- -- | |
│ │ │ │ └─Linear (4) [2, 24, 24, 8192] [2, 24, 24, 2048] 16,779,264 -- | |
│ │ └─TimestepBlock (70) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─LoRACompatibleLinear (mapper) [1, 64] [1, 4096] 266,240 -- | |
│ │ │ └─LoRACompatibleLinear (mapper_sca) [1, 64] [1, 4096] 266,240 -- | |
│ │ │ └─LoRACompatibleLinear (mapper_crp) [1, 64] [1, 4096] 266,240 -- | |
│ │ └─AttnBlock (71) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Sequential (kv_mapper) [2, 308, 2048] [2, 308, 2048] -- -- | |
│ │ │ │ └─SiLU (0) [2, 308, 2048] [2, 308, 2048] -- -- | |
│ │ │ │ └─LoRACompatibleLinear (1) [2, 308, 2048] [2, 308, 2048] 4,196,352 -- | |
│ │ │ └─WuerstchenLayerNorm (norm) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Attention (attention) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ │ └─LoRACompatibleLinear (to_q) [2, 576, 2048] [2, 576, 2048] 4,196,352 -- | |
│ │ │ │ └─LoRACompatibleLinear (to_k) [2, 884, 2048] [2, 884, 2048] 4,196,352 -- | |
│ │ │ │ └─LoRACompatibleLinear (to_v) [2, 884, 2048] [2, 884, 2048] 4,196,352 -- | |
│ │ │ │ └─ModuleList (to_out) -- -- -- -- | |
│ │ │ │ │ └─LoRACompatibleLinear (0) [2, 576, 2048] [2, 576, 2048] 4,196,352 -- | |
│ │ │ │ │ └─Dropout (1) [2, 576, 2048] [2, 576, 2048] -- -- | |
├─ModuleList (up_upscalers) -- -- (recursive) -- | |
│ └─Sequential (0) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ └─WuerstchenLayerNorm (0) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ └─UpDownBlock2d (1) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─ModuleList (blocks) -- -- -- -- | |
│ │ │ │ └─Identity (0) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ │ └─Conv2d (1) [2, 2048, 24, 24] [2, 2048, 24, 24] 4,196,352 [1, 1] | |
├─ModuleList (up_blocks) -- -- (recursive) -- | |
│ └─ModuleList (1) -- -- -- -- | |
│ │ └─ResBlockStageB (0) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Conv2d (depthwise) [2, 2048, 24, 24] [2, 2048, 24, 24] 20,480 [3, 3] | |
│ │ │ └─WuerstchenLayerNorm (norm) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Sequential (channelwise) [2, 24, 24, 4096] [2, 24, 24, 2048] -- -- | |
│ │ │ │ └─Linear (0) [2, 24, 24, 4096] [2, 24, 24, 8192] 33,562,624 -- | |
│ │ │ │ └─GELU (1) [2, 24, 24, 8192] [2, 24, 24, 8192] -- -- | |
│ │ │ │ └─GlobalResponseNorm (2) [2, 24, 24, 8192] [2, 24, 24, 8192] 16,384 -- | |
│ │ │ │ └─Dropout (3) [2, 24, 24, 8192] [2, 24, 24, 8192] -- -- | |
│ │ │ │ └─Linear (4) [2, 24, 24, 8192] [2, 24, 24, 2048] 16,779,264 -- | |
│ │ └─TimestepBlock (1) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─LoRACompatibleLinear (mapper) [1, 64] [1, 4096] 266,240 -- | |
│ │ │ └─LoRACompatibleLinear (mapper_sca) [1, 64] [1, 4096] 266,240 -- | |
│ │ │ └─LoRACompatibleLinear (mapper_crp) [1, 64] [1, 4096] 266,240 -- | |
│ │ └─AttnBlock (2) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Sequential (kv_mapper) [2, 308, 2048] [2, 308, 2048] -- -- | |
│ │ │ │ └─SiLU (0) [2, 308, 2048] [2, 308, 2048] -- -- | |
│ │ │ │ └─LoRACompatibleLinear (1) [2, 308, 2048] [2, 308, 2048] 4,196,352 -- | |
│ │ │ └─WuerstchenLayerNorm (norm) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Attention (attention) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ │ └─LoRACompatibleLinear (to_q) [2, 576, 2048] [2, 576, 2048] 4,196,352 -- | |
│ │ │ │ └─LoRACompatibleLinear (to_k) [2, 884, 2048] [2, 884, 2048] 4,196,352 -- | |
│ │ │ │ └─LoRACompatibleLinear (to_v) [2, 884, 2048] [2, 884, 2048] 4,196,352 -- | |
│ │ │ │ └─ModuleList (to_out) -- -- -- -- | |
│ │ │ │ │ └─LoRACompatibleLinear (0) [2, 576, 2048] [2, 576, 2048] 4,196,352 -- | |
│ │ │ │ │ └─Dropout (1) [2, 576, 2048] [2, 576, 2048] -- -- | |
│ │ └─ResBlockStageB (3) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Conv2d (depthwise) [2, 2048, 24, 24] [2, 2048, 24, 24] 20,480 [3, 3] | |
│ │ │ └─WuerstchenLayerNorm (norm) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Sequential (channelwise) [2, 24, 24, 2048] [2, 24, 24, 2048] -- -- | |
│ │ │ │ └─Linear (0) [2, 24, 24, 2048] [2, 24, 24, 8192] 16,785,408 -- | |
│ │ │ │ └─GELU (1) [2, 24, 24, 8192] [2, 24, 24, 8192] -- -- | |
│ │ │ │ └─GlobalResponseNorm (2) [2, 24, 24, 8192] [2, 24, 24, 8192] 16,384 -- | |
│ │ │ │ └─Dropout (3) [2, 24, 24, 8192] [2, 24, 24, 8192] -- -- | |
│ │ │ │ └─Linear (4) [2, 24, 24, 8192] [2, 24, 24, 2048] 16,779,264 -- | |
│ │ └─TimestepBlock (4) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─LoRACompatibleLinear (mapper) [1, 64] [1, 4096] 266,240 -- | |
│ │ │ └─LoRACompatibleLinear (mapper_sca) [1, 64] [1, 4096] 266,240 -- | |
│ │ │ └─LoRACompatibleLinear (mapper_crp) [1, 64] [1, 4096] 266,240 -- | |
│ │ └─AttnBlock (5) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Sequential (kv_mapper) [2, 308, 2048] [2, 308, 2048] -- -- | |
│ │ │ │ └─SiLU (0) [2, 308, 2048] [2, 308, 2048] -- -- | |
│ │ │ │ └─LoRACompatibleLinear (1) [2, 308, 2048] [2, 308, 2048] 4,196,352 -- | |
│ │ │ └─WuerstchenLayerNorm (norm) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Attention (attention) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ │ └─LoRACompatibleLinear (to_q) [2, 576, 2048] [2, 576, 2048] 4,196,352 -- | |
│ │ │ │ └─LoRACompatibleLinear (to_k) [2, 884, 2048] [2, 884, 2048] 4,196,352 -- | |
│ │ │ │ └─LoRACompatibleLinear (to_v) [2, 884, 2048] [2, 884, 2048] 4,196,352 -- | |
│ │ │ │ └─ModuleList (to_out) -- -- -- -- | |
│ │ │ │ │ └─LoRACompatibleLinear (0) [2, 576, 2048] [2, 576, 2048] 4,196,352 -- | |
│ │ │ │ │ └─Dropout (1) [2, 576, 2048] [2, 576, 2048] -- -- | |
│ │ └─ResBlockStageB (6) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Conv2d (depthwise) [2, 2048, 24, 24] [2, 2048, 24, 24] 20,480 [3, 3] | |
│ │ │ └─WuerstchenLayerNorm (norm) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Sequential (channelwise) [2, 24, 24, 2048] [2, 24, 24, 2048] -- -- | |
│ │ │ │ └─Linear (0) [2, 24, 24, 2048] [2, 24, 24, 8192] 16,785,408 -- | |
│ │ │ │ └─GELU (1) [2, 24, 24, 8192] [2, 24, 24, 8192] -- -- | |
│ │ │ │ └─GlobalResponseNorm (2) [2, 24, 24, 8192] [2, 24, 24, 8192] 16,384 -- | |
│ │ │ │ └─Dropout (3) [2, 24, 24, 8192] [2, 24, 24, 8192] -- -- | |
│ │ │ │ └─Linear (4) [2, 24, 24, 8192] [2, 24, 24, 2048] 16,779,264 -- | |
│ │ └─TimestepBlock (7) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─LoRACompatibleLinear (mapper) [1, 64] [1, 4096] 266,240 -- | |
│ │ │ └─LoRACompatibleLinear (mapper_sca) [1, 64] [1, 4096] 266,240 -- | |
│ │ │ └─LoRACompatibleLinear (mapper_crp) [1, 64] [1, 4096] 266,240 -- | |
│ │ └─AttnBlock (8) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Sequential (kv_mapper) [2, 308, 2048] [2, 308, 2048] -- -- | |
│ │ │ │ └─SiLU (0) [2, 308, 2048] [2, 308, 2048] -- -- | |
│ │ │ │ └─LoRACompatibleLinear (1) [2, 308, 2048] [2, 308, 2048] 4,196,352 -- | |
│ │ │ └─WuerstchenLayerNorm (norm) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Attention (attention) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ │ └─LoRACompatibleLinear (to_q) [2, 576, 2048] [2, 576, 2048] 4,196,352 -- | |
│ │ │ │ └─LoRACompatibleLinear (to_k) [2, 884, 2048] [2, 884, 2048] 4,196,352 -- | |
│ │ │ │ └─LoRACompatibleLinear (to_v) [2, 884, 2048] [2, 884, 2048] 4,196,352 -- | |
│ │ │ │ └─ModuleList (to_out) -- -- -- -- | |
│ │ │ │ │ └─LoRACompatibleLinear (0) [2, 576, 2048] [2, 576, 2048] 4,196,352 -- | |
│ │ │ │ │ └─Dropout (1) [2, 576, 2048] [2, 576, 2048] -- -- | |
│ │ └─ResBlockStageB (9) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Conv2d (depthwise) [2, 2048, 24, 24] [2, 2048, 24, 24] 20,480 [3, 3] | |
│ │ │ └─WuerstchenLayerNorm (norm) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Sequential (channelwise) [2, 24, 24, 2048] [2, 24, 24, 2048] -- -- | |
│ │ │ │ └─Linear (0) [2, 24, 24, 2048] [2, 24, 24, 8192] 16,785,408 -- | |
│ │ │ │ └─GELU (1) [2, 24, 24, 8192] [2, 24, 24, 8192] -- -- | |
│ │ │ │ └─GlobalResponseNorm (2) [2, 24, 24, 8192] [2, 24, 24, 8192] 16,384 -- | |
│ │ │ │ └─Dropout (3) [2, 24, 24, 8192] [2, 24, 24, 8192] -- -- | |
│ │ │ │ └─Linear (4) [2, 24, 24, 8192] [2, 24, 24, 2048] 16,779,264 -- | |
│ │ └─TimestepBlock (10) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─LoRACompatibleLinear (mapper) [1, 64] [1, 4096] 266,240 -- | |
│ │ │ └─LoRACompatibleLinear (mapper_sca) [1, 64] [1, 4096] 266,240 -- | |
│ │ │ └─LoRACompatibleLinear (mapper_crp) [1, 64] [1, 4096] 266,240 -- | |
│ │ └─AttnBlock (11) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Sequential (kv_mapper) [2, 308, 2048] [2, 308, 2048] -- -- | |
│ │ │ │ └─SiLU (0) [2, 308, 2048] [2, 308, 2048] -- -- | |
│ │ │ │ └─LoRACompatibleLinear (1) [2, 308, 2048] [2, 308, 2048] 4,196,352 -- | |
│ │ │ └─WuerstchenLayerNorm (norm) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Attention (attention) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ │ └─LoRACompatibleLinear (to_q) [2, 576, 2048] [2, 576, 2048] 4,196,352 -- | |
│ │ │ │ └─LoRACompatibleLinear (to_k) [2, 884, 2048] [2, 884, 2048] 4,196,352 -- | |
│ │ │ │ └─LoRACompatibleLinear (to_v) [2, 884, 2048] [2, 884, 2048] 4,196,352 -- | |
│ │ │ │ └─ModuleList (to_out) -- -- -- -- | |
│ │ │ │ │ └─LoRACompatibleLinear (0) [2, 576, 2048] [2, 576, 2048] 4,196,352 -- | |
│ │ │ │ │ └─Dropout (1) [2, 576, 2048] [2, 576, 2048] -- -- | |
│ │ └─ResBlockStageB (12) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Conv2d (depthwise) [2, 2048, 24, 24] [2, 2048, 24, 24] 20,480 [3, 3] | |
│ │ │ └─WuerstchenLayerNorm (norm) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Sequential (channelwise) [2, 24, 24, 2048] [2, 24, 24, 2048] -- -- | |
│ │ │ │ └─Linear (0) [2, 24, 24, 2048] [2, 24, 24, 8192] 16,785,408 -- | |
│ │ │ │ └─GELU (1) [2, 24, 24, 8192] [2, 24, 24, 8192] -- -- | |
│ │ │ │ └─GlobalResponseNorm (2) [2, 24, 24, 8192] [2, 24, 24, 8192] 16,384 -- | |
│ │ │ │ └─Dropout (3) [2, 24, 24, 8192] [2, 24, 24, 8192] -- -- | |
│ │ │ │ └─Linear (4) [2, 24, 24, 8192] [2, 24, 24, 2048] 16,779,264 -- | |
│ │ └─TimestepBlock (13) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─LoRACompatibleLinear (mapper) [1, 64] [1, 4096] 266,240 -- | |
│ │ │ └─LoRACompatibleLinear (mapper_sca) [1, 64] [1, 4096] 266,240 -- | |
│ │ │ └─LoRACompatibleLinear (mapper_crp) [1, 64] [1, 4096] 266,240 -- | |
│ │ └─AttnBlock (14) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Sequential (kv_mapper) [2, 308, 2048] [2, 308, 2048] -- -- | |
│ │ │ │ └─SiLU (0) [2, 308, 2048] [2, 308, 2048] -- -- | |
│ │ │ │ └─LoRACompatibleLinear (1) [2, 308, 2048] [2, 308, 2048] 4,196,352 -- | |
│ │ │ └─WuerstchenLayerNorm (norm) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Attention (attention) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ │ └─LoRACompatibleLinear (to_q) [2, 576, 2048] [2, 576, 2048] 4,196,352 -- | |
│ │ │ │ └─LoRACompatibleLinear (to_k) [2, 884, 2048] [2, 884, 2048] 4,196,352 -- | |
│ │ │ │ └─LoRACompatibleLinear (to_v) [2, 884, 2048] [2, 884, 2048] 4,196,352 -- | |
│ │ │ │ └─ModuleList (to_out) -- -- -- -- | |
│ │ │ │ │ └─LoRACompatibleLinear (0) [2, 576, 2048] [2, 576, 2048] 4,196,352 -- | |
│ │ │ │ │ └─Dropout (1) [2, 576, 2048] [2, 576, 2048] -- -- | |
│ │ └─ResBlockStageB (15) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Conv2d (depthwise) [2, 2048, 24, 24] [2, 2048, 24, 24] 20,480 [3, 3] | |
│ │ │ └─WuerstchenLayerNorm (norm) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Sequential (channelwise) [2, 24, 24, 2048] [2, 24, 24, 2048] -- -- | |
│ │ │ │ └─Linear (0) [2, 24, 24, 2048] [2, 24, 24, 8192] 16,785,408 -- | |
│ │ │ │ └─GELU (1) [2, 24, 24, 8192] [2, 24, 24, 8192] -- -- | |
│ │ │ │ └─GlobalResponseNorm (2) [2, 24, 24, 8192] [2, 24, 24, 8192] 16,384 -- | |
│ │ │ │ └─Dropout (3) [2, 24, 24, 8192] [2, 24, 24, 8192] -- -- | |
│ │ │ │ └─Linear (4) [2, 24, 24, 8192] [2, 24, 24, 2048] 16,779,264 -- | |
│ │ └─TimestepBlock (16) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─LoRACompatibleLinear (mapper) [1, 64] [1, 4096] 266,240 -- | |
│ │ │ └─LoRACompatibleLinear (mapper_sca) [1, 64] [1, 4096] 266,240 -- | |
│ │ │ └─LoRACompatibleLinear (mapper_crp) [1, 64] [1, 4096] 266,240 -- | |
│ │ └─AttnBlock (17) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Sequential (kv_mapper) [2, 308, 2048] [2, 308, 2048] -- -- | |
│ │ │ │ └─SiLU (0) [2, 308, 2048] [2, 308, 2048] -- -- | |
│ │ │ │ └─LoRACompatibleLinear (1) [2, 308, 2048] [2, 308, 2048] 4,196,352 -- | |
│ │ │ └─WuerstchenLayerNorm (norm) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Attention (attention) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ │ └─LoRACompatibleLinear (to_q) [2, 576, 2048] [2, 576, 2048] 4,196,352 -- | |
│ │ │ │ └─LoRACompatibleLinear (to_k) [2, 884, 2048] [2, 884, 2048] 4,196,352 -- | |
│ │ │ │ └─LoRACompatibleLinear (to_v) [2, 884, 2048] [2, 884, 2048] 4,196,352 -- | |
│ │ │ │ └─ModuleList (to_out) -- -- -- -- | |
│ │ │ │ │ └─LoRACompatibleLinear (0) [2, 576, 2048] [2, 576, 2048] 4,196,352 -- | |
│ │ │ │ │ └─Dropout (1) [2, 576, 2048] [2, 576, 2048] -- -- | |
│ │ └─ResBlockStageB (18) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Conv2d (depthwise) [2, 2048, 24, 24] [2, 2048, 24, 24] 20,480 [3, 3] | |
│ │ │ └─WuerstchenLayerNorm (norm) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Sequential (channelwise) [2, 24, 24, 2048] [2, 24, 24, 2048] -- -- | |
│ │ │ │ └─Linear (0) [2, 24, 24, 2048] [2, 24, 24, 8192] 16,785,408 -- | |
│ │ │ │ └─GELU (1) [2, 24, 24, 8192] [2, 24, 24, 8192] -- -- | |
│ │ │ │ └─GlobalResponseNorm (2) [2, 24, 24, 8192] [2, 24, 24, 8192] 16,384 -- | |
│ │ │ │ └─Dropout (3) [2, 24, 24, 8192] [2, 24, 24, 8192] -- -- | |
│ │ │ │ └─Linear (4) [2, 24, 24, 8192] [2, 24, 24, 2048] 16,779,264 -- | |
│ │ └─TimestepBlock (19) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─LoRACompatibleLinear (mapper) [1, 64] [1, 4096] 266,240 -- | |
│ │ │ └─LoRACompatibleLinear (mapper_sca) [1, 64] [1, 4096] 266,240 -- | |
│ │ │ └─LoRACompatibleLinear (mapper_crp) [1, 64] [1, 4096] 266,240 -- | |
│ │ └─AttnBlock (20) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Sequential (kv_mapper) [2, 308, 2048] [2, 308, 2048] -- -- | |
│ │ │ │ └─SiLU (0) [2, 308, 2048] [2, 308, 2048] -- -- | |
│ │ │ │ └─LoRACompatibleLinear (1) [2, 308, 2048] [2, 308, 2048] 4,196,352 -- | |
│ │ │ └─WuerstchenLayerNorm (norm) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Attention (attention) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ │ └─LoRACompatibleLinear (to_q) [2, 576, 2048] [2, 576, 2048] 4,196,352 -- | |
│ │ │ │ └─LoRACompatibleLinear (to_k) [2, 884, 2048] [2, 884, 2048] 4,196,352 -- | |
│ │ │ │ └─LoRACompatibleLinear (to_v) [2, 884, 2048] [2, 884, 2048] 4,196,352 -- | |
│ │ │ │ └─ModuleList (to_out) -- -- -- -- | |
│ │ │ │ │ └─LoRACompatibleLinear (0) [2, 576, 2048] [2, 576, 2048] 4,196,352 -- | |
│ │ │ │ │ └─Dropout (1) [2, 576, 2048] [2, 576, 2048] -- -- | |
│ │ └─ResBlockStageB (21) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Conv2d (depthwise) [2, 2048, 24, 24] [2, 2048, 24, 24] 20,480 [3, 3] | |
│ │ │ └─WuerstchenLayerNorm (norm) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Sequential (channelwise) [2, 24, 24, 2048] [2, 24, 24, 2048] -- -- | |
│ │ │ │ └─Linear (0) [2, 24, 24, 2048] [2, 24, 24, 8192] 16,785,408 -- | |
│ │ │ │ └─GELU (1) [2, 24, 24, 8192] [2, 24, 24, 8192] -- -- | |
│ │ │ │ └─GlobalResponseNorm (2) [2, 24, 24, 8192] [2, 24, 24, 8192] 16,384 -- | |
│ │ │ │ └─Dropout (3) [2, 24, 24, 8192] [2, 24, 24, 8192] -- -- | |
│ │ │ │ └─Linear (4) [2, 24, 24, 8192] [2, 24, 24, 2048] 16,779,264 -- | |
│ │ └─TimestepBlock (22) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─LoRACompatibleLinear (mapper) [1, 64] [1, 4096] 266,240 -- | |
│ │ │ └─LoRACompatibleLinear (mapper_sca) [1, 64] [1, 4096] 266,240 -- | |
│ │ │ └─LoRACompatibleLinear (mapper_crp) [1, 64] [1, 4096] 266,240 -- | |
│ │ └─AttnBlock (23) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Sequential (kv_mapper) [2, 308, 2048] [2, 308, 2048] -- -- | |
│ │ │ │ └─SiLU (0) [2, 308, 2048] [2, 308, 2048] -- -- | |
│ │ │ │ └─LoRACompatibleLinear (1) [2, 308, 2048] [2, 308, 2048] 4,196,352 -- | |
│ │ │ └─WuerstchenLayerNorm (norm) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ └─Attention (attention) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ │ │ │ └─LoRACompatibleLinear (to_q) [2, 576, 2048] [2, 576, 2048] 4,196,352 -- | |
│ │ │ │ └─LoRACompatibleLinear (to_k) [2, 884, 2048] [2, 884, 2048] 4,196,352 -- | |
│ │ │ │ └─LoRACompatibleLinear (to_v) [2, 884, 2048] [2, 884, 2048] 4,196,352 -- | |
│ │ │ │ └─ModuleList (to_out) -- -- -- -- | |
│ │ │ │ │ └─LoRACompatibleLinear (0) [2, 576, 2048] [2, 576, 2048] 4,196,352 -- | |
│ │ │ │ │ └─Dropout (1) [2, 576, 2048] [2, 576, 2048] -- -- | |
├─ModuleList (up_upscalers) -- -- (recursive) -- | |
│ └─Identity (1) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
├─Sequential (clf) [2, 2048, 24, 24] [2, 16, 24, 24] -- -- | |
│ └─WuerstchenLayerNorm (0) [2, 2048, 24, 24] [2, 2048, 24, 24] -- -- | |
│ └─Conv2d (1) [2, 2048, 24, 24] [2, 16, 24, 24] 32,784 [1, 1] | |
│ └─PixelShuffle (2) [2, 16, 24, 24] [2, 16, 24, 24] -- -- | |
=========================================================================================================================================================== | |
Total params: 3,589,103,632 | |
Trainable params: 3,589,103,632 | |
Non-trainable params: 0 | |
Total mult-adds (G): 18.34 | |
=========================================================================================================================================================== | |
Input size (MB): 0.44 | |
Forward/backward pass size (MB): 9461.18 | |
Params size (MB): 7160.36 | |
Estimated Total Size (MB): 16621.98 | |
=========================================================================================================================================================== |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment