Created: November 7, 2023 06:21
Save laksjdjf/cc200ffc65c6db51cc549ccd81e45221 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| ================================================================================================================================================================ | |
| Layer (type (var_name)) Input Shape Output Shape Param # Kernel Shape | |
| ================================================================================================================================================================ | |
| Transformer2DModel (Transformer2DModel) [1, 4, 32, 32] [1, 8, 32, 32] 2,304 -- | |
| ├─PatchEmbed (pos_embed) [1, 4, 32, 32] [1, 256, 1152] -- -- | |
| │ └─Conv2d (proj) [1, 4, 32, 32] [1, 1152, 16, 16] 19,584 [2, 2] | |
| ├─AdaLayerNormSingle (adaln_single) [1] [1, 6912] -- -- | |
| │ └─CombinedTimestepSizeEmbeddings (emb) [1] [1, 1152] -- -- | |
| │ │ └─Timesteps (time_proj) [1] [1, 256] -- -- | |
| │ │ └─TimestepEmbedding (timestep_embedder) [1, 256] [1, 1152] 1,328,256 -- | |
| │ │ │ └─LoRACompatibleLinear (linear_1) [1, 256] [1, 1152] 296,064 -- | |
| │ │ └─TimestepEmbedding (aspect_ratio_embedder) -- -- (recursive) -- | |
| │ │ │ └─SiLU (act) [1, 1152] [1, 1152] -- -- | |
| │ │ └─TimestepEmbedding (timestep_embedder) -- -- (recursive) -- | |
| │ │ │ └─LoRACompatibleLinear (linear_2) [1, 1152] [1, 1152] 1,328,256 -- | |
| │ │ └─Timesteps (additional_condition_proj) [2] [2, 256] -- -- | |
| │ │ └─TimestepEmbedding (resolution_embedder) [2, 256] [2, 384] 147,840 -- | |
| │ │ │ └─LoRACompatibleLinear (linear_1) [2, 256] [2, 384] 98,688 -- | |
| │ │ └─TimestepEmbedding (aspect_ratio_embedder) -- -- (recursive) -- | |
| │ │ │ └─SiLU (act) [2, 384] [2, 384] -- -- | |
| │ │ └─TimestepEmbedding (resolution_embedder) -- -- (recursive) -- | |
| │ │ │ └─LoRACompatibleLinear (linear_2) [2, 384] [2, 384] 147,840 -- | |
| │ │ └─Timesteps (additional_condition_proj) [1] [1, 256] -- -- | |
| │ │ └─TimestepEmbedding (aspect_ratio_embedder) [1, 256] [1, 384] -- -- | |
| │ │ │ └─LoRACompatibleLinear (linear_1) [1, 256] [1, 384] 98,688 -- | |
| │ │ │ └─SiLU (act) [1, 384] [1, 384] -- -- | |
| │ │ │ └─LoRACompatibleLinear (linear_2) [1, 384] [1, 384] 147,840 -- | |
| │ └─SiLU (silu) [1, 1152] [1, 1152] -- -- | |
| │ └─Linear (linear) [1, 1152] [1, 6912] 7,969,536 -- | |
| ├─CaptionProjection (caption_projection) [1, 130, 4096] [1, 130, 1152] -- -- | |
| │ └─Linear (linear_1) [1, 130, 4096] [1, 130, 1152] 4,719,744 -- | |
| │ └─GELU (act_1) [1, 130, 1152] [1, 130, 1152] -- -- | |
| │ └─Linear (linear_2) [1, 130, 1152] [1, 130, 1152] 1,328,256 -- | |
| ├─ModuleList (transformer_blocks) -- -- -- -- | |
| │ └─BasicTransformerBlock (0) [1, 256, 1152] [1, 256, 1152] 6,912 -- | |
| │ │ └─LayerNorm (norm1) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ └─Attention (attn1) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ │ └─LoRACompatibleLinear (to_q) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ └─LoRACompatibleLinear (to_k) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ └─LoRACompatibleLinear (to_v) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ └─ModuleList (to_out) -- -- -- -- | |
| │ │ │ │ └─LoRACompatibleLinear (0) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ │ └─Dropout (1) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ └─Attention (attn2) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ │ └─LoRACompatibleLinear (to_q) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ └─LoRACompatibleLinear (to_k) [1, 130, 1152] [1, 130, 1152] 1,328,256 -- | |
| │ │ │ └─LoRACompatibleLinear (to_v) [1, 130, 1152] [1, 130, 1152] 1,328,256 -- | |
| │ │ │ └─ModuleList (to_out) -- -- -- -- | |
| │ │ │ │ └─LoRACompatibleLinear (0) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ │ └─Dropout (1) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ └─LayerNorm (norm2) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ └─FeedForward (ff) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ │ └─ModuleList (net) -- -- -- -- | |
| │ │ │ │ └─GELU (0) [1, 256, 1152] [1, 256, 4608] -- -- | |
| │ │ │ │ │ └─Linear (proj) [1, 256, 1152] [1, 256, 4608] 5,313,024 -- | |
| │ │ │ │ └─Dropout (1) [1, 256, 4608] [1, 256, 4608] -- -- | |
| │ │ │ │ └─LoRACompatibleLinear (2) [1, 256, 4608] [1, 256, 1152] 5,309,568 -- | |
| │ └─BasicTransformerBlock (1) [1, 256, 1152] [1, 256, 1152] 6,912 -- | |
| │ │ └─LayerNorm (norm1) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ └─Attention (attn1) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ │ └─LoRACompatibleLinear (to_q) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ └─LoRACompatibleLinear (to_k) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ └─LoRACompatibleLinear (to_v) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ └─ModuleList (to_out) -- -- -- -- | |
| │ │ │ │ └─LoRACompatibleLinear (0) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ │ └─Dropout (1) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ └─Attention (attn2) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ │ └─LoRACompatibleLinear (to_q) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ └─LoRACompatibleLinear (to_k) [1, 130, 1152] [1, 130, 1152] 1,328,256 -- | |
| │ │ │ └─LoRACompatibleLinear (to_v) [1, 130, 1152] [1, 130, 1152] 1,328,256 -- | |
| │ │ │ └─ModuleList (to_out) -- -- -- -- | |
| │ │ │ │ └─LoRACompatibleLinear (0) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ │ └─Dropout (1) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ └─LayerNorm (norm2) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ └─FeedForward (ff) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ │ └─ModuleList (net) -- -- -- -- | |
| │ │ │ │ └─GELU (0) [1, 256, 1152] [1, 256, 4608] -- -- | |
| │ │ │ │ │ └─Linear (proj) [1, 256, 1152] [1, 256, 4608] 5,313,024 -- | |
| │ │ │ │ └─Dropout (1) [1, 256, 4608] [1, 256, 4608] -- -- | |
| │ │ │ │ └─LoRACompatibleLinear (2) [1, 256, 4608] [1, 256, 1152] 5,309,568 -- | |
| │ └─BasicTransformerBlock (2) [1, 256, 1152] [1, 256, 1152] 6,912 -- | |
| │ │ └─LayerNorm (norm1) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ └─Attention (attn1) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ │ └─LoRACompatibleLinear (to_q) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ └─LoRACompatibleLinear (to_k) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ └─LoRACompatibleLinear (to_v) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ └─ModuleList (to_out) -- -- -- -- | |
| │ │ │ │ └─LoRACompatibleLinear (0) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ │ └─Dropout (1) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ └─Attention (attn2) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ │ └─LoRACompatibleLinear (to_q) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ └─LoRACompatibleLinear (to_k) [1, 130, 1152] [1, 130, 1152] 1,328,256 -- | |
| │ │ │ └─LoRACompatibleLinear (to_v) [1, 130, 1152] [1, 130, 1152] 1,328,256 -- | |
| │ │ │ └─ModuleList (to_out) -- -- -- -- | |
| │ │ │ │ └─LoRACompatibleLinear (0) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ │ └─Dropout (1) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ └─LayerNorm (norm2) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ └─FeedForward (ff) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ │ └─ModuleList (net) -- -- -- -- | |
| │ │ │ │ └─GELU (0) [1, 256, 1152] [1, 256, 4608] -- -- | |
| │ │ │ │ │ └─Linear (proj) [1, 256, 1152] [1, 256, 4608] 5,313,024 -- | |
| │ │ │ │ └─Dropout (1) [1, 256, 4608] [1, 256, 4608] -- -- | |
| │ │ │ │ └─LoRACompatibleLinear (2) [1, 256, 4608] [1, 256, 1152] 5,309,568 -- | |
| │ └─BasicTransformerBlock (3) [1, 256, 1152] [1, 256, 1152] 6,912 -- | |
| │ │ └─LayerNorm (norm1) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ └─Attention (attn1) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ │ └─LoRACompatibleLinear (to_q) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ └─LoRACompatibleLinear (to_k) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ └─LoRACompatibleLinear (to_v) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ └─ModuleList (to_out) -- -- -- -- | |
| │ │ │ │ └─LoRACompatibleLinear (0) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ │ └─Dropout (1) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ └─Attention (attn2) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ │ └─LoRACompatibleLinear (to_q) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ └─LoRACompatibleLinear (to_k) [1, 130, 1152] [1, 130, 1152] 1,328,256 -- | |
| │ │ │ └─LoRACompatibleLinear (to_v) [1, 130, 1152] [1, 130, 1152] 1,328,256 -- | |
| │ │ │ └─ModuleList (to_out) -- -- -- -- | |
| │ │ │ │ └─LoRACompatibleLinear (0) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ │ └─Dropout (1) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ └─LayerNorm (norm2) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ └─FeedForward (ff) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ │ └─ModuleList (net) -- -- -- -- | |
| │ │ │ │ └─GELU (0) [1, 256, 1152] [1, 256, 4608] -- -- | |
| │ │ │ │ │ └─Linear (proj) [1, 256, 1152] [1, 256, 4608] 5,313,024 -- | |
| │ │ │ │ └─Dropout (1) [1, 256, 4608] [1, 256, 4608] -- -- | |
| │ │ │ │ └─LoRACompatibleLinear (2) [1, 256, 4608] [1, 256, 1152] 5,309,568 -- | |
| │ └─BasicTransformerBlock (4) [1, 256, 1152] [1, 256, 1152] 6,912 -- | |
| │ │ └─LayerNorm (norm1) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ └─Attention (attn1) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ │ └─LoRACompatibleLinear (to_q) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ └─LoRACompatibleLinear (to_k) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ └─LoRACompatibleLinear (to_v) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ └─ModuleList (to_out) -- -- -- -- | |
| │ │ │ │ └─LoRACompatibleLinear (0) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ │ └─Dropout (1) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ └─Attention (attn2) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ │ └─LoRACompatibleLinear (to_q) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ └─LoRACompatibleLinear (to_k) [1, 130, 1152] [1, 130, 1152] 1,328,256 -- | |
| │ │ │ └─LoRACompatibleLinear (to_v) [1, 130, 1152] [1, 130, 1152] 1,328,256 -- | |
| │ │ │ └─ModuleList (to_out) -- -- -- -- | |
| │ │ │ │ └─LoRACompatibleLinear (0) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ │ └─Dropout (1) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ └─LayerNorm (norm2) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ └─FeedForward (ff) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ │ └─ModuleList (net) -- -- -- -- | |
| │ │ │ │ └─GELU (0) [1, 256, 1152] [1, 256, 4608] -- -- | |
| │ │ │ │ │ └─Linear (proj) [1, 256, 1152] [1, 256, 4608] 5,313,024 -- | |
| │ │ │ │ └─Dropout (1) [1, 256, 4608] [1, 256, 4608] -- -- | |
| │ │ │ │ └─LoRACompatibleLinear (2) [1, 256, 4608] [1, 256, 1152] 5,309,568 -- | |
| │ └─BasicTransformerBlock (5) [1, 256, 1152] [1, 256, 1152] 6,912 -- | |
| │ │ └─LayerNorm (norm1) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ └─Attention (attn1) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ │ └─LoRACompatibleLinear (to_q) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ └─LoRACompatibleLinear (to_k) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ └─LoRACompatibleLinear (to_v) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ └─ModuleList (to_out) -- -- -- -- | |
| │ │ │ │ └─LoRACompatibleLinear (0) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ │ └─Dropout (1) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ └─Attention (attn2) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ │ └─LoRACompatibleLinear (to_q) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ └─LoRACompatibleLinear (to_k) [1, 130, 1152] [1, 130, 1152] 1,328,256 -- | |
| │ │ │ └─LoRACompatibleLinear (to_v) [1, 130, 1152] [1, 130, 1152] 1,328,256 -- | |
| │ │ │ └─ModuleList (to_out) -- -- -- -- | |
| │ │ │ │ └─LoRACompatibleLinear (0) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ │ └─Dropout (1) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ └─LayerNorm (norm2) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ └─FeedForward (ff) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ │ └─ModuleList (net) -- -- -- -- | |
| │ │ │ │ └─GELU (0) [1, 256, 1152] [1, 256, 4608] -- -- | |
| │ │ │ │ │ └─Linear (proj) [1, 256, 1152] [1, 256, 4608] 5,313,024 -- | |
| │ │ │ │ └─Dropout (1) [1, 256, 4608] [1, 256, 4608] -- -- | |
| │ │ │ │ └─LoRACompatibleLinear (2) [1, 256, 4608] [1, 256, 1152] 5,309,568 -- | |
| │ └─BasicTransformerBlock (6) [1, 256, 1152] [1, 256, 1152] 6,912 -- | |
| │ │ └─LayerNorm (norm1) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ └─Attention (attn1) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ │ └─LoRACompatibleLinear (to_q) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ └─LoRACompatibleLinear (to_k) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ └─LoRACompatibleLinear (to_v) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ └─ModuleList (to_out) -- -- -- -- | |
| │ │ │ │ └─LoRACompatibleLinear (0) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ │ └─Dropout (1) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ └─Attention (attn2) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ │ └─LoRACompatibleLinear (to_q) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ └─LoRACompatibleLinear (to_k) [1, 130, 1152] [1, 130, 1152] 1,328,256 -- | |
| │ │ │ └─LoRACompatibleLinear (to_v) [1, 130, 1152] [1, 130, 1152] 1,328,256 -- | |
| │ │ │ └─ModuleList (to_out) -- -- -- -- | |
| │ │ │ │ └─LoRACompatibleLinear (0) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ │ └─Dropout (1) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ └─LayerNorm (norm2) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ └─FeedForward (ff) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ │ └─ModuleList (net) -- -- -- -- | |
| │ │ │ │ └─GELU (0) [1, 256, 1152] [1, 256, 4608] -- -- | |
| │ │ │ │ │ └─Linear (proj) [1, 256, 1152] [1, 256, 4608] 5,313,024 -- | |
| │ │ │ │ └─Dropout (1) [1, 256, 4608] [1, 256, 4608] -- -- | |
| │ │ │ │ └─LoRACompatibleLinear (2) [1, 256, 4608] [1, 256, 1152] 5,309,568 -- | |
| │ └─BasicTransformerBlock (7) [1, 256, 1152] [1, 256, 1152] 6,912 -- | |
| │ │ └─LayerNorm (norm1) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ └─Attention (attn1) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ │ └─LoRACompatibleLinear (to_q) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ └─LoRACompatibleLinear (to_k) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ └─LoRACompatibleLinear (to_v) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ └─ModuleList (to_out) -- -- -- -- | |
| │ │ │ │ └─LoRACompatibleLinear (0) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ │ └─Dropout (1) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ └─Attention (attn2) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ │ └─LoRACompatibleLinear (to_q) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ └─LoRACompatibleLinear (to_k) [1, 130, 1152] [1, 130, 1152] 1,328,256 -- | |
| │ │ │ └─LoRACompatibleLinear (to_v) [1, 130, 1152] [1, 130, 1152] 1,328,256 -- | |
| │ │ │ └─ModuleList (to_out) -- -- -- -- | |
| │ │ │ │ └─LoRACompatibleLinear (0) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ │ └─Dropout (1) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ └─LayerNorm (norm2) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ └─FeedForward (ff) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ │ └─ModuleList (net) -- -- -- -- | |
| │ │ │ │ └─GELU (0) [1, 256, 1152] [1, 256, 4608] -- -- | |
| │ │ │ │ │ └─Linear (proj) [1, 256, 1152] [1, 256, 4608] 5,313,024 -- | |
| │ │ │ │ └─Dropout (1) [1, 256, 4608] [1, 256, 4608] -- -- | |
| │ │ │ │ └─LoRACompatibleLinear (2) [1, 256, 4608] [1, 256, 1152] 5,309,568 -- | |
| │ └─BasicTransformerBlock (8) [1, 256, 1152] [1, 256, 1152] 6,912 -- | |
| │ │ └─LayerNorm (norm1) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ └─Attention (attn1) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ │ └─LoRACompatibleLinear (to_q) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ └─LoRACompatibleLinear (to_k) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ └─LoRACompatibleLinear (to_v) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ └─ModuleList (to_out) -- -- -- -- | |
| │ │ │ │ └─LoRACompatibleLinear (0) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ │ └─Dropout (1) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ └─Attention (attn2) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ │ └─LoRACompatibleLinear (to_q) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ └─LoRACompatibleLinear (to_k) [1, 130, 1152] [1, 130, 1152] 1,328,256 -- | |
| │ │ │ └─LoRACompatibleLinear (to_v) [1, 130, 1152] [1, 130, 1152] 1,328,256 -- | |
| │ │ │ └─ModuleList (to_out) -- -- -- -- | |
| │ │ │ │ └─LoRACompatibleLinear (0) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ │ └─Dropout (1) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ └─LayerNorm (norm2) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ └─FeedForward (ff) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ │ └─ModuleList (net) -- -- -- -- | |
| │ │ │ │ └─GELU (0) [1, 256, 1152] [1, 256, 4608] -- -- | |
| │ │ │ │ │ └─Linear (proj) [1, 256, 1152] [1, 256, 4608] 5,313,024 -- | |
| │ │ │ │ └─Dropout (1) [1, 256, 4608] [1, 256, 4608] -- -- | |
| │ │ │ │ └─LoRACompatibleLinear (2) [1, 256, 4608] [1, 256, 1152] 5,309,568 -- | |
| │ └─BasicTransformerBlock (9) [1, 256, 1152] [1, 256, 1152] 6,912 -- | |
| │ │ └─LayerNorm (norm1) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ └─Attention (attn1) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ │ └─LoRACompatibleLinear (to_q) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ └─LoRACompatibleLinear (to_k) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ └─LoRACompatibleLinear (to_v) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ └─ModuleList (to_out) -- -- -- -- | |
| │ │ │ │ └─LoRACompatibleLinear (0) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ │ └─Dropout (1) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ └─Attention (attn2) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ │ └─LoRACompatibleLinear (to_q) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ └─LoRACompatibleLinear (to_k) [1, 130, 1152] [1, 130, 1152] 1,328,256 -- | |
| │ │ │ └─LoRACompatibleLinear (to_v) [1, 130, 1152] [1, 130, 1152] 1,328,256 -- | |
| │ │ │ └─ModuleList (to_out) -- -- -- -- | |
| │ │ │ │ └─LoRACompatibleLinear (0) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ │ └─Dropout (1) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ └─LayerNorm (norm2) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ └─FeedForward (ff) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ │ └─ModuleList (net) -- -- -- -- | |
| │ │ │ │ └─GELU (0) [1, 256, 1152] [1, 256, 4608] -- -- | |
| │ │ │ │ │ └─Linear (proj) [1, 256, 1152] [1, 256, 4608] 5,313,024 -- | |
| │ │ │ │ └─Dropout (1) [1, 256, 4608] [1, 256, 4608] -- -- | |
| │ │ │ │ └─LoRACompatibleLinear (2) [1, 256, 4608] [1, 256, 1152] 5,309,568 -- | |
| │ └─BasicTransformerBlock (10) [1, 256, 1152] [1, 256, 1152] 6,912 -- | |
| │ │ └─LayerNorm (norm1) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ └─Attention (attn1) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ │ └─LoRACompatibleLinear (to_q) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ └─LoRACompatibleLinear (to_k) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ └─LoRACompatibleLinear (to_v) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ └─ModuleList (to_out) -- -- -- -- | |
| │ │ │ │ └─LoRACompatibleLinear (0) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ │ └─Dropout (1) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ └─Attention (attn2) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ │ └─LoRACompatibleLinear (to_q) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ └─LoRACompatibleLinear (to_k) [1, 130, 1152] [1, 130, 1152] 1,328,256 -- | |
| │ │ │ └─LoRACompatibleLinear (to_v) [1, 130, 1152] [1, 130, 1152] 1,328,256 -- | |
| │ │ │ └─ModuleList (to_out) -- -- -- -- | |
| │ │ │ │ └─LoRACompatibleLinear (0) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ │ └─Dropout (1) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ └─LayerNorm (norm2) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ └─FeedForward (ff) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ │ └─ModuleList (net) -- -- -- -- | |
| │ │ │ │ └─GELU (0) [1, 256, 1152] [1, 256, 4608] -- -- | |
| │ │ │ │ │ └─Linear (proj) [1, 256, 1152] [1, 256, 4608] 5,313,024 -- | |
| │ │ │ │ └─Dropout (1) [1, 256, 4608] [1, 256, 4608] -- -- | |
| │ │ │ │ └─LoRACompatibleLinear (2) [1, 256, 4608] [1, 256, 1152] 5,309,568 -- | |
| │ └─BasicTransformerBlock (11) [1, 256, 1152] [1, 256, 1152] 6,912 -- | |
| │ │ └─LayerNorm (norm1) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ └─Attention (attn1) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ │ └─LoRACompatibleLinear (to_q) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ └─LoRACompatibleLinear (to_k) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ └─LoRACompatibleLinear (to_v) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ └─ModuleList (to_out) -- -- -- -- | |
| │ │ │ │ └─LoRACompatibleLinear (0) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ │ └─Dropout (1) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ └─Attention (attn2) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ │ └─LoRACompatibleLinear (to_q) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ └─LoRACompatibleLinear (to_k) [1, 130, 1152] [1, 130, 1152] 1,328,256 -- | |
| │ │ │ └─LoRACompatibleLinear (to_v) [1, 130, 1152] [1, 130, 1152] 1,328,256 -- | |
| │ │ │ └─ModuleList (to_out) -- -- -- -- | |
| │ │ │ │ └─LoRACompatibleLinear (0) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ │ └─Dropout (1) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ └─LayerNorm (norm2) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ └─FeedForward (ff) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ │ └─ModuleList (net) -- -- -- -- | |
| │ │ │ │ └─GELU (0) [1, 256, 1152] [1, 256, 4608] -- -- | |
| │ │ │ │ │ └─Linear (proj) [1, 256, 1152] [1, 256, 4608] 5,313,024 -- | |
| │ │ │ │ └─Dropout (1) [1, 256, 4608] [1, 256, 4608] -- -- | |
| │ │ │ │ └─LoRACompatibleLinear (2) [1, 256, 4608] [1, 256, 1152] 5,309,568 -- | |
| │ └─BasicTransformerBlock (12) [1, 256, 1152] [1, 256, 1152] 6,912 -- | |
| │ │ └─LayerNorm (norm1) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ └─Attention (attn1) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ │ └─LoRACompatibleLinear (to_q) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ └─LoRACompatibleLinear (to_k) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ └─LoRACompatibleLinear (to_v) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ └─ModuleList (to_out) -- -- -- -- | |
| │ │ │ │ └─LoRACompatibleLinear (0) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ │ └─Dropout (1) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ └─Attention (attn2) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ │ └─LoRACompatibleLinear (to_q) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ └─LoRACompatibleLinear (to_k) [1, 130, 1152] [1, 130, 1152] 1,328,256 -- | |
| │ │ │ └─LoRACompatibleLinear (to_v) [1, 130, 1152] [1, 130, 1152] 1,328,256 -- | |
| │ │ │ └─ModuleList (to_out) -- -- -- -- | |
| │ │ │ │ └─LoRACompatibleLinear (0) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ │ └─Dropout (1) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ └─LayerNorm (norm2) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ └─FeedForward (ff) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ │ └─ModuleList (net) -- -- -- -- | |
| │ │ │ │ └─GELU (0) [1, 256, 1152] [1, 256, 4608] -- -- | |
| │ │ │ │ │ └─Linear (proj) [1, 256, 1152] [1, 256, 4608] 5,313,024 -- | |
| │ │ │ │ └─Dropout (1) [1, 256, 4608] [1, 256, 4608] -- -- | |
| │ │ │ │ └─LoRACompatibleLinear (2) [1, 256, 4608] [1, 256, 1152] 5,309,568 -- | |
| │ └─BasicTransformerBlock (13) [1, 256, 1152] [1, 256, 1152] 6,912 -- | |
| │ │ └─LayerNorm (norm1) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ └─Attention (attn1) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ │ └─LoRACompatibleLinear (to_q) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ └─LoRACompatibleLinear (to_k) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ └─LoRACompatibleLinear (to_v) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ └─ModuleList (to_out) -- -- -- -- | |
| │ │ │ │ └─LoRACompatibleLinear (0) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ │ └─Dropout (1) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ └─Attention (attn2) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ │ └─LoRACompatibleLinear (to_q) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ └─LoRACompatibleLinear (to_k) [1, 130, 1152] [1, 130, 1152] 1,328,256 -- | |
| │ │ │ └─LoRACompatibleLinear (to_v) [1, 130, 1152] [1, 130, 1152] 1,328,256 -- | |
| │ │ │ └─ModuleList (to_out) -- -- -- -- | |
| │ │ │ │ └─LoRACompatibleLinear (0) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ │ └─Dropout (1) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ └─LayerNorm (norm2) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ └─FeedForward (ff) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ │ └─ModuleList (net) -- -- -- -- | |
| │ │ │ │ └─GELU (0) [1, 256, 1152] [1, 256, 4608] -- -- | |
| │ │ │ │ │ └─Linear (proj) [1, 256, 1152] [1, 256, 4608] 5,313,024 -- | |
| │ │ │ │ └─Dropout (1) [1, 256, 4608] [1, 256, 4608] -- -- | |
| │ │ │ │ └─LoRACompatibleLinear (2) [1, 256, 4608] [1, 256, 1152] 5,309,568 -- | |
| │ └─BasicTransformerBlock (14) [1, 256, 1152] [1, 256, 1152] 6,912 -- | |
| │ │ └─LayerNorm (norm1) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ └─Attention (attn1) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ │ └─LoRACompatibleLinear (to_q) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ └─LoRACompatibleLinear (to_k) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ └─LoRACompatibleLinear (to_v) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ └─ModuleList (to_out) -- -- -- -- | |
| │ │ │ │ └─LoRACompatibleLinear (0) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ │ └─Dropout (1) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ └─Attention (attn2) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ │ └─LoRACompatibleLinear (to_q) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ └─LoRACompatibleLinear (to_k) [1, 130, 1152] [1, 130, 1152] 1,328,256 -- | |
| │ │ │ └─LoRACompatibleLinear (to_v) [1, 130, 1152] [1, 130, 1152] 1,328,256 -- | |
| │ │ │ └─ModuleList (to_out) -- -- -- -- | |
| │ │ │ │ └─LoRACompatibleLinear (0) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ │ └─Dropout (1) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ └─LayerNorm (norm2) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ └─FeedForward (ff) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ │ └─ModuleList (net) -- -- -- -- | |
| │ │ │ │ └─GELU (0) [1, 256, 1152] [1, 256, 4608] -- -- | |
| │ │ │ │ │ └─Linear (proj) [1, 256, 1152] [1, 256, 4608] 5,313,024 -- | |
| │ │ │ │ └─Dropout (1) [1, 256, 4608] [1, 256, 4608] -- -- | |
| │ │ │ │ └─LoRACompatibleLinear (2) [1, 256, 4608] [1, 256, 1152] 5,309,568 -- | |
| │ └─BasicTransformerBlock (15) [1, 256, 1152] [1, 256, 1152] 6,912 -- | |
| │ │ └─LayerNorm (norm1) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ └─Attention (attn1) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ │ └─LoRACompatibleLinear (to_q) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ └─LoRACompatibleLinear (to_k) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ └─LoRACompatibleLinear (to_v) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ └─ModuleList (to_out) -- -- -- -- | |
| │ │ │ │ └─LoRACompatibleLinear (0) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ │ └─Dropout (1) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ └─Attention (attn2) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ │ └─LoRACompatibleLinear (to_q) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ └─LoRACompatibleLinear (to_k) [1, 130, 1152] [1, 130, 1152] 1,328,256 -- | |
| │ │ │ └─LoRACompatibleLinear (to_v) [1, 130, 1152] [1, 130, 1152] 1,328,256 -- | |
| │ │ │ └─ModuleList (to_out) -- -- -- -- | |
| │ │ │ │ └─LoRACompatibleLinear (0) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ │ └─Dropout (1) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ └─LayerNorm (norm2) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ └─FeedForward (ff) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ │ └─ModuleList (net) -- -- -- -- | |
| │ │ │ │ └─GELU (0) [1, 256, 1152] [1, 256, 4608] -- -- | |
| │ │ │ │ │ └─Linear (proj) [1, 256, 1152] [1, 256, 4608] 5,313,024 -- | |
| │ │ │ │ └─Dropout (1) [1, 256, 4608] [1, 256, 4608] -- -- | |
| │ │ │ │ └─LoRACompatibleLinear (2) [1, 256, 4608] [1, 256, 1152] 5,309,568 -- | |
| │ └─BasicTransformerBlock (16) [1, 256, 1152] [1, 256, 1152] 6,912 -- | |
| │ │ └─LayerNorm (norm1) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ └─Attention (attn1) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ │ └─LoRACompatibleLinear (to_q) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ └─LoRACompatibleLinear (to_k) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ └─LoRACompatibleLinear (to_v) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ └─ModuleList (to_out) -- -- -- -- | |
| │ │ │ │ └─LoRACompatibleLinear (0) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ │ └─Dropout (1) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ └─Attention (attn2) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ │ └─LoRACompatibleLinear (to_q) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ └─LoRACompatibleLinear (to_k) [1, 130, 1152] [1, 130, 1152] 1,328,256 -- | |
| │ │ │ └─LoRACompatibleLinear (to_v) [1, 130, 1152] [1, 130, 1152] 1,328,256 -- | |
| │ │ │ └─ModuleList (to_out) -- -- -- -- | |
| │ │ │ │ └─LoRACompatibleLinear (0) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ │ └─Dropout (1) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ └─LayerNorm (norm2) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ └─FeedForward (ff) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ │ └─ModuleList (net) -- -- -- -- | |
| │ │ │ │ └─GELU (0) [1, 256, 1152] [1, 256, 4608] -- -- | |
| │ │ │ │ │ └─Linear (proj) [1, 256, 1152] [1, 256, 4608] 5,313,024 -- | |
| │ │ │ │ └─Dropout (1) [1, 256, 4608] [1, 256, 4608] -- -- | |
| │ │ │ │ └─LoRACompatibleLinear (2) [1, 256, 4608] [1, 256, 1152] 5,309,568 -- | |
| │ └─BasicTransformerBlock (17) [1, 256, 1152] [1, 256, 1152] 6,912 -- | |
| │ │ └─LayerNorm (norm1) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ └─Attention (attn1) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ │ └─LoRACompatibleLinear (to_q) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ └─LoRACompatibleLinear (to_k) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ └─LoRACompatibleLinear (to_v) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ └─ModuleList (to_out) -- -- -- -- | |
| │ │ │ │ └─LoRACompatibleLinear (0) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ │ └─Dropout (1) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ └─Attention (attn2) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ │ └─LoRACompatibleLinear (to_q) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ └─LoRACompatibleLinear (to_k) [1, 130, 1152] [1, 130, 1152] 1,328,256 -- | |
| │ │ │ └─LoRACompatibleLinear (to_v) [1, 130, 1152] [1, 130, 1152] 1,328,256 -- | |
| │ │ │ └─ModuleList (to_out) -- -- -- -- | |
| │ │ │ │ └─LoRACompatibleLinear (0) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ │ └─Dropout (1) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ └─LayerNorm (norm2) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ └─FeedForward (ff) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ │ └─ModuleList (net) -- -- -- -- | |
| │ │ │ │ └─GELU (0) [1, 256, 1152] [1, 256, 4608] -- -- | |
| │ │ │ │ │ └─Linear (proj) [1, 256, 1152] [1, 256, 4608] 5,313,024 -- | |
| │ │ │ │ └─Dropout (1) [1, 256, 4608] [1, 256, 4608] -- -- | |
| │ │ │ │ └─LoRACompatibleLinear (2) [1, 256, 4608] [1, 256, 1152] 5,309,568 -- | |
| │ └─BasicTransformerBlock (18) [1, 256, 1152] [1, 256, 1152] 6,912 -- | |
| │ │ └─LayerNorm (norm1) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ └─Attention (attn1) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ │ └─LoRACompatibleLinear (to_q) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ └─LoRACompatibleLinear (to_k) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ └─LoRACompatibleLinear (to_v) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ └─ModuleList (to_out) -- -- -- -- | |
| │ │ │ │ └─LoRACompatibleLinear (0) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ │ └─Dropout (1) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ └─Attention (attn2) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ │ └─LoRACompatibleLinear (to_q) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ └─LoRACompatibleLinear (to_k) [1, 130, 1152] [1, 130, 1152] 1,328,256 -- | |
| │ │ │ └─LoRACompatibleLinear (to_v) [1, 130, 1152] [1, 130, 1152] 1,328,256 -- | |
| │ │ │ └─ModuleList (to_out) -- -- -- -- | |
| │ │ │ │ └─LoRACompatibleLinear (0) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ │ └─Dropout (1) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ └─LayerNorm (norm2) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ └─FeedForward (ff) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ │ └─ModuleList (net) -- -- -- -- | |
| │ │ │ │ └─GELU (0) [1, 256, 1152] [1, 256, 4608] -- -- | |
| │ │ │ │ │ └─Linear (proj) [1, 256, 1152] [1, 256, 4608] 5,313,024 -- | |
| │ │ │ │ └─Dropout (1) [1, 256, 4608] [1, 256, 4608] -- -- | |
| │ │ │ │ └─LoRACompatibleLinear (2) [1, 256, 4608] [1, 256, 1152] 5,309,568 -- | |
| │ └─BasicTransformerBlock (19) [1, 256, 1152] [1, 256, 1152] 6,912 -- | |
| │ │ └─LayerNorm (norm1) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ └─Attention (attn1) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ │ └─LoRACompatibleLinear (to_q) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ └─LoRACompatibleLinear (to_k) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ └─LoRACompatibleLinear (to_v) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ └─ModuleList (to_out) -- -- -- -- | |
| │ │ │ │ └─LoRACompatibleLinear (0) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ │ └─Dropout (1) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ └─Attention (attn2) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ │ └─LoRACompatibleLinear (to_q) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ └─LoRACompatibleLinear (to_k) [1, 130, 1152] [1, 130, 1152] 1,328,256 -- | |
| │ │ │ └─LoRACompatibleLinear (to_v) [1, 130, 1152] [1, 130, 1152] 1,328,256 -- | |
| │ │ │ └─ModuleList (to_out) -- -- -- -- | |
| │ │ │ │ └─LoRACompatibleLinear (0) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ │ └─Dropout (1) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ └─LayerNorm (norm2) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ └─FeedForward (ff) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ │ └─ModuleList (net) -- -- -- -- | |
| │ │ │ │ └─GELU (0) [1, 256, 1152] [1, 256, 4608] -- -- | |
| │ │ │ │ │ └─Linear (proj) [1, 256, 1152] [1, 256, 4608] 5,313,024 -- | |
| │ │ │ │ └─Dropout (1) [1, 256, 4608] [1, 256, 4608] -- -- | |
| │ │ │ │ └─LoRACompatibleLinear (2) [1, 256, 4608] [1, 256, 1152] 5,309,568 -- | |
| │ └─BasicTransformerBlock (20) [1, 256, 1152] [1, 256, 1152] 6,912 -- | |
| │ │ └─LayerNorm (norm1) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ └─Attention (attn1) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ │ └─LoRACompatibleLinear (to_q) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ └─LoRACompatibleLinear (to_k) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ └─LoRACompatibleLinear (to_v) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ └─ModuleList (to_out) -- -- -- -- | |
| │ │ │ │ └─LoRACompatibleLinear (0) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ │ └─Dropout (1) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ └─Attention (attn2) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ │ └─LoRACompatibleLinear (to_q) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ └─LoRACompatibleLinear (to_k) [1, 130, 1152] [1, 130, 1152] 1,328,256 -- | |
| │ │ │ └─LoRACompatibleLinear (to_v) [1, 130, 1152] [1, 130, 1152] 1,328,256 -- | |
| │ │ │ └─ModuleList (to_out) -- -- -- -- | |
| │ │ │ │ └─LoRACompatibleLinear (0) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ │ └─Dropout (1) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ └─LayerNorm (norm2) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ └─FeedForward (ff) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ │ └─ModuleList (net) -- -- -- -- | |
| │ │ │ │ └─GELU (0) [1, 256, 1152] [1, 256, 4608] -- -- | |
| │ │ │ │ │ └─Linear (proj) [1, 256, 1152] [1, 256, 4608] 5,313,024 -- | |
| │ │ │ │ └─Dropout (1) [1, 256, 4608] [1, 256, 4608] -- -- | |
| │ │ │ │ └─LoRACompatibleLinear (2) [1, 256, 4608] [1, 256, 1152] 5,309,568 -- | |
| │ └─BasicTransformerBlock (21) [1, 256, 1152] [1, 256, 1152] 6,912 -- | |
| │ │ └─LayerNorm (norm1) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ └─Attention (attn1) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ │ └─LoRACompatibleLinear (to_q) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ └─LoRACompatibleLinear (to_k) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ └─LoRACompatibleLinear (to_v) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ └─ModuleList (to_out) -- -- -- -- | |
| │ │ │ │ └─LoRACompatibleLinear (0) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ │ └─Dropout (1) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ └─Attention (attn2) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ │ └─LoRACompatibleLinear (to_q) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ └─LoRACompatibleLinear (to_k) [1, 130, 1152] [1, 130, 1152] 1,328,256 -- | |
| │ │ │ └─LoRACompatibleLinear (to_v) [1, 130, 1152] [1, 130, 1152] 1,328,256 -- | |
| │ │ │ └─ModuleList (to_out) -- -- -- -- | |
| │ │ │ │ └─LoRACompatibleLinear (0) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ │ └─Dropout (1) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ └─LayerNorm (norm2) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ └─FeedForward (ff) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ │ └─ModuleList (net) -- -- -- -- | |
| │ │ │ │ └─GELU (0) [1, 256, 1152] [1, 256, 4608] -- -- | |
| │ │ │ │ │ └─Linear (proj) [1, 256, 1152] [1, 256, 4608] 5,313,024 -- | |
| │ │ │ │ └─Dropout (1) [1, 256, 4608] [1, 256, 4608] -- -- | |
| │ │ │ │ └─LoRACompatibleLinear (2) [1, 256, 4608] [1, 256, 1152] 5,309,568 -- | |
| │ └─BasicTransformerBlock (22) [1, 256, 1152] [1, 256, 1152] 6,912 -- | |
| │ │ └─LayerNorm (norm1) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ └─Attention (attn1) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ │ └─LoRACompatibleLinear (to_q) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ └─LoRACompatibleLinear (to_k) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ └─LoRACompatibleLinear (to_v) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ └─ModuleList (to_out) -- -- -- -- | |
| │ │ │ │ └─LoRACompatibleLinear (0) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ │ └─Dropout (1) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ └─Attention (attn2) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ │ └─LoRACompatibleLinear (to_q) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ └─LoRACompatibleLinear (to_k) [1, 130, 1152] [1, 130, 1152] 1,328,256 -- | |
| │ │ │ └─LoRACompatibleLinear (to_v) [1, 130, 1152] [1, 130, 1152] 1,328,256 -- | |
| │ │ │ └─ModuleList (to_out) -- -- -- -- | |
| │ │ │ │ └─LoRACompatibleLinear (0) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ │ └─Dropout (1) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ └─LayerNorm (norm2) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ └─FeedForward (ff) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ │ └─ModuleList (net) -- -- -- -- | |
| │ │ │ │ └─GELU (0) [1, 256, 1152] [1, 256, 4608] -- -- | |
| │ │ │ │ │ └─Linear (proj) [1, 256, 1152] [1, 256, 4608] 5,313,024 -- | |
| │ │ │ │ └─Dropout (1) [1, 256, 4608] [1, 256, 4608] -- -- | |
| │ │ │ │ └─LoRACompatibleLinear (2) [1, 256, 4608] [1, 256, 1152] 5,309,568 -- | |
| │ └─BasicTransformerBlock (23) [1, 256, 1152] [1, 256, 1152] 6,912 -- | |
| │ │ └─LayerNorm (norm1) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ └─Attention (attn1) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ │ └─LoRACompatibleLinear (to_q) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ └─LoRACompatibleLinear (to_k) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ └─LoRACompatibleLinear (to_v) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ └─ModuleList (to_out) -- -- -- -- | |
| │ │ │ │ └─LoRACompatibleLinear (0) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ │ └─Dropout (1) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ └─Attention (attn2) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ │ └─LoRACompatibleLinear (to_q) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ └─LoRACompatibleLinear (to_k) [1, 130, 1152] [1, 130, 1152] 1,328,256 -- | |
| │ │ │ └─LoRACompatibleLinear (to_v) [1, 130, 1152] [1, 130, 1152] 1,328,256 -- | |
| │ │ │ └─ModuleList (to_out) -- -- -- -- | |
| │ │ │ │ └─LoRACompatibleLinear (0) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ │ └─Dropout (1) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ └─LayerNorm (norm2) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ └─FeedForward (ff) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ │ └─ModuleList (net) -- -- -- -- | |
| │ │ │ │ └─GELU (0) [1, 256, 1152] [1, 256, 4608] -- -- | |
| │ │ │ │ │ └─Linear (proj) [1, 256, 1152] [1, 256, 4608] 5,313,024 -- | |
| │ │ │ │ └─Dropout (1) [1, 256, 4608] [1, 256, 4608] -- -- | |
| │ │ │ │ └─LoRACompatibleLinear (2) [1, 256, 4608] [1, 256, 1152] 5,309,568 -- | |
| │ └─BasicTransformerBlock (24) [1, 256, 1152] [1, 256, 1152] 6,912 -- | |
| │ │ └─LayerNorm (norm1) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ └─Attention (attn1) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ │ └─LoRACompatibleLinear (to_q) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ └─LoRACompatibleLinear (to_k) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ └─LoRACompatibleLinear (to_v) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ └─ModuleList (to_out) -- -- -- -- | |
| │ │ │ │ └─LoRACompatibleLinear (0) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ │ └─Dropout (1) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ └─Attention (attn2) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ │ └─LoRACompatibleLinear (to_q) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ └─LoRACompatibleLinear (to_k) [1, 130, 1152] [1, 130, 1152] 1,328,256 -- | |
| │ │ │ └─LoRACompatibleLinear (to_v) [1, 130, 1152] [1, 130, 1152] 1,328,256 -- | |
| │ │ │ └─ModuleList (to_out) -- -- -- -- | |
| │ │ │ │ └─LoRACompatibleLinear (0) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ │ └─Dropout (1) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ └─LayerNorm (norm2) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ └─FeedForward (ff) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ │ └─ModuleList (net) -- -- -- -- | |
| │ │ │ │ └─GELU (0) [1, 256, 1152] [1, 256, 4608] -- -- | |
| │ │ │ │ │ └─Linear (proj) [1, 256, 1152] [1, 256, 4608] 5,313,024 -- | |
| │ │ │ │ └─Dropout (1) [1, 256, 4608] [1, 256, 4608] -- -- | |
| │ │ │ │ └─LoRACompatibleLinear (2) [1, 256, 4608] [1, 256, 1152] 5,309,568 -- | |
| │ └─BasicTransformerBlock (25) [1, 256, 1152] [1, 256, 1152] 6,912 -- | |
| │ │ └─LayerNorm (norm1) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ └─Attention (attn1) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ │ └─LoRACompatibleLinear (to_q) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ └─LoRACompatibleLinear (to_k) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ └─LoRACompatibleLinear (to_v) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ └─ModuleList (to_out) -- -- -- -- | |
| │ │ │ │ └─LoRACompatibleLinear (0) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ │ └─Dropout (1) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ └─Attention (attn2) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ │ └─LoRACompatibleLinear (to_q) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ └─LoRACompatibleLinear (to_k) [1, 130, 1152] [1, 130, 1152] 1,328,256 -- | |
| │ │ │ └─LoRACompatibleLinear (to_v) [1, 130, 1152] [1, 130, 1152] 1,328,256 -- | |
| │ │ │ └─ModuleList (to_out) -- -- -- -- | |
| │ │ │ │ └─LoRACompatibleLinear (0) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ │ └─Dropout (1) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ └─LayerNorm (norm2) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ └─FeedForward (ff) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ │ └─ModuleList (net) -- -- -- -- | |
| │ │ │ │ └─GELU (0) [1, 256, 1152] [1, 256, 4608] -- -- | |
| │ │ │ │ │ └─Linear (proj) [1, 256, 1152] [1, 256, 4608] 5,313,024 -- | |
| │ │ │ │ └─Dropout (1) [1, 256, 4608] [1, 256, 4608] -- -- | |
| │ │ │ │ └─LoRACompatibleLinear (2) [1, 256, 4608] [1, 256, 1152] 5,309,568 -- | |
| │ └─BasicTransformerBlock (26) [1, 256, 1152] [1, 256, 1152] 6,912 -- | |
| │ │ └─LayerNorm (norm1) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ └─Attention (attn1) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ │ └─LoRACompatibleLinear (to_q) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ └─LoRACompatibleLinear (to_k) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ └─LoRACompatibleLinear (to_v) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ └─ModuleList (to_out) -- -- -- -- | |
| │ │ │ │ └─LoRACompatibleLinear (0) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ │ └─Dropout (1) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ └─Attention (attn2) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ │ └─LoRACompatibleLinear (to_q) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ └─LoRACompatibleLinear (to_k) [1, 130, 1152] [1, 130, 1152] 1,328,256 -- | |
| │ │ │ └─LoRACompatibleLinear (to_v) [1, 130, 1152] [1, 130, 1152] 1,328,256 -- | |
| │ │ │ └─ModuleList (to_out) -- -- -- -- | |
| │ │ │ │ └─LoRACompatibleLinear (0) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ │ └─Dropout (1) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ └─LayerNorm (norm2) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ └─FeedForward (ff) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ │ └─ModuleList (net) -- -- -- -- | |
| │ │ │ │ └─GELU (0) [1, 256, 1152] [1, 256, 4608] -- -- | |
| │ │ │ │ │ └─Linear (proj) [1, 256, 1152] [1, 256, 4608] 5,313,024 -- | |
| │ │ │ │ └─Dropout (1) [1, 256, 4608] [1, 256, 4608] -- -- | |
| │ │ │ │ └─LoRACompatibleLinear (2) [1, 256, 4608] [1, 256, 1152] 5,309,568 -- | |
| │ └─BasicTransformerBlock (27) [1, 256, 1152] [1, 256, 1152] 6,912 -- | |
| │ │ └─LayerNorm (norm1) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ └─Attention (attn1) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ │ └─LoRACompatibleLinear (to_q) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ └─LoRACompatibleLinear (to_k) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ └─LoRACompatibleLinear (to_v) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ └─ModuleList (to_out) -- -- -- -- | |
| │ │ │ │ └─LoRACompatibleLinear (0) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ │ └─Dropout (1) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ └─Attention (attn2) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ │ └─LoRACompatibleLinear (to_q) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ └─LoRACompatibleLinear (to_k) [1, 130, 1152] [1, 130, 1152] 1,328,256 -- | |
| │ │ │ └─LoRACompatibleLinear (to_v) [1, 130, 1152] [1, 130, 1152] 1,328,256 -- | |
| │ │ │ └─ModuleList (to_out) -- -- -- -- | |
| │ │ │ │ └─LoRACompatibleLinear (0) [1, 256, 1152] [1, 256, 1152] 1,328,256 -- | |
| │ │ │ │ └─Dropout (1) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ └─LayerNorm (norm2) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ └─FeedForward (ff) [1, 256, 1152] [1, 256, 1152] -- -- | |
| │ │ │ └─ModuleList (net) -- -- -- -- | |
| │ │ │ │ └─GELU (0) [1, 256, 1152] [1, 256, 4608] -- -- | |
| │ │ │ │ │ └─Linear (proj) [1, 256, 1152] [1, 256, 4608] 5,313,024 -- | |
| │ │ │ │ └─Dropout (1) [1, 256, 4608] [1, 256, 4608] -- -- | |
| │ │ │ │ └─LoRACompatibleLinear (2) [1, 256, 4608] [1, 256, 1152] 5,309,568 -- | |
| ├─LayerNorm (norm_out) [1, 256, 1152] [1, 256, 1152] -- -- | |
| ├─Linear (proj_out) [1, 256, 1152] [1, 256, 32] 36,896 -- | |
| ================================================================================================================================================================ | |
| Total params: 612,825,248 | |
| Trainable params: 612,825,248 | |
| Non-trainable params: 0 | |
| Total mult-adds (M): 616.39 | |
| ================================================================================================================================================================ | |
| Input size (MB): 2.15 | |
| Forward/backward pass size (MB): 798.67 | |
| Params size (MB): 2444.61 | |
| Estimated Total Size (MB): 3245.43 | |
| ================================================================================================================================================================ |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment