Minified repro
# Auto-generated minified repro (torch._dynamo / TorchInductor debug output).
from math import inf
import torch
from torch import tensor, device
import torch.fx as fx
import torch._dynamo
from torch._dynamo.testing import rand_strided
from torch._dynamo.debug_utils import run_fwd_maybe_bwd
import torch._dynamo.config
import torch._inductor.config
import torch._functorch.config
import torch.fx.experimental._config
from torch.nn import *
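# NOTE: a minified repro like this is normally followed by a short driver that
# materialises the inputs with rand_strided and calls run_fwd_maybe_bwd on the
# module; that tail is not included in this excerpt. A minimal sketch, assuming
# a hypothetical `input_specs` list of (shape, stride, dtype) tuples standing in
# for whatever argument shapes the omitted driver records:
#
#   mod = Repro().cuda()
#   args = [rand_strided(shape, stride, dtype, device="cuda")
#           for shape, stride, dtype in input_specs]
#   run_fwd_maybe_bwd(mod, args)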
class Repro(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.L__self___time_embedding_act = SiLU()
        self.L__self___conv_in = Conv2d(4, 320, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)).cuda()
        self.L__self___down_blocks_0_resnets_0_norm1 = GroupNorm(32, 320, eps=1e-05, affine=True).cuda()
        self.L__self___down_blocks_0_resnets_0_norm2 = GroupNorm(32, 320, eps=1e-05, affine=True).cuda()
        self.L__self___down_blocks_0_resnets_0_dropout = Dropout(p=0.0, inplace=False)
        self.L__self___down_blocks_0_resnets_1_norm1 = GroupNorm(32, 320, eps=1e-05, affine=True).cuda()
        self.L__self___down_blocks_0_resnets_1_norm2 = GroupNorm(32, 320, eps=1e-05, affine=True).cuda()
        self.L__self___down_blocks_0_resnets_1_dropout = Dropout(p=0.0, inplace=False)
        self.L__self___down_blocks_1_resnets_0_norm1 = GroupNorm(32, 320, eps=1e-05, affine=True).cuda()
        self.L__self___down_blocks_1_resnets_0_norm2 = GroupNorm(32, 640, eps=1e-05, affine=True).cuda()
        self.L__self___down_blocks_1_resnets_0_dropout = Dropout(p=0.0, inplace=False)
        self.L__self___down_blocks_1_attentions_0_norm = GroupNorm(32, 640, eps=1e-06, affine=True).cuda()
        # The BasicTransformerBlock entries below appear as module reprs emitted by the
        # minifier rather than as constructor calls, so they are not directly runnable Python.
        self.L__self___down_blocks_1_attentions_0_transformer_blocks_0 = BasicTransformerBlock(
          (norm1): LayerNorm((640,), eps=1e-05, elementwise_affine=True)
          (attn1): Attention(
            (to_q): LoRACompatibleLinear(in_features=640, out_features=640, bias=False)
            (to_k): LoRACompatibleLinear(in_features=640, out_features=640, bias=False)
            (to_v): LoRACompatibleLinear(in_features=640, out_features=640, bias=False)
            (to_out): ModuleList(
              (0): LoRACompatibleLinear(in_features=640, out_features=640, bias=True)
              (1): Dropout(p=0.0, inplace=False)
            )
          )
          (norm2): LayerNorm((640,), eps=1e-05, elementwise_affine=True)
          (attn2): Attention(
            (to_q): LoRACompatibleLinear(in_features=640, out_features=640, bias=False)
            (to_k): LoRACompatibleLinear(in_features=2048, out_features=640, bias=False)
            (to_v): LoRACompatibleLinear(in_features=2048, out_features=640, bias=False)
            (to_out): ModuleList(
              (0): LoRACompatibleLinear(in_features=640, out_features=640, bias=True)
              (1): Dropout(p=0.0, inplace=False)
            )
          )
          (norm3): LayerNorm((640,), eps=1e-05, elementwise_affine=True)
          (ff): FeedForward(
            (net): ModuleList(
              (0): GEGLU(
                (proj): LoRACompatibleLinear(in_features=640, out_features=5120, bias=True)
              )
              (1): Dropout(p=0.0, inplace=False)
              (2): LoRACompatibleLinear(in_features=2560, out_features=640, bias=True)
            )
          )
        ).cuda()
        self.L__self___down_blocks_1_attentions_0_transformer_blocks_1 = BasicTransformerBlock(
          (norm1): LayerNorm((640,), eps=1e-05, elementwise_affine=True)
          (attn1): Attention(
            (to_q): LoRACompatibleLinear(in_features=640, out_features=640, bias=False)
            (to_k): LoRACompatibleLinear(in_features=640, out_features=640, bias=False)
            (to_v): LoRACompatibleLinear(in_features=640, out_features=640, bias=False)
            (to_out): ModuleList(
              (0): LoRACompatibleLinear(in_features=640, out_features=640, bias=True)
              (1): Dropout(p=0.0, inplace=False)
            )
          )
          (norm2): LayerNorm((640,), eps=1e-05, elementwise_affine=True)
          (attn2): Attention(
            (to_q): LoRACompatibleLinear(in_features=640, out_features=640, bias=False)
            (to_k): LoRACompatibleLinear(in_features=2048, out_features=640, bias=False)
            (to_v): LoRACompatibleLinear(in_features=2048, out_features=640, bias=False)
            (to_out): ModuleList(
              (0): LoRACompatibleLinear(in_features=640, out_features=640, bias=True)
              (1): Dropout(p=0.0, inplace=False)
            )
          )
          (norm3): LayerNorm((640,), eps=1e-05, elementwise_affine=True)
          (ff): FeedForward(
            (net): ModuleList(
              (0): GEGLU(
                (proj): LoRACompatibleLinear(in_features=640, out_features=5120, bias=True)
              )
              (1): Dropout(p=0.0, inplace=False)
              (2): LoRACompatibleLinear(in_features=2560, out_features=640, bias=True)
            )
          )
        ).cuda()
        self.L__self___down_blocks_1_resnets_1_norm1 = GroupNorm(32, 640, eps=1e-05, affine=True).cuda()
        self.L__self___down_blocks_1_resnets_1_norm2 = GroupNorm(32, 640, eps=1e-05, affine=True).cuda()
        self.L__self___down_blocks_1_resnets_1_dropout = Dropout(p=0.0, inplace=False)
        self.L__self___down_blocks_1_attentions_1_norm = GroupNorm(32, 640, eps=1e-06, affine=True).cuda()
self.L__self___down_blocks_1_attentions_1_transformer_blocks_0 = BasicTransformerBlock( | |
(norm1): LayerNorm((640,), eps=1e-05, elementwise_affine=True) | |
(attn1): Attention( | |
(to_q): LoRACompatibleLinear(in_features=640, out_features=640, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=640, out_features=640, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=640, out_features=640, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=640, out_features=640, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm2): LayerNorm((640,), eps=1e-05, elementwise_affine=True) | |
(attn2): Attention( | |
(to_q): LoRACompatibleLinear(in_features=640, out_features=640, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=2048, out_features=640, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=2048, out_features=640, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=640, out_features=640, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm3): LayerNorm((640,), eps=1e-05, elementwise_affine=True) | |
(ff): FeedForward( | |
(net): ModuleList( | |
(0): GEGLU( | |
(proj): LoRACompatibleLinear(in_features=640, out_features=5120, bias=True) | |
) | |
(1): Dropout(p=0.0, inplace=False) | |
(2): LoRACompatibleLinear(in_features=2560, out_features=640, bias=True) | |
) | |
) | |
).cuda() | |
self.L__self___down_blocks_1_attentions_1_transformer_blocks_1 = BasicTransformerBlock( | |
(norm1): LayerNorm((640,), eps=1e-05, elementwise_affine=True) | |
(attn1): Attention( | |
(to_q): LoRACompatibleLinear(in_features=640, out_features=640, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=640, out_features=640, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=640, out_features=640, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=640, out_features=640, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm2): LayerNorm((640,), eps=1e-05, elementwise_affine=True) | |
(attn2): Attention( | |
(to_q): LoRACompatibleLinear(in_features=640, out_features=640, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=2048, out_features=640, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=2048, out_features=640, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=640, out_features=640, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm3): LayerNorm((640,), eps=1e-05, elementwise_affine=True) | |
(ff): FeedForward( | |
(net): ModuleList( | |
(0): GEGLU( | |
(proj): LoRACompatibleLinear(in_features=640, out_features=5120, bias=True) | |
) | |
(1): Dropout(p=0.0, inplace=False) | |
(2): LoRACompatibleLinear(in_features=2560, out_features=640, bias=True) | |
) | |
) | |
).cuda() | |
self.L__self___down_blocks_2_resnets_0_norm1 = GroupNorm(32, 640, eps=1e-05, affine=True).cuda() | |
self.L__self___down_blocks_2_resnets_0_norm2 = GroupNorm(32, 1280, eps=1e-05, affine=True).cuda() | |
self.L__self___down_blocks_2_resnets_0_dropout = Dropout(p=0.0, inplace=False) | |
self.L__self___down_blocks_2_attentions_0_norm = GroupNorm(32, 1280, eps=1e-06, affine=True).cuda() | |
self.L__self___down_blocks_2_attentions_0_transformer_blocks_0 = BasicTransformerBlock( | |
(norm1): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(attn1): Attention( | |
(to_q): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm2): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(attn2): Attention( | |
(to_q): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=2048, out_features=1280, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=2048, out_features=1280, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm3): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(ff): FeedForward( | |
(net): ModuleList( | |
(0): GEGLU( | |
(proj): LoRACompatibleLinear(in_features=1280, out_features=10240, bias=True) | |
) | |
(1): Dropout(p=0.0, inplace=False) | |
(2): LoRACompatibleLinear(in_features=5120, out_features=1280, bias=True) | |
) | |
) | |
).cuda() | |
self.L__self___down_blocks_2_attentions_0_transformer_blocks_1 = BasicTransformerBlock( | |
(norm1): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(attn1): Attention( | |
(to_q): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm2): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(attn2): Attention( | |
(to_q): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=2048, out_features=1280, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=2048, out_features=1280, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm3): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(ff): FeedForward( | |
(net): ModuleList( | |
(0): GEGLU( | |
(proj): LoRACompatibleLinear(in_features=1280, out_features=10240, bias=True) | |
) | |
(1): Dropout(p=0.0, inplace=False) | |
(2): LoRACompatibleLinear(in_features=5120, out_features=1280, bias=True) | |
) | |
) | |
).cuda() | |
self.L__self___down_blocks_2_attentions_0_transformer_blocks_2 = BasicTransformerBlock( | |
(norm1): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(attn1): Attention( | |
(to_q): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm2): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(attn2): Attention( | |
(to_q): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=2048, out_features=1280, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=2048, out_features=1280, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm3): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(ff): FeedForward( | |
(net): ModuleList( | |
(0): GEGLU( | |
(proj): LoRACompatibleLinear(in_features=1280, out_features=10240, bias=True) | |
) | |
(1): Dropout(p=0.0, inplace=False) | |
(2): LoRACompatibleLinear(in_features=5120, out_features=1280, bias=True) | |
) | |
) | |
).cuda() | |
self.L__self___down_blocks_2_attentions_0_transformer_blocks_3 = BasicTransformerBlock( | |
(norm1): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(attn1): Attention( | |
(to_q): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm2): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(attn2): Attention( | |
(to_q): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=2048, out_features=1280, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=2048, out_features=1280, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm3): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(ff): FeedForward( | |
(net): ModuleList( | |
(0): GEGLU( | |
(proj): LoRACompatibleLinear(in_features=1280, out_features=10240, bias=True) | |
) | |
(1): Dropout(p=0.0, inplace=False) | |
(2): LoRACompatibleLinear(in_features=5120, out_features=1280, bias=True) | |
) | |
) | |
).cuda() | |
self.L__self___down_blocks_2_attentions_0_transformer_blocks_4 = BasicTransformerBlock( | |
(norm1): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(attn1): Attention( | |
(to_q): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm2): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(attn2): Attention( | |
(to_q): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=2048, out_features=1280, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=2048, out_features=1280, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm3): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(ff): FeedForward( | |
(net): ModuleList( | |
(0): GEGLU( | |
(proj): LoRACompatibleLinear(in_features=1280, out_features=10240, bias=True) | |
) | |
(1): Dropout(p=0.0, inplace=False) | |
(2): LoRACompatibleLinear(in_features=5120, out_features=1280, bias=True) | |
) | |
) | |
).cuda() | |
self.L__self___down_blocks_2_attentions_0_transformer_blocks_5 = BasicTransformerBlock( | |
(norm1): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(attn1): Attention( | |
(to_q): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm2): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(attn2): Attention( | |
(to_q): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=2048, out_features=1280, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=2048, out_features=1280, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm3): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(ff): FeedForward( | |
(net): ModuleList( | |
(0): GEGLU( | |
(proj): LoRACompatibleLinear(in_features=1280, out_features=10240, bias=True) | |
) | |
(1): Dropout(p=0.0, inplace=False) | |
(2): LoRACompatibleLinear(in_features=5120, out_features=1280, bias=True) | |
) | |
) | |
).cuda() | |
self.L__self___down_blocks_2_attentions_0_transformer_blocks_6 = BasicTransformerBlock( | |
(norm1): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(attn1): Attention( | |
(to_q): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm2): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(attn2): Attention( | |
(to_q): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=2048, out_features=1280, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=2048, out_features=1280, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm3): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(ff): FeedForward( | |
(net): ModuleList( | |
(0): GEGLU( | |
(proj): LoRACompatibleLinear(in_features=1280, out_features=10240, bias=True) | |
) | |
(1): Dropout(p=0.0, inplace=False) | |
(2): LoRACompatibleLinear(in_features=5120, out_features=1280, bias=True) | |
) | |
) | |
).cuda() | |
self.L__self___down_blocks_2_attentions_0_transformer_blocks_7 = BasicTransformerBlock( | |
(norm1): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(attn1): Attention( | |
(to_q): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm2): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(attn2): Attention( | |
(to_q): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=2048, out_features=1280, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=2048, out_features=1280, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm3): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(ff): FeedForward( | |
(net): ModuleList( | |
(0): GEGLU( | |
(proj): LoRACompatibleLinear(in_features=1280, out_features=10240, bias=True) | |
) | |
(1): Dropout(p=0.0, inplace=False) | |
(2): LoRACompatibleLinear(in_features=5120, out_features=1280, bias=True) | |
) | |
) | |
).cuda() | |
self.L__self___down_blocks_2_attentions_0_transformer_blocks_8 = BasicTransformerBlock( | |
(norm1): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(attn1): Attention( | |
(to_q): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm2): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(attn2): Attention( | |
(to_q): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=2048, out_features=1280, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=2048, out_features=1280, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm3): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(ff): FeedForward( | |
(net): ModuleList( | |
(0): GEGLU( | |
(proj): LoRACompatibleLinear(in_features=1280, out_features=10240, bias=True) | |
) | |
(1): Dropout(p=0.0, inplace=False) | |
(2): LoRACompatibleLinear(in_features=5120, out_features=1280, bias=True) | |
) | |
) | |
).cuda() | |
self.L__self___down_blocks_2_attentions_0_transformer_blocks_9 = BasicTransformerBlock( | |
(norm1): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(attn1): Attention( | |
(to_q): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm2): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(attn2): Attention( | |
(to_q): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=2048, out_features=1280, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=2048, out_features=1280, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm3): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(ff): FeedForward( | |
(net): ModuleList( | |
(0): GEGLU( | |
(proj): LoRACompatibleLinear(in_features=1280, out_features=10240, bias=True) | |
) | |
(1): Dropout(p=0.0, inplace=False) | |
(2): LoRACompatibleLinear(in_features=5120, out_features=1280, bias=True) | |
) | |
) | |
).cuda() | |
self.L__self___down_blocks_2_resnets_1_norm1 = GroupNorm(32, 1280, eps=1e-05, affine=True).cuda() | |
self.L__self___down_blocks_2_resnets_1_norm2 = GroupNorm(32, 1280, eps=1e-05, affine=True).cuda() | |
self.L__self___down_blocks_2_resnets_1_dropout = Dropout(p=0.0, inplace=False) | |
self.L__self___down_blocks_2_attentions_1_norm = GroupNorm(32, 1280, eps=1e-06, affine=True).cuda() | |
self.L__self___down_blocks_2_attentions_1_transformer_blocks_0 = BasicTransformerBlock( | |
(norm1): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(attn1): Attention( | |
(to_q): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm2): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(attn2): Attention( | |
(to_q): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=2048, out_features=1280, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=2048, out_features=1280, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm3): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(ff): FeedForward( | |
(net): ModuleList( | |
(0): GEGLU( | |
(proj): LoRACompatibleLinear(in_features=1280, out_features=10240, bias=True) | |
) | |
(1): Dropout(p=0.0, inplace=False) | |
(2): LoRACompatibleLinear(in_features=5120, out_features=1280, bias=True) | |
) | |
) | |
).cuda() | |
self.L__self___down_blocks_2_attentions_1_transformer_blocks_1 = BasicTransformerBlock( | |
(norm1): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(attn1): Attention( | |
(to_q): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm2): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(attn2): Attention( | |
(to_q): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=2048, out_features=1280, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=2048, out_features=1280, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm3): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(ff): FeedForward( | |
(net): ModuleList( | |
(0): GEGLU( | |
(proj): LoRACompatibleLinear(in_features=1280, out_features=10240, bias=True) | |
) | |
(1): Dropout(p=0.0, inplace=False) | |
(2): LoRACompatibleLinear(in_features=5120, out_features=1280, bias=True) | |
) | |
) | |
).cuda() | |
self.L__self___down_blocks_2_attentions_1_transformer_blocks_2 = BasicTransformerBlock( | |
(norm1): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(attn1): Attention( | |
(to_q): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm2): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(attn2): Attention( | |
(to_q): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=2048, out_features=1280, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=2048, out_features=1280, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm3): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(ff): FeedForward( | |
(net): ModuleList( | |
(0): GEGLU( | |
(proj): LoRACompatibleLinear(in_features=1280, out_features=10240, bias=True) | |
) | |
(1): Dropout(p=0.0, inplace=False) | |
(2): LoRACompatibleLinear(in_features=5120, out_features=1280, bias=True) | |
) | |
) | |
).cuda() | |
self.L__self___down_blocks_2_attentions_1_transformer_blocks_3 = BasicTransformerBlock( | |
(norm1): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(attn1): Attention( | |
(to_q): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm2): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(attn2): Attention( | |
(to_q): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=2048, out_features=1280, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=2048, out_features=1280, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm3): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(ff): FeedForward( | |
(net): ModuleList( | |
(0): GEGLU( | |
(proj): LoRACompatibleLinear(in_features=1280, out_features=10240, bias=True) | |
) | |
(1): Dropout(p=0.0, inplace=False) | |
(2): LoRACompatibleLinear(in_features=5120, out_features=1280, bias=True) | |
) | |
) | |
).cuda() | |
self.L__self___down_blocks_2_attentions_1_transformer_blocks_4 = BasicTransformerBlock( | |
(norm1): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(attn1): Attention( | |
(to_q): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm2): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(attn2): Attention( | |
(to_q): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=2048, out_features=1280, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=2048, out_features=1280, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm3): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(ff): FeedForward( | |
(net): ModuleList( | |
(0): GEGLU( | |
(proj): LoRACompatibleLinear(in_features=1280, out_features=10240, bias=True) | |
) | |
(1): Dropout(p=0.0, inplace=False) | |
(2): LoRACompatibleLinear(in_features=5120, out_features=1280, bias=True) | |
) | |
) | |
).cuda() | |
self.L__self___down_blocks_2_attentions_1_transformer_blocks_5 = BasicTransformerBlock( | |
(norm1): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(attn1): Attention( | |
(to_q): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm2): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(attn2): Attention( | |
(to_q): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=2048, out_features=1280, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=2048, out_features=1280, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm3): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(ff): FeedForward( | |
(net): ModuleList( | |
(0): GEGLU( | |
(proj): LoRACompatibleLinear(in_features=1280, out_features=10240, bias=True) | |
) | |
(1): Dropout(p=0.0, inplace=False) | |
(2): LoRACompatibleLinear(in_features=5120, out_features=1280, bias=True) | |
) | |
) | |
).cuda() | |
self.L__self___down_blocks_2_attentions_1_transformer_blocks_6 = BasicTransformerBlock( | |
(norm1): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(attn1): Attention( | |
(to_q): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm2): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(attn2): Attention( | |
(to_q): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=2048, out_features=1280, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=2048, out_features=1280, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm3): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(ff): FeedForward( | |
(net): ModuleList( | |
(0): GEGLU( | |
(proj): LoRACompatibleLinear(in_features=1280, out_features=10240, bias=True) | |
) | |
(1): Dropout(p=0.0, inplace=False) | |
(2): LoRACompatibleLinear(in_features=5120, out_features=1280, bias=True) | |
) | |
) | |
).cuda() | |
self.L__self___down_blocks_2_attentions_1_transformer_blocks_7 = BasicTransformerBlock( | |
(norm1): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(attn1): Attention( | |
(to_q): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm2): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(attn2): Attention( | |
(to_q): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=2048, out_features=1280, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=2048, out_features=1280, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm3): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(ff): FeedForward( | |
(net): ModuleList( | |
(0): GEGLU( | |
(proj): LoRACompatibleLinear(in_features=1280, out_features=10240, bias=True) | |
) | |
(1): Dropout(p=0.0, inplace=False) | |
(2): LoRACompatibleLinear(in_features=5120, out_features=1280, bias=True) | |
) | |
) | |
).cuda() | |
self.L__self___down_blocks_2_attentions_1_transformer_blocks_8 = BasicTransformerBlock( | |
(norm1): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(attn1): Attention( | |
(to_q): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm2): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(attn2): Attention( | |
(to_q): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=2048, out_features=1280, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=2048, out_features=1280, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm3): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(ff): FeedForward( | |
(net): ModuleList( | |
(0): GEGLU( | |
(proj): LoRACompatibleLinear(in_features=1280, out_features=10240, bias=True) | |
) | |
(1): Dropout(p=0.0, inplace=False) | |
(2): LoRACompatibleLinear(in_features=5120, out_features=1280, bias=True) | |
) | |
) | |
).cuda() | |
self.L__self___down_blocks_2_attentions_1_transformer_blocks_9 = BasicTransformerBlock( | |
(norm1): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(attn1): Attention( | |
(to_q): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm2): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(attn2): Attention( | |
(to_q): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=2048, out_features=1280, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=2048, out_features=1280, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm3): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(ff): FeedForward( | |
(net): ModuleList( | |
(0): GEGLU( | |
(proj): LoRACompatibleLinear(in_features=1280, out_features=10240, bias=True) | |
) | |
(1): Dropout(p=0.0, inplace=False) | |
(2): LoRACompatibleLinear(in_features=5120, out_features=1280, bias=True) | |
) | |
) | |
).cuda() | |
self.L__self___mid_block_resnets_0_norm1 = GroupNorm(32, 1280, eps=1e-05, affine=True).cuda() | |
self.L__self___mid_block_resnets_0_norm2 = GroupNorm(32, 1280, eps=1e-05, affine=True).cuda() | |
self.L__self___mid_block_resnets_0_dropout = Dropout(p=0.0, inplace=False) | |
self.L__self___mid_block_attentions_0_norm = GroupNorm(32, 1280, eps=1e-06, affine=True).cuda() | |
self.L__self___mid_block_attentions_0_transformer_blocks_0 = BasicTransformerBlock( | |
(norm1): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(attn1): Attention( | |
(to_q): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm2): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(attn2): Attention( | |
(to_q): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=2048, out_features=1280, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=2048, out_features=1280, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm3): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(ff): FeedForward( | |
(net): ModuleList( | |
(0): GEGLU( | |
(proj): LoRACompatibleLinear(in_features=1280, out_features=10240, bias=True) | |
) | |
(1): Dropout(p=0.0, inplace=False) | |
(2): LoRACompatibleLinear(in_features=5120, out_features=1280, bias=True) | |
) | |
) | |
).cuda() | |
self.L__self___mid_block_attentions_0_transformer_blocks_1 = BasicTransformerBlock( | |
(norm1): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(attn1): Attention( | |
(to_q): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm2): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(attn2): Attention( | |
(to_q): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=2048, out_features=1280, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=2048, out_features=1280, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm3): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(ff): FeedForward( | |
(net): ModuleList( | |
(0): GEGLU( | |
(proj): LoRACompatibleLinear(in_features=1280, out_features=10240, bias=True) | |
) | |
(1): Dropout(p=0.0, inplace=False) | |
(2): LoRACompatibleLinear(in_features=5120, out_features=1280, bias=True) | |
) | |
) | |
).cuda() | |
self.L__self___mid_block_attentions_0_transformer_blocks_2 = BasicTransformerBlock( | |
(norm1): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(attn1): Attention( | |
(to_q): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm2): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(attn2): Attention( | |
(to_q): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=2048, out_features=1280, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=2048, out_features=1280, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm3): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(ff): FeedForward( | |
(net): ModuleList( | |
(0): GEGLU( | |
(proj): LoRACompatibleLinear(in_features=1280, out_features=10240, bias=True) | |
) | |
(1): Dropout(p=0.0, inplace=False) | |
(2): LoRACompatibleLinear(in_features=5120, out_features=1280, bias=True) | |
) | |
) | |
).cuda() | |
self.L__self___mid_block_attentions_0_transformer_blocks_3 = BasicTransformerBlock( | |
(norm1): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(attn1): Attention( | |
(to_q): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm2): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(attn2): Attention( | |
(to_q): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=2048, out_features=1280, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=2048, out_features=1280, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm3): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(ff): FeedForward( | |
(net): ModuleList( | |
(0): GEGLU( | |
(proj): LoRACompatibleLinear(in_features=1280, out_features=10240, bias=True) | |
) | |
(1): Dropout(p=0.0, inplace=False) | |
(2): LoRACompatibleLinear(in_features=5120, out_features=1280, bias=True) | |
) | |
) | |
).cuda() | |
self.L__self___mid_block_attentions_0_transformer_blocks_4 = BasicTransformerBlock( | |
(norm1): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(attn1): Attention( | |
(to_q): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm2): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(attn2): Attention( | |
(to_q): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=2048, out_features=1280, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=2048, out_features=1280, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm3): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(ff): FeedForward( | |
(net): ModuleList( | |
(0): GEGLU( | |
(proj): LoRACompatibleLinear(in_features=1280, out_features=10240, bias=True) | |
) | |
(1): Dropout(p=0.0, inplace=False) | |
(2): LoRACompatibleLinear(in_features=5120, out_features=1280, bias=True) | |
) | |
) | |
).cuda() | |
self.L__self___mid_block_attentions_0_transformer_blocks_5 = BasicTransformerBlock( | |
(norm1): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(attn1): Attention( | |
(to_q): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm2): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(attn2): Attention( | |
(to_q): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=2048, out_features=1280, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=2048, out_features=1280, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm3): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(ff): FeedForward( | |
(net): ModuleList( | |
(0): GEGLU( | |
(proj): LoRACompatibleLinear(in_features=1280, out_features=10240, bias=True) | |
) | |
(1): Dropout(p=0.0, inplace=False) | |
(2): LoRACompatibleLinear(in_features=5120, out_features=1280, bias=True) | |
) | |
) | |
).cuda() | |
self.L__self___mid_block_attentions_0_transformer_blocks_6 = BasicTransformerBlock( | |
(norm1): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(attn1): Attention( | |
(to_q): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm2): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(attn2): Attention( | |
(to_q): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=2048, out_features=1280, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=2048, out_features=1280, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm3): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(ff): FeedForward( | |
(net): ModuleList( | |
(0): GEGLU( | |
(proj): LoRACompatibleLinear(in_features=1280, out_features=10240, bias=True) | |
) | |
(1): Dropout(p=0.0, inplace=False) | |
(2): LoRACompatibleLinear(in_features=5120, out_features=1280, bias=True) | |
) | |
) | |
).cuda() | |
self.L__self___mid_block_attentions_0_transformer_blocks_7 = BasicTransformerBlock( | |
(norm1): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(attn1): Attention( | |
(to_q): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm2): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(attn2): Attention( | |
(to_q): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=2048, out_features=1280, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=2048, out_features=1280, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm3): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(ff): FeedForward( | |
(net): ModuleList( | |
(0): GEGLU( | |
(proj): LoRACompatibleLinear(in_features=1280, out_features=10240, bias=True) | |
) | |
(1): Dropout(p=0.0, inplace=False) | |
(2): LoRACompatibleLinear(in_features=5120, out_features=1280, bias=True) | |
) | |
) | |
).cuda() | |
self.L__self___mid_block_attentions_0_transformer_blocks_8 = BasicTransformerBlock( | |
(norm1): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(attn1): Attention( | |
(to_q): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm2): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(attn2): Attention( | |
(to_q): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=2048, out_features=1280, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=2048, out_features=1280, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm3): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(ff): FeedForward( | |
(net): ModuleList( | |
(0): GEGLU( | |
(proj): LoRACompatibleLinear(in_features=1280, out_features=10240, bias=True) | |
) | |
(1): Dropout(p=0.0, inplace=False) | |
(2): LoRACompatibleLinear(in_features=5120, out_features=1280, bias=True) | |
) | |
) | |
).cuda() | |
self.L__self___mid_block_attentions_0_transformer_blocks_9 = BasicTransformerBlock( | |
(norm1): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(attn1): Attention( | |
(to_q): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm2): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(attn2): Attention( | |
(to_q): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=2048, out_features=1280, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=2048, out_features=1280, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm3): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(ff): FeedForward( | |
(net): ModuleList( | |
(0): GEGLU( | |
(proj): LoRACompatibleLinear(in_features=1280, out_features=10240, bias=True) | |
) | |
(1): Dropout(p=0.0, inplace=False) | |
(2): LoRACompatibleLinear(in_features=5120, out_features=1280, bias=True) | |
) | |
) | |
).cuda() | |
self.L__self___mid_block_resnets_slice_1__None__None___0_norm1 = GroupNorm(32, 1280, eps=1e-05, affine=True).cuda() | |
self.L__self___mid_block_resnets_slice_1__None__None___0_norm2 = GroupNorm(32, 1280, eps=1e-05, affine=True).cuda() | |
self.L__self___mid_block_resnets_slice_1__None__None___0_dropout = Dropout(p=0.0, inplace=False) | |
self.L__self___up_blocks_0_resnets_0_norm1 = GroupNorm(32, 2560, eps=1e-05, affine=True).cuda() | |
self.L__self___up_blocks_0_resnets_0_norm2 = GroupNorm(32, 1280, eps=1e-05, affine=True).cuda() | |
self.L__self___up_blocks_0_resnets_0_dropout = Dropout(p=0.0, inplace=False) | |
self.L__self___up_blocks_0_attentions_0_norm = GroupNorm(32, 1280, eps=1e-06, affine=True).cuda() | |
self.L__self___up_blocks_0_attentions_0_transformer_blocks_0 = BasicTransformerBlock( | |
(norm1): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(attn1): Attention( | |
(to_q): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm2): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(attn2): Attention( | |
(to_q): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=2048, out_features=1280, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=2048, out_features=1280, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm3): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(ff): FeedForward( | |
(net): ModuleList( | |
(0): GEGLU( | |
(proj): LoRACompatibleLinear(in_features=1280, out_features=10240, bias=True) | |
) | |
(1): Dropout(p=0.0, inplace=False) | |
(2): LoRACompatibleLinear(in_features=5120, out_features=1280, bias=True) | |
) | |
) | |
).cuda() | |
# up_blocks_0.attentions_0, transformer_blocks 1-9 (same layout as block 0 above):
# LayerNorm(1280) -> self-attention (1280 -> 1280) -> LayerNorm(1280) -> cross-attention over
# 2048-dim encoder states -> LayerNorm(1280) -> GEGLU feed-forward (inner dim 5120), dropout 0.0.
# NOTE: the 20 x 64 head split is an assumption (SDXL default) that the module dump does not record,
# and `BasicTransformerBlock` is assumed to be importable from `diffusers.models.attention`.
self.L__self___up_blocks_0_attentions_0_transformer_blocks_1 = BasicTransformerBlock(dim=1280, num_attention_heads=20, attention_head_dim=64, dropout=0.0, cross_attention_dim=2048).cuda()
self.L__self___up_blocks_0_attentions_0_transformer_blocks_2 = BasicTransformerBlock(dim=1280, num_attention_heads=20, attention_head_dim=64, dropout=0.0, cross_attention_dim=2048).cuda()
self.L__self___up_blocks_0_attentions_0_transformer_blocks_3 = BasicTransformerBlock(dim=1280, num_attention_heads=20, attention_head_dim=64, dropout=0.0, cross_attention_dim=2048).cuda()
self.L__self___up_blocks_0_attentions_0_transformer_blocks_4 = BasicTransformerBlock(dim=1280, num_attention_heads=20, attention_head_dim=64, dropout=0.0, cross_attention_dim=2048).cuda()
self.L__self___up_blocks_0_attentions_0_transformer_blocks_5 = BasicTransformerBlock(dim=1280, num_attention_heads=20, attention_head_dim=64, dropout=0.0, cross_attention_dim=2048).cuda()
self.L__self___up_blocks_0_attentions_0_transformer_blocks_6 = BasicTransformerBlock(dim=1280, num_attention_heads=20, attention_head_dim=64, dropout=0.0, cross_attention_dim=2048).cuda()
self.L__self___up_blocks_0_attentions_0_transformer_blocks_7 = BasicTransformerBlock(dim=1280, num_attention_heads=20, attention_head_dim=64, dropout=0.0, cross_attention_dim=2048).cuda()
self.L__self___up_blocks_0_attentions_0_transformer_blocks_8 = BasicTransformerBlock(dim=1280, num_attention_heads=20, attention_head_dim=64, dropout=0.0, cross_attention_dim=2048).cuda()
self.L__self___up_blocks_0_attentions_0_transformer_blocks_9 = BasicTransformerBlock(dim=1280, num_attention_heads=20, attention_head_dim=64, dropout=0.0, cross_attention_dim=2048).cuda()
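# up_blocks_0.resnets_1 (skip-concat input 2560 = 1280 + 1280 -> 1280) and the attentions_1 input GroupNorm.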
self.L__self___up_blocks_0_resnets_1_norm1 = GroupNorm(32, 2560, eps=1e-05, affine=True).cuda() | |
self.L__self___up_blocks_0_resnets_1_norm2 = GroupNorm(32, 1280, eps=1e-05, affine=True).cuda() | |
self.L__self___up_blocks_0_resnets_1_dropout = Dropout(p=0.0, inplace=False) | |
self.L__self___up_blocks_0_attentions_1_norm = GroupNorm(32, 1280, eps=1e-06, affine=True).cuda() | |
# up_blocks_0.attentions_1, transformer_blocks 0-9: same 1280-dim block layout as attentions_0
# (20 x 64 head split assumed, cross-attention over 2048-dim encoder states, GEGLU inner dim 5120).
self.L__self___up_blocks_0_attentions_1_transformer_blocks_0 = BasicTransformerBlock(dim=1280, num_attention_heads=20, attention_head_dim=64, dropout=0.0, cross_attention_dim=2048).cuda()
self.L__self___up_blocks_0_attentions_1_transformer_blocks_1 = BasicTransformerBlock(dim=1280, num_attention_heads=20, attention_head_dim=64, dropout=0.0, cross_attention_dim=2048).cuda()
self.L__self___up_blocks_0_attentions_1_transformer_blocks_2 = BasicTransformerBlock(dim=1280, num_attention_heads=20, attention_head_dim=64, dropout=0.0, cross_attention_dim=2048).cuda()
self.L__self___up_blocks_0_attentions_1_transformer_blocks_3 = BasicTransformerBlock(dim=1280, num_attention_heads=20, attention_head_dim=64, dropout=0.0, cross_attention_dim=2048).cuda()
self.L__self___up_blocks_0_attentions_1_transformer_blocks_4 = BasicTransformerBlock(dim=1280, num_attention_heads=20, attention_head_dim=64, dropout=0.0, cross_attention_dim=2048).cuda()
self.L__self___up_blocks_0_attentions_1_transformer_blocks_5 = BasicTransformerBlock(dim=1280, num_attention_heads=20, attention_head_dim=64, dropout=0.0, cross_attention_dim=2048).cuda()
self.L__self___up_blocks_0_attentions_1_transformer_blocks_6 = BasicTransformerBlock(dim=1280, num_attention_heads=20, attention_head_dim=64, dropout=0.0, cross_attention_dim=2048).cuda()
self.L__self___up_blocks_0_attentions_1_transformer_blocks_7 = BasicTransformerBlock(dim=1280, num_attention_heads=20, attention_head_dim=64, dropout=0.0, cross_attention_dim=2048).cuda()
self.L__self___up_blocks_0_attentions_1_transformer_blocks_8 = BasicTransformerBlock(dim=1280, num_attention_heads=20, attention_head_dim=64, dropout=0.0, cross_attention_dim=2048).cuda()
self.L__self___up_blocks_0_attentions_1_transformer_blocks_9 = BasicTransformerBlock(dim=1280, num_attention_heads=20, attention_head_dim=64, dropout=0.0, cross_attention_dim=2048).cuda()
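# up_blocks_0.resnets_2 (skip-concat input 1920 = 1280 + 640 -> 1280) and the attentions_2 input GroupNorm.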
self.L__self___up_blocks_0_resnets_2_norm1 = GroupNorm(32, 1920, eps=1e-05, affine=True).cuda() | |
self.L__self___up_blocks_0_resnets_2_norm2 = GroupNorm(32, 1280, eps=1e-05, affine=True).cuda() | |
self.L__self___up_blocks_0_resnets_2_dropout = Dropout(p=0.0, inplace=False) | |
self.L__self___up_blocks_0_attentions_2_norm = GroupNorm(32, 1280, eps=1e-06, affine=True).cuda() | |
# up_blocks_0.attentions_2, transformer_blocks 0-9: same 1280-dim block layout as attentions_0
# (20 x 64 head split assumed, cross-attention over 2048-dim encoder states, GEGLU inner dim 5120).
self.L__self___up_blocks_0_attentions_2_transformer_blocks_0 = BasicTransformerBlock(dim=1280, num_attention_heads=20, attention_head_dim=64, dropout=0.0, cross_attention_dim=2048).cuda()
self.L__self___up_blocks_0_attentions_2_transformer_blocks_1 = BasicTransformerBlock(dim=1280, num_attention_heads=20, attention_head_dim=64, dropout=0.0, cross_attention_dim=2048).cuda()
self.L__self___up_blocks_0_attentions_2_transformer_blocks_2 = BasicTransformerBlock(dim=1280, num_attention_heads=20, attention_head_dim=64, dropout=0.0, cross_attention_dim=2048).cuda()
self.L__self___up_blocks_0_attentions_2_transformer_blocks_3 = BasicTransformerBlock(dim=1280, num_attention_heads=20, attention_head_dim=64, dropout=0.0, cross_attention_dim=2048).cuda()
self.L__self___up_blocks_0_attentions_2_transformer_blocks_4 = BasicTransformerBlock(dim=1280, num_attention_heads=20, attention_head_dim=64, dropout=0.0, cross_attention_dim=2048).cuda()
self.L__self___up_blocks_0_attentions_2_transformer_blocks_5 = BasicTransformerBlock(dim=1280, num_attention_heads=20, attention_head_dim=64, dropout=0.0, cross_attention_dim=2048).cuda()
self.L__self___up_blocks_0_attentions_2_transformer_blocks_6 = BasicTransformerBlock(dim=1280, num_attention_heads=20, attention_head_dim=64, dropout=0.0, cross_attention_dim=2048).cuda()
self.L__self___up_blocks_0_attentions_2_transformer_blocks_7 = BasicTransformerBlock(dim=1280, num_attention_heads=20, attention_head_dim=64, dropout=0.0, cross_attention_dim=2048).cuda()
self.L__self___up_blocks_0_attentions_2_transformer_blocks_8 = BasicTransformerBlock(dim=1280, num_attention_heads=20, attention_head_dim=64, dropout=0.0, cross_attention_dim=2048).cuda()
self.L__self___up_blocks_0_attentions_2_transformer_blocks_9 = BasicTransformerBlock(dim=1280, num_attention_heads=20, attention_head_dim=64, dropout=0.0, cross_attention_dim=2048).cuda()
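# up_blocks_1.resnets_0 (skip-concat input 1920 = 1280 + 640 -> 640) and the attentions_0 input GroupNorm at 640 channels.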
self.L__self___up_blocks_1_resnets_0_norm1 = GroupNorm(32, 1920, eps=1e-05, affine=True).cuda() | |
self.L__self___up_blocks_1_resnets_0_norm2 = GroupNorm(32, 640, eps=1e-05, affine=True).cuda() | |
self.L__self___up_blocks_1_resnets_0_dropout = Dropout(p=0.0, inplace=False) | |
self.L__self___up_blocks_1_attentions_0_norm = GroupNorm(32, 640, eps=1e-06, affine=True).cuda() | |
# up_blocks_1.attentions_0, transformer_blocks 0-1: 640-dim blocks (10 x 64 head split assumed),
# cross-attention over 2048-dim encoder states, GEGLU feed-forward with inner dim 2560.
self.L__self___up_blocks_1_attentions_0_transformer_blocks_0 = BasicTransformerBlock(dim=640, num_attention_heads=10, attention_head_dim=64, dropout=0.0, cross_attention_dim=2048).cuda()
self.L__self___up_blocks_1_attentions_0_transformer_blocks_1 = BasicTransformerBlock(dim=640, num_attention_heads=10, attention_head_dim=64, dropout=0.0, cross_attention_dim=2048).cuda()
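# up_blocks_1.resnets_1 (input 1280 -> 640) and the attentions_1 input GroupNorm.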
self.L__self___up_blocks_1_resnets_1_norm1 = GroupNorm(32, 1280, eps=1e-05, affine=True).cuda() | |
self.L__self___up_blocks_1_resnets_1_norm2 = GroupNorm(32, 640, eps=1e-05, affine=True).cuda() | |
self.L__self___up_blocks_1_resnets_1_dropout = Dropout(p=0.0, inplace=False) | |
self.L__self___up_blocks_1_attentions_1_norm = GroupNorm(32, 640, eps=1e-06, affine=True).cuda() | |
# up_blocks_1.attentions_1, transformer_blocks 0-1: same 640-dim block layout as attentions_0
# (10 x 64 head split assumed, cross-attention over 2048-dim encoder states).
self.L__self___up_blocks_1_attentions_1_transformer_blocks_0 = BasicTransformerBlock(dim=640, num_attention_heads=10, attention_head_dim=64, dropout=0.0, cross_attention_dim=2048).cuda()
self.L__self___up_blocks_1_attentions_1_transformer_blocks_1 = BasicTransformerBlock(dim=640, num_attention_heads=10, attention_head_dim=64, dropout=0.0, cross_attention_dim=2048).cuda()
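# up_blocks_1.resnets_2 (skip-concat input 960 = 640 + 320 -> 640) and the attentions_2 input GroupNorm.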
self.L__self___up_blocks_1_resnets_2_norm1 = GroupNorm(32, 960, eps=1e-05, affine=True).cuda() | |
self.L__self___up_blocks_1_resnets_2_norm2 = GroupNorm(32, 640, eps=1e-05, affine=True).cuda() | |
self.L__self___up_blocks_1_resnets_2_dropout = Dropout(p=0.0, inplace=False) | |
self.L__self___up_blocks_1_attentions_2_norm = GroupNorm(32, 640, eps=1e-06, affine=True).cuda() | |
# up_blocks_1.attentions_2, transformer_blocks 0-1: same 640-dim block layout as attentions_0
# (10 x 64 head split assumed, cross-attention over 2048-dim encoder states).
self.L__self___up_blocks_1_attentions_2_transformer_blocks_0 = BasicTransformerBlock(dim=640, num_attention_heads=10, attention_head_dim=64, dropout=0.0, cross_attention_dim=2048).cuda()
self.L__self___up_blocks_1_attentions_2_transformer_blocks_1 = BasicTransformerBlock(dim=640, num_attention_heads=10, attention_head_dim=64, dropout=0.0, cross_attention_dim=2048).cuda()
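# up_blocks_2 resnet norms (960/640 -> 320; no attention blocks in this up block) and the output head (GroupNorm + 320 -> 4 conv).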
self.L__self___up_blocks_2_resnets_0_norm1 = GroupNorm(32, 960, eps=1e-05, affine=True).cuda() | |
self.L__self___up_blocks_2_resnets_0_norm2 = GroupNorm(32, 320, eps=1e-05, affine=True).cuda() | |
self.L__self___up_blocks_2_resnets_0_dropout = Dropout(p=0.0, inplace=False) | |
self.L__self___up_blocks_2_resnets_1_norm1 = GroupNorm(32, 640, eps=1e-05, affine=True).cuda() | |
self.L__self___up_blocks_2_resnets_1_norm2 = GroupNorm(32, 320, eps=1e-05, affine=True).cuda() | |
self.L__self___up_blocks_2_resnets_1_dropout = Dropout(p=0.0, inplace=False) | |
self.L__self___up_blocks_2_resnets_2_norm1 = GroupNorm(32, 640, eps=1e-05, affine=True).cuda() | |
self.L__self___up_blocks_2_resnets_2_norm2 = GroupNorm(32, 320, eps=1e-05, affine=True).cuda() | |
self.L__self___up_blocks_2_resnets_2_dropout = Dropout(p=0.0, inplace=False) | |
self.L__self___conv_norm_out = GroupNorm(32, 320, eps=1e-05, affine=True).cuda() | |
self.L__self___conv_out = Conv2d(320, 4, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)).cuda() | |
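# Flat fp16 CUDA weight/bias tensors consumed directly by the traced graph; they are randomly
# initialized, so presumably only their shapes, dtypes, and devices matter for reproducing the issue.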
self.L__self___time_embedding_linear_1_weight = torch.nn.Parameter(torch.randn([1280, 320], dtype=torch.float16, device="cuda")) | |
self.L__self___time_embedding_linear_1_bias = torch.nn.Parameter(torch.randn([1280], dtype=torch.float16, device="cuda")) | |
self.L__self___time_embedding_linear_2_weight = torch.nn.Parameter(torch.randn([1280, 1280], dtype=torch.float16, device="cuda")) | |
self.L__self___time_embedding_linear_2_bias = torch.nn.Parameter(torch.randn([1280], dtype=torch.float16, device="cuda")) | |
self.L__self___add_embedding_linear_1_weight = torch.nn.Parameter(torch.randn([1280, 2816], dtype=torch.float16, device="cuda")) | |
self.L__self___add_embedding_linear_1_bias = torch.nn.Parameter(torch.randn([1280], dtype=torch.float16, device="cuda")) | |
self.L__self___add_embedding_linear_2_weight = torch.nn.Parameter(torch.randn([1280, 1280], dtype=torch.float16, device="cuda")) | |
self.L__self___add_embedding_linear_2_bias = torch.nn.Parameter(torch.randn([1280], dtype=torch.float16, device="cuda")) | |
self.L__self___down_blocks_0_resnets_0_conv1_weight = torch.nn.Parameter(torch.randn([320, 320, 3, 3], dtype=torch.float16, device="cuda")) | |
self.L__self___down_blocks_0_resnets_0_conv1_bias = torch.nn.Parameter(torch.randn([320], dtype=torch.float16, device="cuda")) | |
self.L__self___down_blocks_0_resnets_0_time_emb_proj_weight = torch.nn.Parameter(torch.randn([320, 1280], dtype=torch.float16, device="cuda")) | |
self.L__self___down_blocks_0_resnets_0_time_emb_proj_bias = torch.nn.Parameter(torch.randn([320], dtype=torch.float16, device="cuda")) | |
self.L__self___down_blocks_0_resnets_0_conv2_weight = torch.nn.Parameter(torch.randn([320, 320, 3, 3], dtype=torch.float16, device="cuda")) | |
self.L__self___down_blocks_0_resnets_0_conv2_bias = torch.nn.Parameter(torch.randn([320], dtype=torch.float16, device="cuda")) | |
self.L__self___down_blocks_0_resnets_1_conv1_weight = torch.nn.Parameter(torch.randn([320, 320, 3, 3], dtype=torch.float16, device="cuda")) | |
self.L__self___down_blocks_0_resnets_1_conv1_bias = torch.nn.Parameter(torch.randn([320], dtype=torch.float16, device="cuda")) | |
self.L__self___down_blocks_0_resnets_1_time_emb_proj_weight = torch.nn.Parameter(torch.randn([320, 1280], dtype=torch.float16, device="cuda")) | |
self.L__self___down_blocks_0_resnets_1_time_emb_proj_bias = torch.nn.Parameter(torch.randn([320], dtype=torch.float16, device="cuda")) | |
self.L__self___down_blocks_0_resnets_1_conv2_weight = torch.nn.Parameter(torch.randn([320, 320, 3, 3], dtype=torch.float16, device="cuda")) | |
self.L__self___down_blocks_0_resnets_1_conv2_bias = torch.nn.Parameter(torch.randn([320], dtype=torch.float16, device="cuda")) | |
self.L__self___down_blocks_0_downsamplers_0_conv_weight = torch.nn.Parameter(torch.randn([320, 320, 3, 3], dtype=torch.float16, device="cuda")) | |
self.L__self___down_blocks_0_downsamplers_0_conv_bias = torch.nn.Parameter(torch.randn([320], dtype=torch.float16, device="cuda")) | |
self.L__self___down_blocks_1_resnets_0_conv1_weight = torch.nn.Parameter(torch.randn([640, 320, 3, 3], dtype=torch.float16, device="cuda")) | |
self.L__self___down_blocks_1_resnets_0_conv1_bias = torch.nn.Parameter(torch.randn([640], dtype=torch.float16, device="cuda")) | |
self.L__self___down_blocks_1_resnets_0_time_emb_proj_weight = torch.nn.Parameter(torch.randn([640, 1280], dtype=torch.float16, device="cuda")) | |
self.L__self___down_blocks_1_resnets_0_time_emb_proj_bias = torch.nn.Parameter(torch.randn([640], dtype=torch.float16, device="cuda")) | |
self.L__self___down_blocks_1_resnets_0_conv2_weight = torch.nn.Parameter(torch.randn([640, 640, 3, 3], dtype=torch.float16, device="cuda")) | |
self.L__self___down_blocks_1_resnets_0_conv2_bias = torch.nn.Parameter(torch.randn([640], dtype=torch.float16, device="cuda")) | |
self.L__self___down_blocks_1_resnets_0_conv_shortcut_weight = torch.nn.Parameter(torch.randn([640, 320, 1, 1], dtype=torch.float16, device="cuda")) | |
self.L__self___down_blocks_1_resnets_0_conv_shortcut_bias = torch.nn.Parameter(torch.randn([640], dtype=torch.float16, device="cuda")) | |
self.L__self___down_blocks_1_attentions_0_proj_in_weight = torch.nn.Parameter(torch.randn([640, 640], dtype=torch.float16, device="cuda")) | |
self.L__self___down_blocks_1_attentions_0_proj_in_bias = torch.nn.Parameter(torch.randn([640], dtype=torch.float16, device="cuda")) | |
self.L__self___down_blocks_1_attentions_0_proj_out_weight = torch.nn.Parameter(torch.randn([640, 640], dtype=torch.float16, device="cuda")) | |
self.L__self___down_blocks_1_attentions_0_proj_out_bias = torch.nn.Parameter(torch.randn([640], dtype=torch.float16, device="cuda")) | |
self.L__self___down_blocks_1_resnets_1_conv1_weight = torch.nn.Parameter(torch.randn([640, 640, 3, 3], dtype=torch.float16, device="cuda")) | |
self.L__self___down_blocks_1_resnets_1_conv1_bias = torch.nn.Parameter(torch.randn([640], dtype=torch.float16, device="cuda")) | |
self.L__self___down_blocks_1_resnets_1_time_emb_proj_weight = torch.nn.Parameter(torch.randn([640, 1280], dtype=torch.float16, device="cuda")) | |
self.L__self___down_blocks_1_resnets_1_time_emb_proj_bias = torch.nn.Parameter(torch.randn([640], dtype=torch.float16, device="cuda")) | |
self.L__self___down_blocks_1_resnets_1_conv2_weight = torch.nn.Parameter(torch.randn([640, 640, 3, 3], dtype=torch.float16, device="cuda")) | |
self.L__self___down_blocks_1_resnets_1_conv2_bias = torch.nn.Parameter(torch.randn([640], dtype=torch.float16, device="cuda")) | |
self.L__self___down_blocks_1_attentions_1_proj_in_weight = torch.nn.Parameter(torch.randn([640, 640], dtype=torch.float16, device="cuda")) | |
self.L__self___down_blocks_1_attentions_1_proj_in_bias = torch.nn.Parameter(torch.randn([640], dtype=torch.float16, device="cuda")) | |
self.L__self___down_blocks_1_attentions_1_proj_out_weight = torch.nn.Parameter(torch.randn([640, 640], dtype=torch.float16, device="cuda")) | |
self.L__self___down_blocks_1_attentions_1_proj_out_bias = torch.nn.Parameter(torch.randn([640], dtype=torch.float16, device="cuda")) | |
self.L__self___down_blocks_1_downsamplers_0_conv_weight = torch.nn.Parameter(torch.randn([640, 640, 3, 3], dtype=torch.float16, device="cuda")) | |
self.L__self___down_blocks_1_downsamplers_0_conv_bias = torch.nn.Parameter(torch.randn([640], dtype=torch.float16, device="cuda")) | |
self.L__self___down_blocks_2_resnets_0_conv1_weight = torch.nn.Parameter(torch.randn([1280, 640, 3, 3], dtype=torch.float16, device="cuda")) | |
self.L__self___down_blocks_2_resnets_0_conv1_bias = torch.nn.Parameter(torch.randn([1280], dtype=torch.float16, device="cuda")) | |
self.L__self___down_blocks_2_resnets_0_time_emb_proj_weight = torch.nn.Parameter(torch.randn([1280, 1280], dtype=torch.float16, device="cuda")) | |
self.L__self___down_blocks_2_resnets_0_time_emb_proj_bias = torch.nn.Parameter(torch.randn([1280], dtype=torch.float16, device="cuda")) | |
self.L__self___down_blocks_2_resnets_0_conv2_weight = torch.nn.Parameter(torch.randn([1280, 1280, 3, 3], dtype=torch.float16, device="cuda")) | |
self.L__self___down_blocks_2_resnets_0_conv2_bias = torch.nn.Parameter(torch.randn([1280], dtype=torch.float16, device="cuda")) | |
self.L__self___down_blocks_2_resnets_0_conv_shortcut_weight = torch.nn.Parameter(torch.randn([1280, 640, 1, 1], dtype=torch.float16, device="cuda")) | |
self.L__self___down_blocks_2_resnets_0_conv_shortcut_bias = torch.nn.Parameter(torch.randn([1280], dtype=torch.float16, device="cuda")) | |
self.L__self___down_blocks_2_attentions_0_proj_in_weight = torch.nn.Parameter(torch.randn([1280, 1280], dtype=torch.float16, device="cuda")) | |
self.L__self___down_blocks_2_attentions_0_proj_in_bias = torch.nn.Parameter(torch.randn([1280], dtype=torch.float16, device="cuda")) | |
self.L__self___down_blocks_2_attentions_0_proj_out_weight = torch.nn.Parameter(torch.randn([1280, 1280], dtype=torch.float16, device="cuda")) | |
self.L__self___down_blocks_2_attentions_0_proj_out_bias = torch.nn.Parameter(torch.randn([1280], dtype=torch.float16, device="cuda")) | |
self.L__self___down_blocks_2_resnets_1_conv1_weight = torch.nn.Parameter(torch.randn([1280, 1280, 3, 3], dtype=torch.float16, device="cuda")) | |
self.L__self___down_blocks_2_resnets_1_conv1_bias = torch.nn.Parameter(torch.randn([1280], dtype=torch.float16, device="cuda")) | |
self.L__self___down_blocks_2_resnets_1_time_emb_proj_weight = torch.nn.Parameter(torch.randn([1280, 1280], dtype=torch.float16, device="cuda")) | |
self.L__self___down_blocks_2_resnets_1_time_emb_proj_bias = torch.nn.Parameter(torch.randn([1280], dtype=torch.float16, device="cuda")) | |
self.L__self___down_blocks_2_resnets_1_conv2_weight = torch.nn.Parameter(torch.randn([1280, 1280, 3, 3], dtype=torch.float16, device="cuda")) | |
self.L__self___down_blocks_2_resnets_1_conv2_bias = torch.nn.Parameter(torch.randn([1280], dtype=torch.float16, device="cuda")) | |
self.L__self___down_blocks_2_attentions_1_proj_in_weight = torch.nn.Parameter(torch.randn([1280, 1280], dtype=torch.float16, device="cuda")) | |
self.L__self___down_blocks_2_attentions_1_proj_in_bias = torch.nn.Parameter(torch.randn([1280], dtype=torch.float16, device="cuda")) | |
self.L__self___down_blocks_2_attentions_1_proj_out_weight = torch.nn.Parameter(torch.randn([1280, 1280], dtype=torch.float16, device="cuda")) | |
self.L__self___down_blocks_2_attentions_1_proj_out_bias = torch.nn.Parameter(torch.randn([1280], dtype=torch.float16, device="cuda")) | |
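# mid_block parameters: resnet 0, the attention proj_in/proj_out, and a second resnet captured under the mangled resnets_slice_1__None__None___0 name.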
self.L__self___mid_block_resnets_0_conv1_weight = torch.nn.Parameter(torch.randn([1280, 1280, 3, 3], dtype=torch.float16, device="cuda")) | |
self.L__self___mid_block_resnets_0_conv1_bias = torch.nn.Parameter(torch.randn([1280], dtype=torch.float16, device="cuda")) | |
self.L__self___mid_block_resnets_0_time_emb_proj_weight = torch.nn.Parameter(torch.randn([1280, 1280], dtype=torch.float16, device="cuda")) | |
self.L__self___mid_block_resnets_0_time_emb_proj_bias = torch.nn.Parameter(torch.randn([1280], dtype=torch.float16, device="cuda")) | |
self.L__self___mid_block_resnets_0_conv2_weight = torch.nn.Parameter(torch.randn([1280, 1280, 3, 3], dtype=torch.float16, device="cuda")) | |
self.L__self___mid_block_resnets_0_conv2_bias = torch.nn.Parameter(torch.randn([1280], dtype=torch.float16, device="cuda")) | |
self.L__self___mid_block_attentions_0_proj_in_weight = torch.nn.Parameter(torch.randn([1280, 1280], dtype=torch.float16, device="cuda")) | |
self.L__self___mid_block_attentions_0_proj_in_bias = torch.nn.Parameter(torch.randn([1280], dtype=torch.float16, device="cuda")) | |
self.L__self___mid_block_attentions_0_proj_out_weight = torch.nn.Parameter(torch.randn([1280, 1280], dtype=torch.float16, device="cuda")) | |
self.L__self___mid_block_attentions_0_proj_out_bias = torch.nn.Parameter(torch.randn([1280], dtype=torch.float16, device="cuda")) | |
self.L__self___mid_block_resnets_slice_1__None__None___0_conv1_weight = torch.nn.Parameter(torch.randn([1280, 1280, 3, 3], dtype=torch.float16, device="cuda")) | |
self.L__self___mid_block_resnets_slice_1__None__None___0_conv1_bias = torch.nn.Parameter(torch.randn([1280], dtype=torch.float16, device="cuda")) | |
self.L__self___mid_block_resnets_slice_1__None__None___0_time_emb_proj_weight = torch.nn.Parameter(torch.randn([1280, 1280], dtype=torch.float16, device="cuda")) | |
self.L__self___mid_block_resnets_slice_1__None__None___0_time_emb_proj_bias = torch.nn.Parameter(torch.randn([1280], dtype=torch.float16, device="cuda")) | |
self.L__self___mid_block_resnets_slice_1__None__None___0_conv2_weight = torch.nn.Parameter(torch.randn([1280, 1280, 3, 3], dtype=torch.float16, device="cuda")) | |
self.L__self___mid_block_resnets_slice_1__None__None___0_conv2_bias = torch.nn.Parameter(torch.randn([1280], dtype=torch.float16, device="cuda")) | |
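# up_blocks[0] parameters (1280 channels); the 2560- and 1920-channel conv1/conv_shortcut inputs presumably correspond to skip connections concatenated from the down path.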
self.L__self___up_blocks_0_resnets_0_conv1_weight = torch.nn.Parameter(torch.randn([1280, 2560, 3, 3], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_0_resnets_0_conv1_bias = torch.nn.Parameter(torch.randn([1280], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_0_resnets_0_time_emb_proj_weight = torch.nn.Parameter(torch.randn([1280, 1280], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_0_resnets_0_time_emb_proj_bias = torch.nn.Parameter(torch.randn([1280], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_0_resnets_0_conv2_weight = torch.nn.Parameter(torch.randn([1280, 1280, 3, 3], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_0_resnets_0_conv2_bias = torch.nn.Parameter(torch.randn([1280], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_0_resnets_0_conv_shortcut_weight = torch.nn.Parameter(torch.randn([1280, 2560, 1, 1], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_0_resnets_0_conv_shortcut_bias = torch.nn.Parameter(torch.randn([1280], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_0_attentions_0_proj_in_weight = torch.nn.Parameter(torch.randn([1280, 1280], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_0_attentions_0_proj_in_bias = torch.nn.Parameter(torch.randn([1280], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_0_attentions_0_proj_out_weight = torch.nn.Parameter(torch.randn([1280, 1280], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_0_attentions_0_proj_out_bias = torch.nn.Parameter(torch.randn([1280], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_0_resnets_1_conv1_weight = torch.nn.Parameter(torch.randn([1280, 2560, 3, 3], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_0_resnets_1_conv1_bias = torch.nn.Parameter(torch.randn([1280], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_0_resnets_1_time_emb_proj_weight = torch.nn.Parameter(torch.randn([1280, 1280], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_0_resnets_1_time_emb_proj_bias = torch.nn.Parameter(torch.randn([1280], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_0_resnets_1_conv2_weight = torch.nn.Parameter(torch.randn([1280, 1280, 3, 3], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_0_resnets_1_conv2_bias = torch.nn.Parameter(torch.randn([1280], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_0_resnets_1_conv_shortcut_weight = torch.nn.Parameter(torch.randn([1280, 2560, 1, 1], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_0_resnets_1_conv_shortcut_bias = torch.nn.Parameter(torch.randn([1280], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_0_attentions_1_proj_in_weight = torch.nn.Parameter(torch.randn([1280, 1280], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_0_attentions_1_proj_in_bias = torch.nn.Parameter(torch.randn([1280], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_0_attentions_1_proj_out_weight = torch.nn.Parameter(torch.randn([1280, 1280], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_0_attentions_1_proj_out_bias = torch.nn.Parameter(torch.randn([1280], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_0_resnets_2_conv1_weight = torch.nn.Parameter(torch.randn([1280, 1920, 3, 3], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_0_resnets_2_conv1_bias = torch.nn.Parameter(torch.randn([1280], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_0_resnets_2_time_emb_proj_weight = torch.nn.Parameter(torch.randn([1280, 1280], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_0_resnets_2_time_emb_proj_bias = torch.nn.Parameter(torch.randn([1280], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_0_resnets_2_conv2_weight = torch.nn.Parameter(torch.randn([1280, 1280, 3, 3], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_0_resnets_2_conv2_bias = torch.nn.Parameter(torch.randn([1280], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_0_resnets_2_conv_shortcut_weight = torch.nn.Parameter(torch.randn([1280, 1920, 1, 1], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_0_resnets_2_conv_shortcut_bias = torch.nn.Parameter(torch.randn([1280], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_0_attentions_2_proj_in_weight = torch.nn.Parameter(torch.randn([1280, 1280], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_0_attentions_2_proj_in_bias = torch.nn.Parameter(torch.randn([1280], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_0_attentions_2_proj_out_weight = torch.nn.Parameter(torch.randn([1280, 1280], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_0_attentions_2_proj_out_bias = torch.nn.Parameter(torch.randn([1280], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_0_upsamplers_0_conv_weight = torch.nn.Parameter(torch.randn([1280, 1280, 3, 3], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_0_upsamplers_0_conv_bias = torch.nn.Parameter(torch.randn([1280], dtype=torch.float16, device="cuda")) | |
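# up_blocks[1] parameters (640 channels): resnets, attention projections, and the upsampler conv.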
self.L__self___up_blocks_1_resnets_0_conv1_weight = torch.nn.Parameter(torch.randn([640, 1920, 3, 3], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_1_resnets_0_conv1_bias = torch.nn.Parameter(torch.randn([640], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_1_resnets_0_time_emb_proj_weight = torch.nn.Parameter(torch.randn([640, 1280], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_1_resnets_0_time_emb_proj_bias = torch.nn.Parameter(torch.randn([640], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_1_resnets_0_conv2_weight = torch.nn.Parameter(torch.randn([640, 640, 3, 3], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_1_resnets_0_conv2_bias = torch.nn.Parameter(torch.randn([640], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_1_resnets_0_conv_shortcut_weight = torch.nn.Parameter(torch.randn([640, 1920, 1, 1], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_1_resnets_0_conv_shortcut_bias = torch.nn.Parameter(torch.randn([640], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_1_attentions_0_proj_in_weight = torch.nn.Parameter(torch.randn([640, 640], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_1_attentions_0_proj_in_bias = torch.nn.Parameter(torch.randn([640], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_1_attentions_0_proj_out_weight = torch.nn.Parameter(torch.randn([640, 640], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_1_attentions_0_proj_out_bias = torch.nn.Parameter(torch.randn([640], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_1_resnets_1_conv1_weight = torch.nn.Parameter(torch.randn([640, 1280, 3, 3], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_1_resnets_1_conv1_bias = torch.nn.Parameter(torch.randn([640], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_1_resnets_1_time_emb_proj_weight = torch.nn.Parameter(torch.randn([640, 1280], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_1_resnets_1_time_emb_proj_bias = torch.nn.Parameter(torch.randn([640], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_1_resnets_1_conv2_weight = torch.nn.Parameter(torch.randn([640, 640, 3, 3], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_1_resnets_1_conv2_bias = torch.nn.Parameter(torch.randn([640], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_1_resnets_1_conv_shortcut_weight = torch.nn.Parameter(torch.randn([640, 1280, 1, 1], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_1_resnets_1_conv_shortcut_bias = torch.nn.Parameter(torch.randn([640], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_1_attentions_1_proj_in_weight = torch.nn.Parameter(torch.randn([640, 640], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_1_attentions_1_proj_in_bias = torch.nn.Parameter(torch.randn([640], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_1_attentions_1_proj_out_weight = torch.nn.Parameter(torch.randn([640, 640], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_1_attentions_1_proj_out_bias = torch.nn.Parameter(torch.randn([640], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_1_resnets_2_conv1_weight = torch.nn.Parameter(torch.randn([640, 960, 3, 3], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_1_resnets_2_conv1_bias = torch.nn.Parameter(torch.randn([640], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_1_resnets_2_time_emb_proj_weight = torch.nn.Parameter(torch.randn([640, 1280], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_1_resnets_2_time_emb_proj_bias = torch.nn.Parameter(torch.randn([640], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_1_resnets_2_conv2_weight = torch.nn.Parameter(torch.randn([640, 640, 3, 3], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_1_resnets_2_conv2_bias = torch.nn.Parameter(torch.randn([640], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_1_resnets_2_conv_shortcut_weight = torch.nn.Parameter(torch.randn([640, 960, 1, 1], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_1_resnets_2_conv_shortcut_bias = torch.nn.Parameter(torch.randn([640], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_1_attentions_2_proj_in_weight = torch.nn.Parameter(torch.randn([640, 640], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_1_attentions_2_proj_in_bias = torch.nn.Parameter(torch.randn([640], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_1_attentions_2_proj_out_weight = torch.nn.Parameter(torch.randn([640, 640], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_1_attentions_2_proj_out_bias = torch.nn.Parameter(torch.randn([640], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_1_upsamplers_0_conv_weight = torch.nn.Parameter(torch.randn([640, 640, 3, 3], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_1_upsamplers_0_conv_bias = torch.nn.Parameter(torch.randn([640], dtype=torch.float16, device="cuda")) | |
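# up_blocks[2] parameters (320 channels); only resnet weights are listed for this block, no attention projections.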
self.L__self___up_blocks_2_resnets_0_conv1_weight = torch.nn.Parameter(torch.randn([320, 960, 3, 3], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_2_resnets_0_conv1_bias = torch.nn.Parameter(torch.randn([320], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_2_resnets_0_time_emb_proj_weight = torch.nn.Parameter(torch.randn([320, 1280], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_2_resnets_0_time_emb_proj_bias = torch.nn.Parameter(torch.randn([320], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_2_resnets_0_conv2_weight = torch.nn.Parameter(torch.randn([320, 320, 3, 3], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_2_resnets_0_conv2_bias = torch.nn.Parameter(torch.randn([320], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_2_resnets_0_conv_shortcut_weight = torch.nn.Parameter(torch.randn([320, 960, 1, 1], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_2_resnets_0_conv_shortcut_bias = torch.nn.Parameter(torch.randn([320], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_2_resnets_1_conv1_weight = torch.nn.Parameter(torch.randn([320, 640, 3, 3], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_2_resnets_1_conv1_bias = torch.nn.Parameter(torch.randn([320], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_2_resnets_1_time_emb_proj_weight = torch.nn.Parameter(torch.randn([320, 1280], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_2_resnets_1_time_emb_proj_bias = torch.nn.Parameter(torch.randn([320], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_2_resnets_1_conv2_weight = torch.nn.Parameter(torch.randn([320, 320, 3, 3], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_2_resnets_1_conv2_bias = torch.nn.Parameter(torch.randn([320], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_2_resnets_1_conv_shortcut_weight = torch.nn.Parameter(torch.randn([320, 640, 1, 1], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_2_resnets_1_conv_shortcut_bias = torch.nn.Parameter(torch.randn([320], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_2_resnets_2_conv1_weight = torch.nn.Parameter(torch.randn([320, 640, 3, 3], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_2_resnets_2_conv1_bias = torch.nn.Parameter(torch.randn([320], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_2_resnets_2_time_emb_proj_weight = torch.nn.Parameter(torch.randn([320, 1280], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_2_resnets_2_time_emb_proj_bias = torch.nn.Parameter(torch.randn([320], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_2_resnets_2_conv2_weight = torch.nn.Parameter(torch.randn([320, 320, 3, 3], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_2_resnets_2_conv2_bias = torch.nn.Parameter(torch.randn([320], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_2_resnets_2_conv_shortcut_weight = torch.nn.Parameter(torch.randn([320, 640, 1, 1], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_2_resnets_2_conv_shortcut_bias = torch.nn.Parameter(torch.randn([320], dtype=torch.float16, device="cuda")) | |
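
# Captured forward pass. Inputs: the latent sample, the diffusion timestep, the added-condition tensors (text_embeds, time_ids), and the encoder hidden states used for cross-attention. Note that L__self___time_embedding_act (SiLU) is reused as the activation throughout the graph.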
def forward(self, L_sample_ : torch.Tensor, L_timestep_ : torch.Tensor, L_added_cond_kwargs_text_embeds_ : torch.Tensor, L_added_cond_kwargs_time_ids_ : torch.Tensor, L_encoder_hidden_states_ : torch.Tensor): | |
l_sample_ = L_sample_ | |
timesteps = L_timestep_ | |
text_embeds = L_added_cond_kwargs_text_embeds_ | |
time_ids = L_added_cond_kwargs_time_ids_ | |
l_encoder_hidden_states_ = L_encoder_hidden_states_ | |
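# Timestep embedding: broadcast the scalar timestep to the batch of 2, build a 320-dim sinusoidal embedding (160 frequencies, with the sin/cos halves swapped), cast to fp16, and project it through time_embedding.linear_1 -> SiLU -> linear_2.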
getitem = timesteps[None]; timesteps = None | |
timesteps_1 = getitem.to(device(type='cuda', index=0)); getitem = None | |
timesteps_2 = timesteps_1.expand(2); timesteps_1 = None | |
arange = torch.arange(start = 0, end = 160, dtype = torch.float32, device = device(type='cuda', index=0)) | |
exponent = -9.210340371976184 * arange; arange = None | |
exponent_1 = exponent / 160; exponent = None | |
emb = torch.exp(exponent_1); exponent_1 = None | |
getitem_1 = timesteps_2[(slice(None, None, None), None)]; timesteps_2 = None | |
float_1 = getitem_1.float(); getitem_1 = None | |
getitem_2 = emb[(None, slice(None, None, None))]; emb = None | |
emb_1 = float_1 * getitem_2; float_1 = getitem_2 = None | |
emb_2 = 1 * emb_1; emb_1 = None | |
sin = torch.sin(emb_2) | |
cos = torch.cos(emb_2); emb_2 = None | |
emb_3 = torch.cat([sin, cos], dim = -1); sin = cos = None | |
getitem_3 = emb_3[(slice(None, None, None), slice(160, None, None))] | |
getitem_4 = emb_3[(slice(None, None, None), slice(None, 160, None))]; emb_3 = None | |
t_emb = torch.cat([getitem_3, getitem_4], dim = -1); getitem_3 = getitem_4 = None | |
t_emb_1 = t_emb.to(dtype = torch.float16); t_emb = None | |
l__self___time_embedding_linear_1_weight = self.L__self___time_embedding_linear_1_weight | |
l__self___time_embedding_linear_1_bias = self.L__self___time_embedding_linear_1_bias | |
sample = torch._C._nn.linear(t_emb_1, l__self___time_embedding_linear_1_weight, l__self___time_embedding_linear_1_bias); t_emb_1 = l__self___time_embedding_linear_1_weight = l__self___time_embedding_linear_1_bias = None | |
sample_1 = self.L__self___time_embedding_act(sample); sample = None | |
l__self___time_embedding_linear_2_weight = self.L__self___time_embedding_linear_2_weight | |
l__self___time_embedding_linear_2_bias = self.L__self___time_embedding_linear_2_bias | |
emb_5 = torch._C._nn.linear(sample_1, l__self___time_embedding_linear_2_weight, l__self___time_embedding_linear_2_bias); sample_1 = l__self___time_embedding_linear_2_weight = l__self___time_embedding_linear_2_bias = None | |
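# Added-condition embedding: flatten time_ids, embed each entry with 128 sin/cos frequencies (256 dims, halves swapped), reshape back to the batch of 2, concatenate with text_embeds, cast to fp16, and project through add_embedding.linear_1 -> SiLU -> linear_2.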
flatten = time_ids.flatten(); time_ids = None | |
arange_1 = torch.arange(start = 0, end = 128, dtype = torch.float32, device = device(type='cuda', index=0)) | |
exponent_2 = -9.210340371976184 * arange_1; arange_1 = None | |
exponent_3 = exponent_2 / 128; exponent_2 = None | |
emb_6 = torch.exp(exponent_3); exponent_3 = None | |
getitem_5 = flatten[(slice(None, None, None), None)]; flatten = None | |
float_2 = getitem_5.float(); getitem_5 = None | |
getitem_6 = emb_6[(None, slice(None, None, None))]; emb_6 = None | |
emb_7 = float_2 * getitem_6; float_2 = getitem_6 = None | |
emb_8 = 1 * emb_7; emb_7 = None | |
sin_1 = torch.sin(emb_8) | |
cos_1 = torch.cos(emb_8); emb_8 = None | |
emb_9 = torch.cat([sin_1, cos_1], dim = -1); sin_1 = cos_1 = None | |
getitem_7 = emb_9[(slice(None, None, None), slice(128, None, None))] | |
getitem_8 = emb_9[(slice(None, None, None), slice(None, 128, None))]; emb_9 = None | |
time_embeds = torch.cat([getitem_7, getitem_8], dim = -1); getitem_7 = getitem_8 = None | |
time_embeds_1 = time_embeds.reshape((2, -1)); time_embeds = None | |
add_embeds = torch.concat([text_embeds, time_embeds_1], dim = -1); text_embeds = time_embeds_1 = None | |
add_embeds_1 = add_embeds.to(torch.float16); add_embeds = None | |
l__self___add_embedding_linear_1_weight = self.L__self___add_embedding_linear_1_weight | |
l__self___add_embedding_linear_1_bias = self.L__self___add_embedding_linear_1_bias | |
sample_3 = torch._C._nn.linear(add_embeds_1, l__self___add_embedding_linear_1_weight, l__self___add_embedding_linear_1_bias); add_embeds_1 = l__self___add_embedding_linear_1_weight = l__self___add_embedding_linear_1_bias = None | |
sample_4 = self.L__self___time_embedding_act(sample_3); sample_3 = None | |
l__self___add_embedding_linear_2_weight = self.L__self___add_embedding_linear_2_weight | |
l__self___add_embedding_linear_2_bias = self.L__self___add_embedding_linear_2_bias | |
aug_emb = torch._C._nn.linear(sample_4, l__self___add_embedding_linear_2_weight, l__self___add_embedding_linear_2_bias); sample_4 = l__self___add_embedding_linear_2_weight = l__self___add_embedding_linear_2_bias = None | |
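# The conditioning embedding used by every resnet below is the sum of the timestep embedding and the augmented (text + time-ids) embedding.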
emb_11 = emb_5 + aug_emb; emb_5 = aug_emb = None | |
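# conv_in (4 -> 320 channels) followed by down_blocks[0]: two plain 320-channel resnet blocks (GroupNorm -> SiLU -> conv1, add the projected time embedding, GroupNorm -> SiLU -> dropout -> conv2, residual add) and a stride-2 downsampling conv.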
res_hidden_states_8 = self.L__self___conv_in(l_sample_); l_sample_ = None | |
hidden_states_1 = self.L__self___down_blocks_0_resnets_0_norm1(res_hidden_states_8) | |
hidden_states_2 = self.L__self___time_embedding_act(hidden_states_1); hidden_states_1 = None | |
l__self___down_blocks_0_resnets_0_conv1_weight = self.L__self___down_blocks_0_resnets_0_conv1_weight | |
l__self___down_blocks_0_resnets_0_conv1_bias = self.L__self___down_blocks_0_resnets_0_conv1_bias | |
hidden_states_3 = torch.conv2d(hidden_states_2, l__self___down_blocks_0_resnets_0_conv1_weight, l__self___down_blocks_0_resnets_0_conv1_bias, (1, 1), (1, 1), (1, 1), 1); hidden_states_2 = l__self___down_blocks_0_resnets_0_conv1_weight = l__self___down_blocks_0_resnets_0_conv1_bias = None | |
temb = self.L__self___time_embedding_act(emb_11) | |
l__self___down_blocks_0_resnets_0_time_emb_proj_weight = self.L__self___down_blocks_0_resnets_0_time_emb_proj_weight | |
l__self___down_blocks_0_resnets_0_time_emb_proj_bias = self.L__self___down_blocks_0_resnets_0_time_emb_proj_bias | |
out_4 = torch._C._nn.linear(temb, l__self___down_blocks_0_resnets_0_time_emb_proj_weight, l__self___down_blocks_0_resnets_0_time_emb_proj_bias); temb = l__self___down_blocks_0_resnets_0_time_emb_proj_weight = l__self___down_blocks_0_resnets_0_time_emb_proj_bias = None | |
temb_1 = out_4[(slice(None, None, None), slice(None, None, None), None, None)]; out_4 = None | |
hidden_states_4 = hidden_states_3 + temb_1; hidden_states_3 = temb_1 = None | |
hidden_states_5 = self.L__self___down_blocks_0_resnets_0_norm2(hidden_states_4); hidden_states_4 = None | |
hidden_states_6 = self.L__self___time_embedding_act(hidden_states_5); hidden_states_5 = None | |
hidden_states_7 = self.L__self___down_blocks_0_resnets_0_dropout(hidden_states_6); hidden_states_6 = None | |
l__self___down_blocks_0_resnets_0_conv2_weight = self.L__self___down_blocks_0_resnets_0_conv2_weight | |
l__self___down_blocks_0_resnets_0_conv2_bias = self.L__self___down_blocks_0_resnets_0_conv2_bias | |
hidden_states_8 = torch.conv2d(hidden_states_7, l__self___down_blocks_0_resnets_0_conv2_weight, l__self___down_blocks_0_resnets_0_conv2_bias, (1, 1), (1, 1), (1, 1), 1); hidden_states_7 = l__self___down_blocks_0_resnets_0_conv2_weight = l__self___down_blocks_0_resnets_0_conv2_bias = None | |
add_2 = res_hidden_states_8 + hidden_states_8; hidden_states_8 = None | |
res_hidden_states_7 = add_2 / 1.0; add_2 = None | |
hidden_states_11 = self.L__self___down_blocks_0_resnets_1_norm1(res_hidden_states_7) | |
hidden_states_12 = self.L__self___time_embedding_act(hidden_states_11); hidden_states_11 = None | |
l__self___down_blocks_0_resnets_1_conv1_weight = self.L__self___down_blocks_0_resnets_1_conv1_weight | |
l__self___down_blocks_0_resnets_1_conv1_bias = self.L__self___down_blocks_0_resnets_1_conv1_bias | |
hidden_states_13 = torch.conv2d(hidden_states_12, l__self___down_blocks_0_resnets_1_conv1_weight, l__self___down_blocks_0_resnets_1_conv1_bias, (1, 1), (1, 1), (1, 1), 1); hidden_states_12 = l__self___down_blocks_0_resnets_1_conv1_weight = l__self___down_blocks_0_resnets_1_conv1_bias = None | |
temb_2 = self.L__self___time_embedding_act(emb_11) | |
l__self___down_blocks_0_resnets_1_time_emb_proj_weight = self.L__self___down_blocks_0_resnets_1_time_emb_proj_weight | |
l__self___down_blocks_0_resnets_1_time_emb_proj_bias = self.L__self___down_blocks_0_resnets_1_time_emb_proj_bias | |
out_5 = torch._C._nn.linear(temb_2, l__self___down_blocks_0_resnets_1_time_emb_proj_weight, l__self___down_blocks_0_resnets_1_time_emb_proj_bias); temb_2 = l__self___down_blocks_0_resnets_1_time_emb_proj_weight = l__self___down_blocks_0_resnets_1_time_emb_proj_bias = None | |
temb_3 = out_5[(slice(None, None, None), slice(None, None, None), None, None)]; out_5 = None | |
hidden_states_14 = hidden_states_13 + temb_3; hidden_states_13 = temb_3 = None | |
hidden_states_15 = self.L__self___down_blocks_0_resnets_1_norm2(hidden_states_14); hidden_states_14 = None | |
hidden_states_16 = self.L__self___time_embedding_act(hidden_states_15); hidden_states_15 = None | |
hidden_states_17 = self.L__self___down_blocks_0_resnets_1_dropout(hidden_states_16); hidden_states_16 = None | |
l__self___down_blocks_0_resnets_1_conv2_weight = self.L__self___down_blocks_0_resnets_1_conv2_weight | |
l__self___down_blocks_0_resnets_1_conv2_bias = self.L__self___down_blocks_0_resnets_1_conv2_bias | |
hidden_states_18 = torch.conv2d(hidden_states_17, l__self___down_blocks_0_resnets_1_conv2_weight, l__self___down_blocks_0_resnets_1_conv2_bias, (1, 1), (1, 1), (1, 1), 1); hidden_states_17 = l__self___down_blocks_0_resnets_1_conv2_weight = l__self___down_blocks_0_resnets_1_conv2_bias = None | |
add_4 = res_hidden_states_7 + hidden_states_18; hidden_states_18 = None | |
res_hidden_states_6 = add_4 / 1.0; add_4 = None | |
l__self___down_blocks_0_downsamplers_0_conv_weight = self.L__self___down_blocks_0_downsamplers_0_conv_weight | |
l__self___down_blocks_0_downsamplers_0_conv_bias = self.L__self___down_blocks_0_downsamplers_0_conv_bias | |
res_hidden_states_5 = torch.conv2d(res_hidden_states_6, l__self___down_blocks_0_downsamplers_0_conv_weight, l__self___down_blocks_0_downsamplers_0_conv_bias, (2, 2), (1, 1), (1, 1), 1); l__self___down_blocks_0_downsamplers_0_conv_weight = l__self___down_blocks_0_downsamplers_0_conv_bias = None | |
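# down_blocks[1]: the first resnet widens 320 -> 640 via a 1x1 conv_shortcut; each resnet is followed by a Transformer2D stage (GroupNorm, linear proj_in, two BasicTransformerBlocks cross-attending to l_encoder_hidden_states_, linear proj_out, residual add on the 64x64 feature map), and the block ends with a stride-2 downsampler.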
hidden_states_23 = self.L__self___down_blocks_1_resnets_0_norm1(res_hidden_states_5) | |
hidden_states_24 = self.L__self___time_embedding_act(hidden_states_23); hidden_states_23 = None | |
l__self___down_blocks_1_resnets_0_conv1_weight = self.L__self___down_blocks_1_resnets_0_conv1_weight | |
l__self___down_blocks_1_resnets_0_conv1_bias = self.L__self___down_blocks_1_resnets_0_conv1_bias | |
hidden_states_25 = torch.conv2d(hidden_states_24, l__self___down_blocks_1_resnets_0_conv1_weight, l__self___down_blocks_1_resnets_0_conv1_bias, (1, 1), (1, 1), (1, 1), 1); hidden_states_24 = l__self___down_blocks_1_resnets_0_conv1_weight = l__self___down_blocks_1_resnets_0_conv1_bias = None | |
temb_4 = self.L__self___time_embedding_act(emb_11) | |
l__self___down_blocks_1_resnets_0_time_emb_proj_weight = self.L__self___down_blocks_1_resnets_0_time_emb_proj_weight | |
l__self___down_blocks_1_resnets_0_time_emb_proj_bias = self.L__self___down_blocks_1_resnets_0_time_emb_proj_bias | |
out_6 = torch._C._nn.linear(temb_4, l__self___down_blocks_1_resnets_0_time_emb_proj_weight, l__self___down_blocks_1_resnets_0_time_emb_proj_bias); temb_4 = l__self___down_blocks_1_resnets_0_time_emb_proj_weight = l__self___down_blocks_1_resnets_0_time_emb_proj_bias = None | |
temb_5 = out_6[(slice(None, None, None), slice(None, None, None), None, None)]; out_6 = None | |
hidden_states_26 = hidden_states_25 + temb_5; hidden_states_25 = temb_5 = None | |
hidden_states_27 = self.L__self___down_blocks_1_resnets_0_norm2(hidden_states_26); hidden_states_26 = None | |
hidden_states_28 = self.L__self___time_embedding_act(hidden_states_27); hidden_states_27 = None | |
hidden_states_29 = self.L__self___down_blocks_1_resnets_0_dropout(hidden_states_28); hidden_states_28 = None | |
l__self___down_blocks_1_resnets_0_conv2_weight = self.L__self___down_blocks_1_resnets_0_conv2_weight | |
l__self___down_blocks_1_resnets_0_conv2_bias = self.L__self___down_blocks_1_resnets_0_conv2_bias | |
hidden_states_30 = torch.conv2d(hidden_states_29, l__self___down_blocks_1_resnets_0_conv2_weight, l__self___down_blocks_1_resnets_0_conv2_bias, (1, 1), (1, 1), (1, 1), 1); hidden_states_29 = l__self___down_blocks_1_resnets_0_conv2_weight = l__self___down_blocks_1_resnets_0_conv2_bias = None | |
l__self___down_blocks_1_resnets_0_conv_shortcut_weight = self.L__self___down_blocks_1_resnets_0_conv_shortcut_weight | |
l__self___down_blocks_1_resnets_0_conv_shortcut_bias = self.L__self___down_blocks_1_resnets_0_conv_shortcut_bias | |
input_tensor = torch.conv2d(res_hidden_states_5, l__self___down_blocks_1_resnets_0_conv_shortcut_weight, l__self___down_blocks_1_resnets_0_conv_shortcut_bias, (1, 1), (0, 0), (1, 1), 1); l__self___down_blocks_1_resnets_0_conv_shortcut_weight = l__self___down_blocks_1_resnets_0_conv_shortcut_bias = None | |
add_6 = input_tensor + hidden_states_30; input_tensor = hidden_states_30 = None | |
residual = add_6 / 1.0; add_6 = None | |
hidden_states_32 = self.L__self___down_blocks_1_attentions_0_norm(residual) | |
permute = hidden_states_32.permute(0, 2, 3, 1); hidden_states_32 = None | |
hidden_states_33 = permute.reshape(2, 4096, 640); permute = None | |
l__self___down_blocks_1_attentions_0_proj_in_weight = self.L__self___down_blocks_1_attentions_0_proj_in_weight | |
l__self___down_blocks_1_attentions_0_proj_in_bias = self.L__self___down_blocks_1_attentions_0_proj_in_bias | |
hidden_states_34 = torch._C._nn.linear(hidden_states_33, l__self___down_blocks_1_attentions_0_proj_in_weight, l__self___down_blocks_1_attentions_0_proj_in_bias); hidden_states_33 = l__self___down_blocks_1_attentions_0_proj_in_weight = l__self___down_blocks_1_attentions_0_proj_in_bias = None | |
hidden_states_35 = self.L__self___down_blocks_1_attentions_0_transformer_blocks_0(hidden_states_34, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_34 = None | |
hidden_states_36 = self.L__self___down_blocks_1_attentions_0_transformer_blocks_1(hidden_states_35, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_35 = None | |
l__self___down_blocks_1_attentions_0_proj_out_weight = self.L__self___down_blocks_1_attentions_0_proj_out_weight | |
l__self___down_blocks_1_attentions_0_proj_out_bias = self.L__self___down_blocks_1_attentions_0_proj_out_bias | |
hidden_states_37 = torch._C._nn.linear(hidden_states_36, l__self___down_blocks_1_attentions_0_proj_out_weight, l__self___down_blocks_1_attentions_0_proj_out_bias); hidden_states_36 = l__self___down_blocks_1_attentions_0_proj_out_weight = l__self___down_blocks_1_attentions_0_proj_out_bias = None | |
reshape_2 = hidden_states_37.reshape(2, 64, 64, 640); hidden_states_37 = None | |
permute_1 = reshape_2.permute(0, 3, 1, 2); reshape_2 = None | |
hidden_states_38 = permute_1.contiguous(); permute_1 = None | |
res_hidden_states_4 = hidden_states_38 + residual; hidden_states_38 = residual = None | |
hidden_states_41 = self.L__self___down_blocks_1_resnets_1_norm1(res_hidden_states_4) | |
hidden_states_42 = self.L__self___time_embedding_act(hidden_states_41); hidden_states_41 = None | |
l__self___down_blocks_1_resnets_1_conv1_weight = self.L__self___down_blocks_1_resnets_1_conv1_weight | |
l__self___down_blocks_1_resnets_1_conv1_bias = self.L__self___down_blocks_1_resnets_1_conv1_bias | |
hidden_states_43 = torch.conv2d(hidden_states_42, l__self___down_blocks_1_resnets_1_conv1_weight, l__self___down_blocks_1_resnets_1_conv1_bias, (1, 1), (1, 1), (1, 1), 1); hidden_states_42 = l__self___down_blocks_1_resnets_1_conv1_weight = l__self___down_blocks_1_resnets_1_conv1_bias = None | |
temb_6 = self.L__self___time_embedding_act(emb_11) | |
l__self___down_blocks_1_resnets_1_time_emb_proj_weight = self.L__self___down_blocks_1_resnets_1_time_emb_proj_weight | |
l__self___down_blocks_1_resnets_1_time_emb_proj_bias = self.L__self___down_blocks_1_resnets_1_time_emb_proj_bias | |
out_9 = torch._C._nn.linear(temb_6, l__self___down_blocks_1_resnets_1_time_emb_proj_weight, l__self___down_blocks_1_resnets_1_time_emb_proj_bias); temb_6 = l__self___down_blocks_1_resnets_1_time_emb_proj_weight = l__self___down_blocks_1_resnets_1_time_emb_proj_bias = None | |
temb_7 = out_9[(slice(None, None, None), slice(None, None, None), None, None)]; out_9 = None | |
hidden_states_44 = hidden_states_43 + temb_7; hidden_states_43 = temb_7 = None | |
hidden_states_45 = self.L__self___down_blocks_1_resnets_1_norm2(hidden_states_44); hidden_states_44 = None | |
hidden_states_46 = self.L__self___time_embedding_act(hidden_states_45); hidden_states_45 = None | |
hidden_states_47 = self.L__self___down_blocks_1_resnets_1_dropout(hidden_states_46); hidden_states_46 = None | |
l__self___down_blocks_1_resnets_1_conv2_weight = self.L__self___down_blocks_1_resnets_1_conv2_weight | |
l__self___down_blocks_1_resnets_1_conv2_bias = self.L__self___down_blocks_1_resnets_1_conv2_bias | |
hidden_states_48 = torch.conv2d(hidden_states_47, l__self___down_blocks_1_resnets_1_conv2_weight, l__self___down_blocks_1_resnets_1_conv2_bias, (1, 1), (1, 1), (1, 1), 1); hidden_states_47 = l__self___down_blocks_1_resnets_1_conv2_weight = l__self___down_blocks_1_resnets_1_conv2_bias = None | |
add_9 = res_hidden_states_4 + hidden_states_48; hidden_states_48 = None | |
residual_1 = add_9 / 1.0; add_9 = None | |
hidden_states_50 = self.L__self___down_blocks_1_attentions_1_norm(residual_1) | |
permute_2 = hidden_states_50.permute(0, 2, 3, 1); hidden_states_50 = None | |
hidden_states_51 = permute_2.reshape(2, 4096, 640); permute_2 = None | |
l__self___down_blocks_1_attentions_1_proj_in_weight = self.L__self___down_blocks_1_attentions_1_proj_in_weight | |
l__self___down_blocks_1_attentions_1_proj_in_bias = self.L__self___down_blocks_1_attentions_1_proj_in_bias | |
hidden_states_52 = torch._C._nn.linear(hidden_states_51, l__self___down_blocks_1_attentions_1_proj_in_weight, l__self___down_blocks_1_attentions_1_proj_in_bias); hidden_states_51 = l__self___down_blocks_1_attentions_1_proj_in_weight = l__self___down_blocks_1_attentions_1_proj_in_bias = None | |
hidden_states_53 = self.L__self___down_blocks_1_attentions_1_transformer_blocks_0(hidden_states_52, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_52 = None | |
hidden_states_54 = self.L__self___down_blocks_1_attentions_1_transformer_blocks_1(hidden_states_53, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_53 = None | |
l__self___down_blocks_1_attentions_1_proj_out_weight = self.L__self___down_blocks_1_attentions_1_proj_out_weight | |
l__self___down_blocks_1_attentions_1_proj_out_bias = self.L__self___down_blocks_1_attentions_1_proj_out_bias | |
hidden_states_55 = torch._C._nn.linear(hidden_states_54, l__self___down_blocks_1_attentions_1_proj_out_weight, l__self___down_blocks_1_attentions_1_proj_out_bias); hidden_states_54 = l__self___down_blocks_1_attentions_1_proj_out_weight = l__self___down_blocks_1_attentions_1_proj_out_bias = None | |
reshape_4 = hidden_states_55.reshape(2, 64, 64, 640); hidden_states_55 = None | |
permute_3 = reshape_4.permute(0, 3, 1, 2); reshape_4 = None | |
hidden_states_56 = permute_3.contiguous(); permute_3 = None | |
res_hidden_states_3 = hidden_states_56 + residual_1; hidden_states_56 = residual_1 = None | |
l__self___down_blocks_1_downsamplers_0_conv_weight = self.L__self___down_blocks_1_downsamplers_0_conv_weight | |
l__self___down_blocks_1_downsamplers_0_conv_bias = self.L__self___down_blocks_1_downsamplers_0_conv_bias | |
res_hidden_states_2 = torch.conv2d(res_hidden_states_3, l__self___down_blocks_1_downsamplers_0_conv_weight, l__self___down_blocks_1_downsamplers_0_conv_bias, (2, 2), (1, 1), (1, 1), 1); l__self___down_blocks_1_downsamplers_0_conv_weight = l__self___down_blocks_1_downsamplers_0_conv_bias = None | |
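# down_blocks[2]: same structure at 1280 channels on the 32x32 feature map (the first resnet uses a 1x1 conv_shortcut 640 -> 1280); each Transformer2D stage here stacks ten BasicTransformerBlocks, and there is no downsampler after this block.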
hidden_states_61 = self.L__self___down_blocks_2_resnets_0_norm1(res_hidden_states_2) | |
hidden_states_62 = self.L__self___time_embedding_act(hidden_states_61); hidden_states_61 = None | |
l__self___down_blocks_2_resnets_0_conv1_weight = self.L__self___down_blocks_2_resnets_0_conv1_weight | |
l__self___down_blocks_2_resnets_0_conv1_bias = self.L__self___down_blocks_2_resnets_0_conv1_bias | |
hidden_states_63 = torch.conv2d(hidden_states_62, l__self___down_blocks_2_resnets_0_conv1_weight, l__self___down_blocks_2_resnets_0_conv1_bias, (1, 1), (1, 1), (1, 1), 1); hidden_states_62 = l__self___down_blocks_2_resnets_0_conv1_weight = l__self___down_blocks_2_resnets_0_conv1_bias = None | |
temb_8 = self.L__self___time_embedding_act(emb_11) | |
l__self___down_blocks_2_resnets_0_time_emb_proj_weight = self.L__self___down_blocks_2_resnets_0_time_emb_proj_weight | |
l__self___down_blocks_2_resnets_0_time_emb_proj_bias = self.L__self___down_blocks_2_resnets_0_time_emb_proj_bias | |
out_12 = torch._C._nn.linear(temb_8, l__self___down_blocks_2_resnets_0_time_emb_proj_weight, l__self___down_blocks_2_resnets_0_time_emb_proj_bias); temb_8 = l__self___down_blocks_2_resnets_0_time_emb_proj_weight = l__self___down_blocks_2_resnets_0_time_emb_proj_bias = None | |
temb_9 = out_12[(slice(None, None, None), slice(None, None, None), None, None)]; out_12 = None | |
hidden_states_64 = hidden_states_63 + temb_9; hidden_states_63 = temb_9 = None | |
hidden_states_65 = self.L__self___down_blocks_2_resnets_0_norm2(hidden_states_64); hidden_states_64 = None | |
hidden_states_66 = self.L__self___time_embedding_act(hidden_states_65); hidden_states_65 = None | |
hidden_states_67 = self.L__self___down_blocks_2_resnets_0_dropout(hidden_states_66); hidden_states_66 = None | |
l__self___down_blocks_2_resnets_0_conv2_weight = self.L__self___down_blocks_2_resnets_0_conv2_weight | |
l__self___down_blocks_2_resnets_0_conv2_bias = self.L__self___down_blocks_2_resnets_0_conv2_bias | |
hidden_states_68 = torch.conv2d(hidden_states_67, l__self___down_blocks_2_resnets_0_conv2_weight, l__self___down_blocks_2_resnets_0_conv2_bias, (1, 1), (1, 1), (1, 1), 1); hidden_states_67 = l__self___down_blocks_2_resnets_0_conv2_weight = l__self___down_blocks_2_resnets_0_conv2_bias = None | |
l__self___down_blocks_2_resnets_0_conv_shortcut_weight = self.L__self___down_blocks_2_resnets_0_conv_shortcut_weight | |
l__self___down_blocks_2_resnets_0_conv_shortcut_bias = self.L__self___down_blocks_2_resnets_0_conv_shortcut_bias | |
input_tensor_1 = torch.conv2d(res_hidden_states_2, l__self___down_blocks_2_resnets_0_conv_shortcut_weight, l__self___down_blocks_2_resnets_0_conv_shortcut_bias, (1, 1), (0, 0), (1, 1), 1); l__self___down_blocks_2_resnets_0_conv_shortcut_weight = l__self___down_blocks_2_resnets_0_conv_shortcut_bias = None | |
add_12 = input_tensor_1 + hidden_states_68; input_tensor_1 = hidden_states_68 = None | |
residual_2 = add_12 / 1.0; add_12 = None | |
hidden_states_70 = self.L__self___down_blocks_2_attentions_0_norm(residual_2) | |
permute_4 = hidden_states_70.permute(0, 2, 3, 1); hidden_states_70 = None | |
hidden_states_71 = permute_4.reshape(2, 1024, 1280); permute_4 = None | |
l__self___down_blocks_2_attentions_0_proj_in_weight = self.L__self___down_blocks_2_attentions_0_proj_in_weight | |
l__self___down_blocks_2_attentions_0_proj_in_bias = self.L__self___down_blocks_2_attentions_0_proj_in_bias | |
hidden_states_72 = torch._C._nn.linear(hidden_states_71, l__self___down_blocks_2_attentions_0_proj_in_weight, l__self___down_blocks_2_attentions_0_proj_in_bias); hidden_states_71 = l__self___down_blocks_2_attentions_0_proj_in_weight = l__self___down_blocks_2_attentions_0_proj_in_bias = None | |
hidden_states_73 = self.L__self___down_blocks_2_attentions_0_transformer_blocks_0(hidden_states_72, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_72 = None | |
hidden_states_74 = self.L__self___down_blocks_2_attentions_0_transformer_blocks_1(hidden_states_73, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_73 = None | |
hidden_states_75 = self.L__self___down_blocks_2_attentions_0_transformer_blocks_2(hidden_states_74, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_74 = None | |
hidden_states_76 = self.L__self___down_blocks_2_attentions_0_transformer_blocks_3(hidden_states_75, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_75 = None | |
hidden_states_77 = self.L__self___down_blocks_2_attentions_0_transformer_blocks_4(hidden_states_76, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_76 = None | |
hidden_states_78 = self.L__self___down_blocks_2_attentions_0_transformer_blocks_5(hidden_states_77, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_77 = None | |
hidden_states_79 = self.L__self___down_blocks_2_attentions_0_transformer_blocks_6(hidden_states_78, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_78 = None | |
hidden_states_80 = self.L__self___down_blocks_2_attentions_0_transformer_blocks_7(hidden_states_79, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_79 = None | |
hidden_states_81 = self.L__self___down_blocks_2_attentions_0_transformer_blocks_8(hidden_states_80, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_80 = None | |
hidden_states_82 = self.L__self___down_blocks_2_attentions_0_transformer_blocks_9(hidden_states_81, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_81 = None | |
l__self___down_blocks_2_attentions_0_proj_out_weight = self.L__self___down_blocks_2_attentions_0_proj_out_weight | |
l__self___down_blocks_2_attentions_0_proj_out_bias = self.L__self___down_blocks_2_attentions_0_proj_out_bias | |
hidden_states_83 = torch._C._nn.linear(hidden_states_82, l__self___down_blocks_2_attentions_0_proj_out_weight, l__self___down_blocks_2_attentions_0_proj_out_bias); hidden_states_82 = l__self___down_blocks_2_attentions_0_proj_out_weight = l__self___down_blocks_2_attentions_0_proj_out_bias = None | |
reshape_6 = hidden_states_83.reshape(2, 32, 32, 1280); hidden_states_83 = None | |
permute_5 = reshape_6.permute(0, 3, 1, 2); reshape_6 = None | |
hidden_states_84 = permute_5.contiguous(); permute_5 = None | |
res_hidden_states_1 = hidden_states_84 + residual_2; hidden_states_84 = residual_2 = None | |
hidden_states_87 = self.L__self___down_blocks_2_resnets_1_norm1(res_hidden_states_1) | |
hidden_states_88 = self.L__self___time_embedding_act(hidden_states_87); hidden_states_87 = None | |
l__self___down_blocks_2_resnets_1_conv1_weight = self.L__self___down_blocks_2_resnets_1_conv1_weight | |
l__self___down_blocks_2_resnets_1_conv1_bias = self.L__self___down_blocks_2_resnets_1_conv1_bias | |
hidden_states_89 = torch.conv2d(hidden_states_88, l__self___down_blocks_2_resnets_1_conv1_weight, l__self___down_blocks_2_resnets_1_conv1_bias, (1, 1), (1, 1), (1, 1), 1); hidden_states_88 = l__self___down_blocks_2_resnets_1_conv1_weight = l__self___down_blocks_2_resnets_1_conv1_bias = None | |
temb_10 = self.L__self___time_embedding_act(emb_11) | |
l__self___down_blocks_2_resnets_1_time_emb_proj_weight = self.L__self___down_blocks_2_resnets_1_time_emb_proj_weight | |
l__self___down_blocks_2_resnets_1_time_emb_proj_bias = self.L__self___down_blocks_2_resnets_1_time_emb_proj_bias | |
out_15 = torch._C._nn.linear(temb_10, l__self___down_blocks_2_resnets_1_time_emb_proj_weight, l__self___down_blocks_2_resnets_1_time_emb_proj_bias); temb_10 = l__self___down_blocks_2_resnets_1_time_emb_proj_weight = l__self___down_blocks_2_resnets_1_time_emb_proj_bias = None | |
temb_11 = out_15[(slice(None, None, None), slice(None, None, None), None, None)]; out_15 = None | |
hidden_states_90 = hidden_states_89 + temb_11; hidden_states_89 = temb_11 = None | |
hidden_states_91 = self.L__self___down_blocks_2_resnets_1_norm2(hidden_states_90); hidden_states_90 = None | |
hidden_states_92 = self.L__self___time_embedding_act(hidden_states_91); hidden_states_91 = None | |
hidden_states_93 = self.L__self___down_blocks_2_resnets_1_dropout(hidden_states_92); hidden_states_92 = None | |
l__self___down_blocks_2_resnets_1_conv2_weight = self.L__self___down_blocks_2_resnets_1_conv2_weight | |
l__self___down_blocks_2_resnets_1_conv2_bias = self.L__self___down_blocks_2_resnets_1_conv2_bias | |
hidden_states_94 = torch.conv2d(hidden_states_93, l__self___down_blocks_2_resnets_1_conv2_weight, l__self___down_blocks_2_resnets_1_conv2_bias, (1, 1), (1, 1), (1, 1), 1); hidden_states_93 = l__self___down_blocks_2_resnets_1_conv2_weight = l__self___down_blocks_2_resnets_1_conv2_bias = None | |
add_15 = res_hidden_states_1 + hidden_states_94; hidden_states_94 = None | |
residual_3 = add_15 / 1.0; add_15 = None | |
hidden_states_96 = self.L__self___down_blocks_2_attentions_1_norm(residual_3) | |
permute_6 = hidden_states_96.permute(0, 2, 3, 1); hidden_states_96 = None | |
hidden_states_97 = permute_6.reshape(2, 1024, 1280); permute_6 = None | |
l__self___down_blocks_2_attentions_1_proj_in_weight = self.L__self___down_blocks_2_attentions_1_proj_in_weight | |
l__self___down_blocks_2_attentions_1_proj_in_bias = self.L__self___down_blocks_2_attentions_1_proj_in_bias | |
hidden_states_98 = torch._C._nn.linear(hidden_states_97, l__self___down_blocks_2_attentions_1_proj_in_weight, l__self___down_blocks_2_attentions_1_proj_in_bias); hidden_states_97 = l__self___down_blocks_2_attentions_1_proj_in_weight = l__self___down_blocks_2_attentions_1_proj_in_bias = None | |
hidden_states_99 = self.L__self___down_blocks_2_attentions_1_transformer_blocks_0(hidden_states_98, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_98 = None | |
hidden_states_100 = self.L__self___down_blocks_2_attentions_1_transformer_blocks_1(hidden_states_99, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_99 = None | |
hidden_states_101 = self.L__self___down_blocks_2_attentions_1_transformer_blocks_2(hidden_states_100, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_100 = None | |
hidden_states_102 = self.L__self___down_blocks_2_attentions_1_transformer_blocks_3(hidden_states_101, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_101 = None | |
hidden_states_103 = self.L__self___down_blocks_2_attentions_1_transformer_blocks_4(hidden_states_102, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_102 = None | |
hidden_states_104 = self.L__self___down_blocks_2_attentions_1_transformer_blocks_5(hidden_states_103, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_103 = None | |
hidden_states_105 = self.L__self___down_blocks_2_attentions_1_transformer_blocks_6(hidden_states_104, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_104 = None | |
hidden_states_106 = self.L__self___down_blocks_2_attentions_1_transformer_blocks_7(hidden_states_105, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_105 = None | |
hidden_states_107 = self.L__self___down_blocks_2_attentions_1_transformer_blocks_8(hidden_states_106, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_106 = None | |
hidden_states_108 = self.L__self___down_blocks_2_attentions_1_transformer_blocks_9(hidden_states_107, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_107 = None | |
l__self___down_blocks_2_attentions_1_proj_out_weight = self.L__self___down_blocks_2_attentions_1_proj_out_weight | |
l__self___down_blocks_2_attentions_1_proj_out_bias = self.L__self___down_blocks_2_attentions_1_proj_out_bias | |
hidden_states_109 = torch._C._nn.linear(hidden_states_108, l__self___down_blocks_2_attentions_1_proj_out_weight, l__self___down_blocks_2_attentions_1_proj_out_bias); hidden_states_108 = l__self___down_blocks_2_attentions_1_proj_out_weight = l__self___down_blocks_2_attentions_1_proj_out_bias = None | |
reshape_8 = hidden_states_109.reshape(2, 32, 32, 1280); hidden_states_109 = None | |
permute_7 = reshape_8.permute(0, 3, 1, 2); reshape_8 = None | |
hidden_states_110 = permute_7.contiguous(); permute_7 = None | |
res_hidden_states = hidden_states_110 + residual_3; hidden_states_110 = residual_3 = None | |
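# mid_block: a 1280-channel resnet, a Transformer2D stage with ten BasicTransformerBlocks, and then the second mid-block resnet (the resnets_slice_1__None__None___0 parameters above).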
hidden_states_113 = self.L__self___mid_block_resnets_0_norm1(res_hidden_states) | |
hidden_states_114 = self.L__self___time_embedding_act(hidden_states_113); hidden_states_113 = None | |
l__self___mid_block_resnets_0_conv1_weight = self.L__self___mid_block_resnets_0_conv1_weight | |
l__self___mid_block_resnets_0_conv1_bias = self.L__self___mid_block_resnets_0_conv1_bias | |
hidden_states_115 = torch.conv2d(hidden_states_114, l__self___mid_block_resnets_0_conv1_weight, l__self___mid_block_resnets_0_conv1_bias, (1, 1), (1, 1), (1, 1), 1); hidden_states_114 = l__self___mid_block_resnets_0_conv1_weight = l__self___mid_block_resnets_0_conv1_bias = None | |
temb_12 = self.L__self___time_embedding_act(emb_11) | |
l__self___mid_block_resnets_0_time_emb_proj_weight = self.L__self___mid_block_resnets_0_time_emb_proj_weight | |
l__self___mid_block_resnets_0_time_emb_proj_bias = self.L__self___mid_block_resnets_0_time_emb_proj_bias | |
out_18 = torch._C._nn.linear(temb_12, l__self___mid_block_resnets_0_time_emb_proj_weight, l__self___mid_block_resnets_0_time_emb_proj_bias); temb_12 = l__self___mid_block_resnets_0_time_emb_proj_weight = l__self___mid_block_resnets_0_time_emb_proj_bias = None | |
temb_13 = out_18[(slice(None, None, None), slice(None, None, None), None, None)]; out_18 = None | |
hidden_states_116 = hidden_states_115 + temb_13; hidden_states_115 = temb_13 = None | |
hidden_states_117 = self.L__self___mid_block_resnets_0_norm2(hidden_states_116); hidden_states_116 = None | |
hidden_states_118 = self.L__self___time_embedding_act(hidden_states_117); hidden_states_117 = None | |
hidden_states_119 = self.L__self___mid_block_resnets_0_dropout(hidden_states_118); hidden_states_118 = None | |
l__self___mid_block_resnets_0_conv2_weight = self.L__self___mid_block_resnets_0_conv2_weight | |
l__self___mid_block_resnets_0_conv2_bias = self.L__self___mid_block_resnets_0_conv2_bias | |
hidden_states_120 = torch.conv2d(hidden_states_119, l__self___mid_block_resnets_0_conv2_weight, l__self___mid_block_resnets_0_conv2_bias, (1, 1), (1, 1), (1, 1), 1); hidden_states_119 = l__self___mid_block_resnets_0_conv2_weight = l__self___mid_block_resnets_0_conv2_bias = None | |
add_18 = res_hidden_states + hidden_states_120; hidden_states_120 = None | |
residual_4 = add_18 / 1; add_18 = None | |
hidden_states_122 = self.L__self___mid_block_attentions_0_norm(residual_4) | |
permute_8 = hidden_states_122.permute(0, 2, 3, 1); hidden_states_122 = None | |
hidden_states_123 = permute_8.reshape(2, 1024, 1280); permute_8 = None | |
l__self___mid_block_attentions_0_proj_in_weight = self.L__self___mid_block_attentions_0_proj_in_weight | |
l__self___mid_block_attentions_0_proj_in_bias = self.L__self___mid_block_attentions_0_proj_in_bias | |
hidden_states_124 = torch._C._nn.linear(hidden_states_123, l__self___mid_block_attentions_0_proj_in_weight, l__self___mid_block_attentions_0_proj_in_bias); hidden_states_123 = l__self___mid_block_attentions_0_proj_in_weight = l__self___mid_block_attentions_0_proj_in_bias = None | |
hidden_states_125 = self.L__self___mid_block_attentions_0_transformer_blocks_0(hidden_states_124, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_124 = None | |
hidden_states_126 = self.L__self___mid_block_attentions_0_transformer_blocks_1(hidden_states_125, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_125 = None | |
hidden_states_127 = self.L__self___mid_block_attentions_0_transformer_blocks_2(hidden_states_126, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_126 = None | |
hidden_states_128 = self.L__self___mid_block_attentions_0_transformer_blocks_3(hidden_states_127, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_127 = None | |
hidden_states_129 = self.L__self___mid_block_attentions_0_transformer_blocks_4(hidden_states_128, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_128 = None | |
hidden_states_130 = self.L__self___mid_block_attentions_0_transformer_blocks_5(hidden_states_129, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_129 = None | |
hidden_states_131 = self.L__self___mid_block_attentions_0_transformer_blocks_6(hidden_states_130, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_130 = None | |
hidden_states_132 = self.L__self___mid_block_attentions_0_transformer_blocks_7(hidden_states_131, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_131 = None | |
hidden_states_133 = self.L__self___mid_block_attentions_0_transformer_blocks_8(hidden_states_132, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_132 = None | |
hidden_states_134 = self.L__self___mid_block_attentions_0_transformer_blocks_9(hidden_states_133, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_133 = None | |
l__self___mid_block_attentions_0_proj_out_weight = self.L__self___mid_block_attentions_0_proj_out_weight | |
l__self___mid_block_attentions_0_proj_out_bias = self.L__self___mid_block_attentions_0_proj_out_bias | |
hidden_states_135 = torch._C._nn.linear(hidden_states_134, l__self___mid_block_attentions_0_proj_out_weight, l__self___mid_block_attentions_0_proj_out_bias); hidden_states_134 = l__self___mid_block_attentions_0_proj_out_weight = l__self___mid_block_attentions_0_proj_out_bias = None | |
reshape_10 = hidden_states_135.reshape(2, 32, 32, 1280); hidden_states_135 = None | |
permute_9 = reshape_10.permute(0, 3, 1, 2); reshape_10 = None | |
hidden_states_136 = permute_9.contiguous(); permute_9 = None | |
hidden_states_138 = hidden_states_136 + residual_4; hidden_states_136 = residual_4 = None | |
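# mid_block.resnets[1]: Dynamo traced this module through a slice, so the attribute
# names below encode resnets[slice(1, None, None)][0], i.e. resnets[1:][0].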
hidden_states_139 = self.L__self___mid_block_resnets_slice_1__None__None___0_norm1(hidden_states_138) | |
hidden_states_140 = self.L__self___time_embedding_act(hidden_states_139); hidden_states_139 = None | |
l__self___mid_block_resnets_slice_1__none__none___0_conv1_weight = self.L__self___mid_block_resnets_slice_1__None__None___0_conv1_weight | |
l__self___mid_block_resnets_slice_1__none__none___0_conv1_bias = self.L__self___mid_block_resnets_slice_1__None__None___0_conv1_bias | |
hidden_states_141 = torch.conv2d(hidden_states_140, l__self___mid_block_resnets_slice_1__none__none___0_conv1_weight, l__self___mid_block_resnets_slice_1__none__none___0_conv1_bias, (1, 1), (1, 1), (1, 1), 1); hidden_states_140 = l__self___mid_block_resnets_slice_1__none__none___0_conv1_weight = l__self___mid_block_resnets_slice_1__none__none___0_conv1_bias = None | |
temb_14 = self.L__self___time_embedding_act(emb_11) | |
l__self___mid_block_resnets_slice_1__none__none___0_time_emb_proj_weight = self.L__self___mid_block_resnets_slice_1__None__None___0_time_emb_proj_weight | |
l__self___mid_block_resnets_slice_1__none__none___0_time_emb_proj_bias = self.L__self___mid_block_resnets_slice_1__None__None___0_time_emb_proj_bias | |
out_21 = torch._C._nn.linear(temb_14, l__self___mid_block_resnets_slice_1__none__none___0_time_emb_proj_weight, l__self___mid_block_resnets_slice_1__none__none___0_time_emb_proj_bias); temb_14 = l__self___mid_block_resnets_slice_1__none__none___0_time_emb_proj_weight = l__self___mid_block_resnets_slice_1__none__none___0_time_emb_proj_bias = None | |
temb_15 = out_21[(slice(None, None, None), slice(None, None, None), None, None)]; out_21 = None | |
hidden_states_142 = hidden_states_141 + temb_15; hidden_states_141 = temb_15 = None | |
hidden_states_143 = self.L__self___mid_block_resnets_slice_1__None__None___0_norm2(hidden_states_142); hidden_states_142 = None | |
hidden_states_144 = self.L__self___time_embedding_act(hidden_states_143); hidden_states_143 = None | |
hidden_states_145 = self.L__self___mid_block_resnets_slice_1__None__None___0_dropout(hidden_states_144); hidden_states_144 = None | |
l__self___mid_block_resnets_slice_1__none__none___0_conv2_weight = self.L__self___mid_block_resnets_slice_1__None__None___0_conv2_weight | |
l__self___mid_block_resnets_slice_1__none__none___0_conv2_bias = self.L__self___mid_block_resnets_slice_1__None__None___0_conv2_bias | |
hidden_states_146 = torch.conv2d(hidden_states_145, l__self___mid_block_resnets_slice_1__none__none___0_conv2_weight, l__self___mid_block_resnets_slice_1__none__none___0_conv2_bias, (1, 1), (1, 1), (1, 1), 1); hidden_states_145 = l__self___mid_block_resnets_slice_1__none__none___0_conv2_weight = l__self___mid_block_resnets_slice_1__none__none___0_conv2_bias = None | |
add_21 = hidden_states_138 + hidden_states_146; hidden_states_138 = hidden_states_146 = None | |
sample_10 = add_21 / 1; add_21 = None | |
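# up_blocks[0], resnets[0]: concatenate with the saved skip connection
# (res_hidden_states) along channels; the conv_shortcut projects the concatenated
# input back to 1280 channels before the residual add.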
hidden_states_149 = torch.cat([sample_10, res_hidden_states], dim = 1); sample_10 = res_hidden_states = None | |
hidden_states_150 = self.L__self___up_blocks_0_resnets_0_norm1(hidden_states_149) | |
hidden_states_151 = self.L__self___time_embedding_act(hidden_states_150); hidden_states_150 = None | |
l__self___up_blocks_0_resnets_0_conv1_weight = self.L__self___up_blocks_0_resnets_0_conv1_weight | |
l__self___up_blocks_0_resnets_0_conv1_bias = self.L__self___up_blocks_0_resnets_0_conv1_bias | |
hidden_states_152 = torch.conv2d(hidden_states_151, l__self___up_blocks_0_resnets_0_conv1_weight, l__self___up_blocks_0_resnets_0_conv1_bias, (1, 1), (1, 1), (1, 1), 1); hidden_states_151 = l__self___up_blocks_0_resnets_0_conv1_weight = l__self___up_blocks_0_resnets_0_conv1_bias = None | |
temb_16 = self.L__self___time_embedding_act(emb_11) | |
l__self___up_blocks_0_resnets_0_time_emb_proj_weight = self.L__self___up_blocks_0_resnets_0_time_emb_proj_weight | |
l__self___up_blocks_0_resnets_0_time_emb_proj_bias = self.L__self___up_blocks_0_resnets_0_time_emb_proj_bias | |
out_22 = torch._C._nn.linear(temb_16, l__self___up_blocks_0_resnets_0_time_emb_proj_weight, l__self___up_blocks_0_resnets_0_time_emb_proj_bias); temb_16 = l__self___up_blocks_0_resnets_0_time_emb_proj_weight = l__self___up_blocks_0_resnets_0_time_emb_proj_bias = None | |
temb_17 = out_22[(slice(None, None, None), slice(None, None, None), None, None)]; out_22 = None | |
hidden_states_153 = hidden_states_152 + temb_17; hidden_states_152 = temb_17 = None | |
hidden_states_154 = self.L__self___up_blocks_0_resnets_0_norm2(hidden_states_153); hidden_states_153 = None | |
hidden_states_155 = self.L__self___time_embedding_act(hidden_states_154); hidden_states_154 = None | |
hidden_states_156 = self.L__self___up_blocks_0_resnets_0_dropout(hidden_states_155); hidden_states_155 = None | |
l__self___up_blocks_0_resnets_0_conv2_weight = self.L__self___up_blocks_0_resnets_0_conv2_weight | |
l__self___up_blocks_0_resnets_0_conv2_bias = self.L__self___up_blocks_0_resnets_0_conv2_bias | |
hidden_states_157 = torch.conv2d(hidden_states_156, l__self___up_blocks_0_resnets_0_conv2_weight, l__self___up_blocks_0_resnets_0_conv2_bias, (1, 1), (1, 1), (1, 1), 1); hidden_states_156 = l__self___up_blocks_0_resnets_0_conv2_weight = l__self___up_blocks_0_resnets_0_conv2_bias = None | |
l__self___up_blocks_0_resnets_0_conv_shortcut_weight = self.L__self___up_blocks_0_resnets_0_conv_shortcut_weight | |
l__self___up_blocks_0_resnets_0_conv_shortcut_bias = self.L__self___up_blocks_0_resnets_0_conv_shortcut_bias | |
input_tensor_2 = torch.conv2d(hidden_states_149, l__self___up_blocks_0_resnets_0_conv_shortcut_weight, l__self___up_blocks_0_resnets_0_conv_shortcut_bias, (1, 1), (0, 0), (1, 1), 1); hidden_states_149 = l__self___up_blocks_0_resnets_0_conv_shortcut_weight = l__self___up_blocks_0_resnets_0_conv_shortcut_bias = None | |
add_23 = input_tensor_2 + hidden_states_157; input_tensor_2 = hidden_states_157 = None | |
residual_5 = add_23 / 1.0; add_23 = None | |
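# up_blocks[0].attentions[0]: same proj_in -> ten transformer blocks -> proj_out
# pattern as the mid-block attention, at 32x32 resolution with 1280 channels.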
hidden_states_159 = self.L__self___up_blocks_0_attentions_0_norm(residual_5) | |
permute_10 = hidden_states_159.permute(0, 2, 3, 1); hidden_states_159 = None | |
hidden_states_160 = permute_10.reshape(2, 1024, 1280); permute_10 = None | |
l__self___up_blocks_0_attentions_0_proj_in_weight = self.L__self___up_blocks_0_attentions_0_proj_in_weight | |
l__self___up_blocks_0_attentions_0_proj_in_bias = self.L__self___up_blocks_0_attentions_0_proj_in_bias | |
hidden_states_161 = torch._C._nn.linear(hidden_states_160, l__self___up_blocks_0_attentions_0_proj_in_weight, l__self___up_blocks_0_attentions_0_proj_in_bias); hidden_states_160 = l__self___up_blocks_0_attentions_0_proj_in_weight = l__self___up_blocks_0_attentions_0_proj_in_bias = None | |
hidden_states_162 = self.L__self___up_blocks_0_attentions_0_transformer_blocks_0(hidden_states_161, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_161 = None | |
hidden_states_163 = self.L__self___up_blocks_0_attentions_0_transformer_blocks_1(hidden_states_162, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_162 = None | |
hidden_states_164 = self.L__self___up_blocks_0_attentions_0_transformer_blocks_2(hidden_states_163, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_163 = None | |
hidden_states_165 = self.L__self___up_blocks_0_attentions_0_transformer_blocks_3(hidden_states_164, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_164 = None | |
hidden_states_166 = self.L__self___up_blocks_0_attentions_0_transformer_blocks_4(hidden_states_165, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_165 = None | |
hidden_states_167 = self.L__self___up_blocks_0_attentions_0_transformer_blocks_5(hidden_states_166, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_166 = None | |
hidden_states_168 = self.L__self___up_blocks_0_attentions_0_transformer_blocks_6(hidden_states_167, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_167 = None | |
hidden_states_169 = self.L__self___up_blocks_0_attentions_0_transformer_blocks_7(hidden_states_168, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_168 = None | |
hidden_states_170 = self.L__self___up_blocks_0_attentions_0_transformer_blocks_8(hidden_states_169, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_169 = None | |
hidden_states_171 = self.L__self___up_blocks_0_attentions_0_transformer_blocks_9(hidden_states_170, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_170 = None | |
l__self___up_blocks_0_attentions_0_proj_out_weight = self.L__self___up_blocks_0_attentions_0_proj_out_weight | |
l__self___up_blocks_0_attentions_0_proj_out_bias = self.L__self___up_blocks_0_attentions_0_proj_out_bias | |
hidden_states_172 = torch._C._nn.linear(hidden_states_171, l__self___up_blocks_0_attentions_0_proj_out_weight, l__self___up_blocks_0_attentions_0_proj_out_bias); hidden_states_171 = l__self___up_blocks_0_attentions_0_proj_out_weight = l__self___up_blocks_0_attentions_0_proj_out_bias = None | |
reshape_12 = hidden_states_172.reshape(2, 32, 32, 1280); hidden_states_172 = None | |
permute_11 = reshape_12.permute(0, 3, 1, 2); reshape_12 = None | |
hidden_states_173 = permute_11.contiguous(); permute_11 = None | |
hidden_states_174 = hidden_states_173 + residual_5; hidden_states_173 = residual_5 = None | |
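# up_blocks[0], second resnet/attention pair: concatenate with res_hidden_states_1 and repeat.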
hidden_states_176 = torch.cat([hidden_states_174, res_hidden_states_1], dim = 1); hidden_states_174 = res_hidden_states_1 = None | |
hidden_states_177 = self.L__self___up_blocks_0_resnets_1_norm1(hidden_states_176) | |
hidden_states_178 = self.L__self___time_embedding_act(hidden_states_177); hidden_states_177 = None | |
l__self___up_blocks_0_resnets_1_conv1_weight = self.L__self___up_blocks_0_resnets_1_conv1_weight | |
l__self___up_blocks_0_resnets_1_conv1_bias = self.L__self___up_blocks_0_resnets_1_conv1_bias | |
hidden_states_179 = torch.conv2d(hidden_states_178, l__self___up_blocks_0_resnets_1_conv1_weight, l__self___up_blocks_0_resnets_1_conv1_bias, (1, 1), (1, 1), (1, 1), 1); hidden_states_178 = l__self___up_blocks_0_resnets_1_conv1_weight = l__self___up_blocks_0_resnets_1_conv1_bias = None | |
temb_18 = self.L__self___time_embedding_act(emb_11) | |
l__self___up_blocks_0_resnets_1_time_emb_proj_weight = self.L__self___up_blocks_0_resnets_1_time_emb_proj_weight | |
l__self___up_blocks_0_resnets_1_time_emb_proj_bias = self.L__self___up_blocks_0_resnets_1_time_emb_proj_bias | |
out_25 = torch._C._nn.linear(temb_18, l__self___up_blocks_0_resnets_1_time_emb_proj_weight, l__self___up_blocks_0_resnets_1_time_emb_proj_bias); temb_18 = l__self___up_blocks_0_resnets_1_time_emb_proj_weight = l__self___up_blocks_0_resnets_1_time_emb_proj_bias = None | |
temb_19 = out_25[(slice(None, None, None), slice(None, None, None), None, None)]; out_25 = None | |
hidden_states_180 = hidden_states_179 + temb_19; hidden_states_179 = temb_19 = None | |
hidden_states_181 = self.L__self___up_blocks_0_resnets_1_norm2(hidden_states_180); hidden_states_180 = None | |
hidden_states_182 = self.L__self___time_embedding_act(hidden_states_181); hidden_states_181 = None | |
hidden_states_183 = self.L__self___up_blocks_0_resnets_1_dropout(hidden_states_182); hidden_states_182 = None | |
l__self___up_blocks_0_resnets_1_conv2_weight = self.L__self___up_blocks_0_resnets_1_conv2_weight | |
l__self___up_blocks_0_resnets_1_conv2_bias = self.L__self___up_blocks_0_resnets_1_conv2_bias | |
hidden_states_184 = torch.conv2d(hidden_states_183, l__self___up_blocks_0_resnets_1_conv2_weight, l__self___up_blocks_0_resnets_1_conv2_bias, (1, 1), (1, 1), (1, 1), 1); hidden_states_183 = l__self___up_blocks_0_resnets_1_conv2_weight = l__self___up_blocks_0_resnets_1_conv2_bias = None | |
l__self___up_blocks_0_resnets_1_conv_shortcut_weight = self.L__self___up_blocks_0_resnets_1_conv_shortcut_weight | |
l__self___up_blocks_0_resnets_1_conv_shortcut_bias = self.L__self___up_blocks_0_resnets_1_conv_shortcut_bias | |
input_tensor_3 = torch.conv2d(hidden_states_176, l__self___up_blocks_0_resnets_1_conv_shortcut_weight, l__self___up_blocks_0_resnets_1_conv_shortcut_bias, (1, 1), (0, 0), (1, 1), 1); hidden_states_176 = l__self___up_blocks_0_resnets_1_conv_shortcut_weight = l__self___up_blocks_0_resnets_1_conv_shortcut_bias = None | |
add_26 = input_tensor_3 + hidden_states_184; input_tensor_3 = hidden_states_184 = None | |
residual_6 = add_26 / 1.0; add_26 = None | |
hidden_states_186 = self.L__self___up_blocks_0_attentions_1_norm(residual_6) | |
permute_12 = hidden_states_186.permute(0, 2, 3, 1); hidden_states_186 = None | |
hidden_states_187 = permute_12.reshape(2, 1024, 1280); permute_12 = None | |
l__self___up_blocks_0_attentions_1_proj_in_weight = self.L__self___up_blocks_0_attentions_1_proj_in_weight | |
l__self___up_blocks_0_attentions_1_proj_in_bias = self.L__self___up_blocks_0_attentions_1_proj_in_bias | |
hidden_states_188 = torch._C._nn.linear(hidden_states_187, l__self___up_blocks_0_attentions_1_proj_in_weight, l__self___up_blocks_0_attentions_1_proj_in_bias); hidden_states_187 = l__self___up_blocks_0_attentions_1_proj_in_weight = l__self___up_blocks_0_attentions_1_proj_in_bias = None | |
hidden_states_189 = self.L__self___up_blocks_0_attentions_1_transformer_blocks_0(hidden_states_188, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_188 = None | |
hidden_states_190 = self.L__self___up_blocks_0_attentions_1_transformer_blocks_1(hidden_states_189, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_189 = None | |
hidden_states_191 = self.L__self___up_blocks_0_attentions_1_transformer_blocks_2(hidden_states_190, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_190 = None | |
hidden_states_192 = self.L__self___up_blocks_0_attentions_1_transformer_blocks_3(hidden_states_191, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_191 = None | |
hidden_states_193 = self.L__self___up_blocks_0_attentions_1_transformer_blocks_4(hidden_states_192, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_192 = None | |
hidden_states_194 = self.L__self___up_blocks_0_attentions_1_transformer_blocks_5(hidden_states_193, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_193 = None | |
hidden_states_195 = self.L__self___up_blocks_0_attentions_1_transformer_blocks_6(hidden_states_194, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_194 = None | |
hidden_states_196 = self.L__self___up_blocks_0_attentions_1_transformer_blocks_7(hidden_states_195, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_195 = None | |
hidden_states_197 = self.L__self___up_blocks_0_attentions_1_transformer_blocks_8(hidden_states_196, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_196 = None | |
hidden_states_198 = self.L__self___up_blocks_0_attentions_1_transformer_blocks_9(hidden_states_197, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_197 = None | |
l__self___up_blocks_0_attentions_1_proj_out_weight = self.L__self___up_blocks_0_attentions_1_proj_out_weight | |
l__self___up_blocks_0_attentions_1_proj_out_bias = self.L__self___up_blocks_0_attentions_1_proj_out_bias | |
hidden_states_199 = torch._C._nn.linear(hidden_states_198, l__self___up_blocks_0_attentions_1_proj_out_weight, l__self___up_blocks_0_attentions_1_proj_out_bias); hidden_states_198 = l__self___up_blocks_0_attentions_1_proj_out_weight = l__self___up_blocks_0_attentions_1_proj_out_bias = None | |
reshape_14 = hidden_states_199.reshape(2, 32, 32, 1280); hidden_states_199 = None | |
permute_13 = reshape_14.permute(0, 3, 1, 2); reshape_14 = None | |
hidden_states_200 = permute_13.contiguous(); permute_13 = None | |
hidden_states_201 = hidden_states_200 + residual_6; hidden_states_200 = residual_6 = None | |
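# up_blocks[0], third resnet/attention pair: concatenate with res_hidden_states_2 and repeat.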
hidden_states_203 = torch.cat([hidden_states_201, res_hidden_states_2], dim = 1); hidden_states_201 = res_hidden_states_2 = None | |
hidden_states_204 = self.L__self___up_blocks_0_resnets_2_norm1(hidden_states_203) | |
hidden_states_205 = self.L__self___time_embedding_act(hidden_states_204); hidden_states_204 = None | |
l__self___up_blocks_0_resnets_2_conv1_weight = self.L__self___up_blocks_0_resnets_2_conv1_weight | |
l__self___up_blocks_0_resnets_2_conv1_bias = self.L__self___up_blocks_0_resnets_2_conv1_bias | |
hidden_states_206 = torch.conv2d(hidden_states_205, l__self___up_blocks_0_resnets_2_conv1_weight, l__self___up_blocks_0_resnets_2_conv1_bias, (1, 1), (1, 1), (1, 1), 1); hidden_states_205 = l__self___up_blocks_0_resnets_2_conv1_weight = l__self___up_blocks_0_resnets_2_conv1_bias = None | |
temb_20 = self.L__self___time_embedding_act(emb_11) | |
l__self___up_blocks_0_resnets_2_time_emb_proj_weight = self.L__self___up_blocks_0_resnets_2_time_emb_proj_weight | |
l__self___up_blocks_0_resnets_2_time_emb_proj_bias = self.L__self___up_blocks_0_resnets_2_time_emb_proj_bias | |
out_28 = torch._C._nn.linear(temb_20, l__self___up_blocks_0_resnets_2_time_emb_proj_weight, l__self___up_blocks_0_resnets_2_time_emb_proj_bias); temb_20 = l__self___up_blocks_0_resnets_2_time_emb_proj_weight = l__self___up_blocks_0_resnets_2_time_emb_proj_bias = None | |
temb_21 = out_28[(slice(None, None, None), slice(None, None, None), None, None)]; out_28 = None | |
hidden_states_207 = hidden_states_206 + temb_21; hidden_states_206 = temb_21 = None | |
hidden_states_208 = self.L__self___up_blocks_0_resnets_2_norm2(hidden_states_207); hidden_states_207 = None | |
hidden_states_209 = self.L__self___time_embedding_act(hidden_states_208); hidden_states_208 = None | |
hidden_states_210 = self.L__self___up_blocks_0_resnets_2_dropout(hidden_states_209); hidden_states_209 = None | |
l__self___up_blocks_0_resnets_2_conv2_weight = self.L__self___up_blocks_0_resnets_2_conv2_weight | |
l__self___up_blocks_0_resnets_2_conv2_bias = self.L__self___up_blocks_0_resnets_2_conv2_bias | |
hidden_states_211 = torch.conv2d(hidden_states_210, l__self___up_blocks_0_resnets_2_conv2_weight, l__self___up_blocks_0_resnets_2_conv2_bias, (1, 1), (1, 1), (1, 1), 1); hidden_states_210 = l__self___up_blocks_0_resnets_2_conv2_weight = l__self___up_blocks_0_resnets_2_conv2_bias = None | |
l__self___up_blocks_0_resnets_2_conv_shortcut_weight = self.L__self___up_blocks_0_resnets_2_conv_shortcut_weight | |
l__self___up_blocks_0_resnets_2_conv_shortcut_bias = self.L__self___up_blocks_0_resnets_2_conv_shortcut_bias | |
input_tensor_4 = torch.conv2d(hidden_states_203, l__self___up_blocks_0_resnets_2_conv_shortcut_weight, l__self___up_blocks_0_resnets_2_conv_shortcut_bias, (1, 1), (0, 0), (1, 1), 1); hidden_states_203 = l__self___up_blocks_0_resnets_2_conv_shortcut_weight = l__self___up_blocks_0_resnets_2_conv_shortcut_bias = None | |
add_29 = input_tensor_4 + hidden_states_211; input_tensor_4 = hidden_states_211 = None | |
residual_7 = add_29 / 1.0; add_29 = None | |
hidden_states_213 = self.L__self___up_blocks_0_attentions_2_norm(residual_7) | |
permute_14 = hidden_states_213.permute(0, 2, 3, 1); hidden_states_213 = None | |
hidden_states_214 = permute_14.reshape(2, 1024, 1280); permute_14 = None | |
l__self___up_blocks_0_attentions_2_proj_in_weight = self.L__self___up_blocks_0_attentions_2_proj_in_weight | |
l__self___up_blocks_0_attentions_2_proj_in_bias = self.L__self___up_blocks_0_attentions_2_proj_in_bias | |
hidden_states_215 = torch._C._nn.linear(hidden_states_214, l__self___up_blocks_0_attentions_2_proj_in_weight, l__self___up_blocks_0_attentions_2_proj_in_bias); hidden_states_214 = l__self___up_blocks_0_attentions_2_proj_in_weight = l__self___up_blocks_0_attentions_2_proj_in_bias = None | |
hidden_states_216 = self.L__self___up_blocks_0_attentions_2_transformer_blocks_0(hidden_states_215, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_215 = None | |
hidden_states_217 = self.L__self___up_blocks_0_attentions_2_transformer_blocks_1(hidden_states_216, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_216 = None | |
hidden_states_218 = self.L__self___up_blocks_0_attentions_2_transformer_blocks_2(hidden_states_217, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_217 = None | |
hidden_states_219 = self.L__self___up_blocks_0_attentions_2_transformer_blocks_3(hidden_states_218, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_218 = None | |
hidden_states_220 = self.L__self___up_blocks_0_attentions_2_transformer_blocks_4(hidden_states_219, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_219 = None | |
hidden_states_221 = self.L__self___up_blocks_0_attentions_2_transformer_blocks_5(hidden_states_220, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_220 = None | |
hidden_states_222 = self.L__self___up_blocks_0_attentions_2_transformer_blocks_6(hidden_states_221, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_221 = None | |
hidden_states_223 = self.L__self___up_blocks_0_attentions_2_transformer_blocks_7(hidden_states_222, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_222 = None | |
hidden_states_224 = self.L__self___up_blocks_0_attentions_2_transformer_blocks_8(hidden_states_223, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_223 = None | |
hidden_states_225 = self.L__self___up_blocks_0_attentions_2_transformer_blocks_9(hidden_states_224, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_224 = None | |
l__self___up_blocks_0_attentions_2_proj_out_weight = self.L__self___up_blocks_0_attentions_2_proj_out_weight | |
l__self___up_blocks_0_attentions_2_proj_out_bias = self.L__self___up_blocks_0_attentions_2_proj_out_bias | |
hidden_states_226 = torch._C._nn.linear(hidden_states_225, l__self___up_blocks_0_attentions_2_proj_out_weight, l__self___up_blocks_0_attentions_2_proj_out_bias); hidden_states_225 = l__self___up_blocks_0_attentions_2_proj_out_weight = l__self___up_blocks_0_attentions_2_proj_out_bias = None | |
reshape_16 = hidden_states_226.reshape(2, 32, 32, 1280); hidden_states_226 = None | |
permute_15 = reshape_16.permute(0, 3, 1, 2); reshape_16 = None | |
hidden_states_227 = permute_15.contiguous(); permute_15 = None | |
hidden_states_228 = hidden_states_227 + residual_7; hidden_states_227 = residual_7 = None | |
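# up_blocks[0].upsamplers[0]: 2x nearest-neighbor upsampling followed by a padded conv.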
hidden_states_229 = torch.nn.functional.interpolate(hidden_states_228, scale_factor = 2.0, mode = 'nearest'); hidden_states_228 = None | |
l__self___up_blocks_0_upsamplers_0_conv_weight = self.L__self___up_blocks_0_upsamplers_0_conv_weight | |
l__self___up_blocks_0_upsamplers_0_conv_bias = self.L__self___up_blocks_0_upsamplers_0_conv_bias | |
sample_11 = torch.conv2d(hidden_states_229, l__self___up_blocks_0_upsamplers_0_conv_weight, l__self___up_blocks_0_upsamplers_0_conv_bias, (1, 1), (1, 1), (1, 1), 1); hidden_states_229 = l__self___up_blocks_0_upsamplers_0_conv_weight = l__self___up_blocks_0_upsamplers_0_conv_bias = None | |
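# up_blocks[1]: three resnet + transformer pairs at 640 channels and 64x64 resolution
# (sequence length 4096), each with two BasicTransformerBlocks, consuming
# res_hidden_states_3 through _5.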
hidden_states_233 = torch.cat([sample_11, res_hidden_states_3], dim = 1); sample_11 = res_hidden_states_3 = None | |
hidden_states_234 = self.L__self___up_blocks_1_resnets_0_norm1(hidden_states_233) | |
hidden_states_235 = self.L__self___time_embedding_act(hidden_states_234); hidden_states_234 = None | |
l__self___up_blocks_1_resnets_0_conv1_weight = self.L__self___up_blocks_1_resnets_0_conv1_weight | |
l__self___up_blocks_1_resnets_0_conv1_bias = self.L__self___up_blocks_1_resnets_0_conv1_bias | |
hidden_states_236 = torch.conv2d(hidden_states_235, l__self___up_blocks_1_resnets_0_conv1_weight, l__self___up_blocks_1_resnets_0_conv1_bias, (1, 1), (1, 1), (1, 1), 1); hidden_states_235 = l__self___up_blocks_1_resnets_0_conv1_weight = l__self___up_blocks_1_resnets_0_conv1_bias = None | |
temb_22 = self.L__self___time_embedding_act(emb_11) | |
l__self___up_blocks_1_resnets_0_time_emb_proj_weight = self.L__self___up_blocks_1_resnets_0_time_emb_proj_weight | |
l__self___up_blocks_1_resnets_0_time_emb_proj_bias = self.L__self___up_blocks_1_resnets_0_time_emb_proj_bias | |
out_31 = torch._C._nn.linear(temb_22, l__self___up_blocks_1_resnets_0_time_emb_proj_weight, l__self___up_blocks_1_resnets_0_time_emb_proj_bias); temb_22 = l__self___up_blocks_1_resnets_0_time_emb_proj_weight = l__self___up_blocks_1_resnets_0_time_emb_proj_bias = None | |
temb_23 = out_31[(slice(None, None, None), slice(None, None, None), None, None)]; out_31 = None | |
hidden_states_237 = hidden_states_236 + temb_23; hidden_states_236 = temb_23 = None | |
hidden_states_238 = self.L__self___up_blocks_1_resnets_0_norm2(hidden_states_237); hidden_states_237 = None | |
hidden_states_239 = self.L__self___time_embedding_act(hidden_states_238); hidden_states_238 = None | |
hidden_states_240 = self.L__self___up_blocks_1_resnets_0_dropout(hidden_states_239); hidden_states_239 = None | |
l__self___up_blocks_1_resnets_0_conv2_weight = self.L__self___up_blocks_1_resnets_0_conv2_weight | |
l__self___up_blocks_1_resnets_0_conv2_bias = self.L__self___up_blocks_1_resnets_0_conv2_bias | |
hidden_states_241 = torch.conv2d(hidden_states_240, l__self___up_blocks_1_resnets_0_conv2_weight, l__self___up_blocks_1_resnets_0_conv2_bias, (1, 1), (1, 1), (1, 1), 1); hidden_states_240 = l__self___up_blocks_1_resnets_0_conv2_weight = l__self___up_blocks_1_resnets_0_conv2_bias = None | |
l__self___up_blocks_1_resnets_0_conv_shortcut_weight = self.L__self___up_blocks_1_resnets_0_conv_shortcut_weight | |
l__self___up_blocks_1_resnets_0_conv_shortcut_bias = self.L__self___up_blocks_1_resnets_0_conv_shortcut_bias | |
input_tensor_5 = torch.conv2d(hidden_states_233, l__self___up_blocks_1_resnets_0_conv_shortcut_weight, l__self___up_blocks_1_resnets_0_conv_shortcut_bias, (1, 1), (0, 0), (1, 1), 1); hidden_states_233 = l__self___up_blocks_1_resnets_0_conv_shortcut_weight = l__self___up_blocks_1_resnets_0_conv_shortcut_bias = None | |
add_32 = input_tensor_5 + hidden_states_241; input_tensor_5 = hidden_states_241 = None | |
residual_8 = add_32 / 1.0; add_32 = None | |
hidden_states_243 = self.L__self___up_blocks_1_attentions_0_norm(residual_8) | |
permute_16 = hidden_states_243.permute(0, 2, 3, 1); hidden_states_243 = None | |
hidden_states_244 = permute_16.reshape(2, 4096, 640); permute_16 = None | |
l__self___up_blocks_1_attentions_0_proj_in_weight = self.L__self___up_blocks_1_attentions_0_proj_in_weight | |
l__self___up_blocks_1_attentions_0_proj_in_bias = self.L__self___up_blocks_1_attentions_0_proj_in_bias | |
hidden_states_245 = torch._C._nn.linear(hidden_states_244, l__self___up_blocks_1_attentions_0_proj_in_weight, l__self___up_blocks_1_attentions_0_proj_in_bias); hidden_states_244 = l__self___up_blocks_1_attentions_0_proj_in_weight = l__self___up_blocks_1_attentions_0_proj_in_bias = None | |
hidden_states_246 = self.L__self___up_blocks_1_attentions_0_transformer_blocks_0(hidden_states_245, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_245 = None | |
hidden_states_247 = self.L__self___up_blocks_1_attentions_0_transformer_blocks_1(hidden_states_246, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_246 = None | |
l__self___up_blocks_1_attentions_0_proj_out_weight = self.L__self___up_blocks_1_attentions_0_proj_out_weight | |
l__self___up_blocks_1_attentions_0_proj_out_bias = self.L__self___up_blocks_1_attentions_0_proj_out_bias | |
hidden_states_248 = torch._C._nn.linear(hidden_states_247, l__self___up_blocks_1_attentions_0_proj_out_weight, l__self___up_blocks_1_attentions_0_proj_out_bias); hidden_states_247 = l__self___up_blocks_1_attentions_0_proj_out_weight = l__self___up_blocks_1_attentions_0_proj_out_bias = None | |
reshape_18 = hidden_states_248.reshape(2, 64, 64, 640); hidden_states_248 = None | |
permute_17 = reshape_18.permute(0, 3, 1, 2); reshape_18 = None | |
hidden_states_249 = permute_17.contiguous(); permute_17 = None | |
hidden_states_250 = hidden_states_249 + residual_8; hidden_states_249 = residual_8 = None | |
hidden_states_252 = torch.cat([hidden_states_250, res_hidden_states_4], dim = 1); hidden_states_250 = res_hidden_states_4 = None | |
hidden_states_253 = self.L__self___up_blocks_1_resnets_1_norm1(hidden_states_252) | |
hidden_states_254 = self.L__self___time_embedding_act(hidden_states_253); hidden_states_253 = None | |
l__self___up_blocks_1_resnets_1_conv1_weight = self.L__self___up_blocks_1_resnets_1_conv1_weight | |
l__self___up_blocks_1_resnets_1_conv1_bias = self.L__self___up_blocks_1_resnets_1_conv1_bias | |
hidden_states_255 = torch.conv2d(hidden_states_254, l__self___up_blocks_1_resnets_1_conv1_weight, l__self___up_blocks_1_resnets_1_conv1_bias, (1, 1), (1, 1), (1, 1), 1); hidden_states_254 = l__self___up_blocks_1_resnets_1_conv1_weight = l__self___up_blocks_1_resnets_1_conv1_bias = None | |
temb_24 = self.L__self___time_embedding_act(emb_11) | |
l__self___up_blocks_1_resnets_1_time_emb_proj_weight = self.L__self___up_blocks_1_resnets_1_time_emb_proj_weight | |
l__self___up_blocks_1_resnets_1_time_emb_proj_bias = self.L__self___up_blocks_1_resnets_1_time_emb_proj_bias | |
out_34 = torch._C._nn.linear(temb_24, l__self___up_blocks_1_resnets_1_time_emb_proj_weight, l__self___up_blocks_1_resnets_1_time_emb_proj_bias); temb_24 = l__self___up_blocks_1_resnets_1_time_emb_proj_weight = l__self___up_blocks_1_resnets_1_time_emb_proj_bias = None | |
temb_25 = out_34[(slice(None, None, None), slice(None, None, None), None, None)]; out_34 = None | |
hidden_states_256 = hidden_states_255 + temb_25; hidden_states_255 = temb_25 = None | |
hidden_states_257 = self.L__self___up_blocks_1_resnets_1_norm2(hidden_states_256); hidden_states_256 = None | |
hidden_states_258 = self.L__self___time_embedding_act(hidden_states_257); hidden_states_257 = None | |
hidden_states_259 = self.L__self___up_blocks_1_resnets_1_dropout(hidden_states_258); hidden_states_258 = None | |
l__self___up_blocks_1_resnets_1_conv2_weight = self.L__self___up_blocks_1_resnets_1_conv2_weight | |
l__self___up_blocks_1_resnets_1_conv2_bias = self.L__self___up_blocks_1_resnets_1_conv2_bias | |
hidden_states_260 = torch.conv2d(hidden_states_259, l__self___up_blocks_1_resnets_1_conv2_weight, l__self___up_blocks_1_resnets_1_conv2_bias, (1, 1), (1, 1), (1, 1), 1); hidden_states_259 = l__self___up_blocks_1_resnets_1_conv2_weight = l__self___up_blocks_1_resnets_1_conv2_bias = None | |
l__self___up_blocks_1_resnets_1_conv_shortcut_weight = self.L__self___up_blocks_1_resnets_1_conv_shortcut_weight | |
l__self___up_blocks_1_resnets_1_conv_shortcut_bias = self.L__self___up_blocks_1_resnets_1_conv_shortcut_bias | |
input_tensor_6 = torch.conv2d(hidden_states_252, l__self___up_blocks_1_resnets_1_conv_shortcut_weight, l__self___up_blocks_1_resnets_1_conv_shortcut_bias, (1, 1), (0, 0), (1, 1), 1); hidden_states_252 = l__self___up_blocks_1_resnets_1_conv_shortcut_weight = l__self___up_blocks_1_resnets_1_conv_shortcut_bias = None | |
add_35 = input_tensor_6 + hidden_states_260; input_tensor_6 = hidden_states_260 = None | |
residual_9 = add_35 / 1.0; add_35 = None | |
hidden_states_262 = self.L__self___up_blocks_1_attentions_1_norm(residual_9) | |
permute_18 = hidden_states_262.permute(0, 2, 3, 1); hidden_states_262 = None | |
hidden_states_263 = permute_18.reshape(2, 4096, 640); permute_18 = None | |
l__self___up_blocks_1_attentions_1_proj_in_weight = self.L__self___up_blocks_1_attentions_1_proj_in_weight | |
l__self___up_blocks_1_attentions_1_proj_in_bias = self.L__self___up_blocks_1_attentions_1_proj_in_bias | |
hidden_states_264 = torch._C._nn.linear(hidden_states_263, l__self___up_blocks_1_attentions_1_proj_in_weight, l__self___up_blocks_1_attentions_1_proj_in_bias); hidden_states_263 = l__self___up_blocks_1_attentions_1_proj_in_weight = l__self___up_blocks_1_attentions_1_proj_in_bias = None | |
hidden_states_265 = self.L__self___up_blocks_1_attentions_1_transformer_blocks_0(hidden_states_264, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_264 = None | |
hidden_states_266 = self.L__self___up_blocks_1_attentions_1_transformer_blocks_1(hidden_states_265, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_265 = None | |
l__self___up_blocks_1_attentions_1_proj_out_weight = self.L__self___up_blocks_1_attentions_1_proj_out_weight | |
l__self___up_blocks_1_attentions_1_proj_out_bias = self.L__self___up_blocks_1_attentions_1_proj_out_bias | |
hidden_states_267 = torch._C._nn.linear(hidden_states_266, l__self___up_blocks_1_attentions_1_proj_out_weight, l__self___up_blocks_1_attentions_1_proj_out_bias); hidden_states_266 = l__self___up_blocks_1_attentions_1_proj_out_weight = l__self___up_blocks_1_attentions_1_proj_out_bias = None | |
reshape_20 = hidden_states_267.reshape(2, 64, 64, 640); hidden_states_267 = None | |
permute_19 = reshape_20.permute(0, 3, 1, 2); reshape_20 = None | |
hidden_states_268 = permute_19.contiguous(); permute_19 = None | |
hidden_states_269 = hidden_states_268 + residual_9; hidden_states_268 = residual_9 = None | |
hidden_states_271 = torch.cat([hidden_states_269, res_hidden_states_5], dim = 1); hidden_states_269 = res_hidden_states_5 = None | |
hidden_states_272 = self.L__self___up_blocks_1_resnets_2_norm1(hidden_states_271) | |
hidden_states_273 = self.L__self___time_embedding_act(hidden_states_272); hidden_states_272 = None | |
l__self___up_blocks_1_resnets_2_conv1_weight = self.L__self___up_blocks_1_resnets_2_conv1_weight | |
l__self___up_blocks_1_resnets_2_conv1_bias = self.L__self___up_blocks_1_resnets_2_conv1_bias | |
hidden_states_274 = torch.conv2d(hidden_states_273, l__self___up_blocks_1_resnets_2_conv1_weight, l__self___up_blocks_1_resnets_2_conv1_bias, (1, 1), (1, 1), (1, 1), 1); hidden_states_273 = l__self___up_blocks_1_resnets_2_conv1_weight = l__self___up_blocks_1_resnets_2_conv1_bias = None | |
temb_26 = self.L__self___time_embedding_act(emb_11) | |
l__self___up_blocks_1_resnets_2_time_emb_proj_weight = self.L__self___up_blocks_1_resnets_2_time_emb_proj_weight | |
l__self___up_blocks_1_resnets_2_time_emb_proj_bias = self.L__self___up_blocks_1_resnets_2_time_emb_proj_bias | |
out_37 = torch._C._nn.linear(temb_26, l__self___up_blocks_1_resnets_2_time_emb_proj_weight, l__self___up_blocks_1_resnets_2_time_emb_proj_bias); temb_26 = l__self___up_blocks_1_resnets_2_time_emb_proj_weight = l__self___up_blocks_1_resnets_2_time_emb_proj_bias = None | |
temb_27 = out_37[(slice(None, None, None), slice(None, None, None), None, None)]; out_37 = None | |
hidden_states_275 = hidden_states_274 + temb_27; hidden_states_274 = temb_27 = None | |
hidden_states_276 = self.L__self___up_blocks_1_resnets_2_norm2(hidden_states_275); hidden_states_275 = None | |
hidden_states_277 = self.L__self___time_embedding_act(hidden_states_276); hidden_states_276 = None | |
hidden_states_278 = self.L__self___up_blocks_1_resnets_2_dropout(hidden_states_277); hidden_states_277 = None | |
l__self___up_blocks_1_resnets_2_conv2_weight = self.L__self___up_blocks_1_resnets_2_conv2_weight | |
l__self___up_blocks_1_resnets_2_conv2_bias = self.L__self___up_blocks_1_resnets_2_conv2_bias | |
hidden_states_279 = torch.conv2d(hidden_states_278, l__self___up_blocks_1_resnets_2_conv2_weight, l__self___up_blocks_1_resnets_2_conv2_bias, (1, 1), (1, 1), (1, 1), 1); hidden_states_278 = l__self___up_blocks_1_resnets_2_conv2_weight = l__self___up_blocks_1_resnets_2_conv2_bias = None | |
l__self___up_blocks_1_resnets_2_conv_shortcut_weight = self.L__self___up_blocks_1_resnets_2_conv_shortcut_weight | |
l__self___up_blocks_1_resnets_2_conv_shortcut_bias = self.L__self___up_blocks_1_resnets_2_conv_shortcut_bias | |
input_tensor_7 = torch.conv2d(hidden_states_271, l__self___up_blocks_1_resnets_2_conv_shortcut_weight, l__self___up_blocks_1_resnets_2_conv_shortcut_bias, (1, 1), (0, 0), (1, 1), 1); hidden_states_271 = l__self___up_blocks_1_resnets_2_conv_shortcut_weight = l__self___up_blocks_1_resnets_2_conv_shortcut_bias = None | |
add_38 = input_tensor_7 + hidden_states_279; input_tensor_7 = hidden_states_279 = None | |
residual_10 = add_38 / 1.0; add_38 = None | |
hidden_states_281 = self.L__self___up_blocks_1_attentions_2_norm(residual_10) | |
permute_20 = hidden_states_281.permute(0, 2, 3, 1); hidden_states_281 = None | |
hidden_states_282 = permute_20.reshape(2, 4096, 640); permute_20 = None | |
l__self___up_blocks_1_attentions_2_proj_in_weight = self.L__self___up_blocks_1_attentions_2_proj_in_weight | |
l__self___up_blocks_1_attentions_2_proj_in_bias = self.L__self___up_blocks_1_attentions_2_proj_in_bias | |
hidden_states_283 = torch._C._nn.linear(hidden_states_282, l__self___up_blocks_1_attentions_2_proj_in_weight, l__self___up_blocks_1_attentions_2_proj_in_bias); hidden_states_282 = l__self___up_blocks_1_attentions_2_proj_in_weight = l__self___up_blocks_1_attentions_2_proj_in_bias = None | |
hidden_states_284 = self.L__self___up_blocks_1_attentions_2_transformer_blocks_0(hidden_states_283, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_283 = None | |
hidden_states_285 = self.L__self___up_blocks_1_attentions_2_transformer_blocks_1(hidden_states_284, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_284 = l_encoder_hidden_states_ = None | |
l__self___up_blocks_1_attentions_2_proj_out_weight = self.L__self___up_blocks_1_attentions_2_proj_out_weight | |
l__self___up_blocks_1_attentions_2_proj_out_bias = self.L__self___up_blocks_1_attentions_2_proj_out_bias | |
hidden_states_286 = torch._C._nn.linear(hidden_states_285, l__self___up_blocks_1_attentions_2_proj_out_weight, l__self___up_blocks_1_attentions_2_proj_out_bias); hidden_states_285 = l__self___up_blocks_1_attentions_2_proj_out_weight = l__self___up_blocks_1_attentions_2_proj_out_bias = None | |
reshape_22 = hidden_states_286.reshape(2, 64, 64, 640); hidden_states_286 = None | |
permute_21 = reshape_22.permute(0, 3, 1, 2); reshape_22 = None | |
hidden_states_287 = permute_21.contiguous(); permute_21 = None | |
hidden_states_288 = hidden_states_287 + residual_10; hidden_states_287 = residual_10 = None | |
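# up_blocks[1].upsamplers[0]: 2x nearest upsample + conv, feeding the final up block.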
hidden_states_289 = torch.nn.functional.interpolate(hidden_states_288, scale_factor = 2.0, mode = 'nearest'); hidden_states_288 = None | |
l__self___up_blocks_1_upsamplers_0_conv_weight = self.L__self___up_blocks_1_upsamplers_0_conv_weight | |
l__self___up_blocks_1_upsamplers_0_conv_bias = self.L__self___up_blocks_1_upsamplers_0_conv_bias | |
sample_12 = torch.conv2d(hidden_states_289, l__self___up_blocks_1_upsamplers_0_conv_weight, l__self___up_blocks_1_upsamplers_0_conv_bias, (1, 1), (1, 1), (1, 1), 1); hidden_states_289 = l__self___up_blocks_1_upsamplers_0_conv_weight = l__self___up_blocks_1_upsamplers_0_conv_bias = None | |
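# up_blocks[2]: resnet-only up block (no attention layers), consuming the remaining
# skip connections res_hidden_states_6 through _8.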
hidden_states_293 = torch.cat([sample_12, res_hidden_states_6], dim = 1); sample_12 = res_hidden_states_6 = None | |
hidden_states_294 = self.L__self___up_blocks_2_resnets_0_norm1(hidden_states_293) | |
hidden_states_295 = self.L__self___time_embedding_act(hidden_states_294); hidden_states_294 = None | |
l__self___up_blocks_2_resnets_0_conv1_weight = self.L__self___up_blocks_2_resnets_0_conv1_weight | |
l__self___up_blocks_2_resnets_0_conv1_bias = self.L__self___up_blocks_2_resnets_0_conv1_bias | |
hidden_states_296 = torch.conv2d(hidden_states_295, l__self___up_blocks_2_resnets_0_conv1_weight, l__self___up_blocks_2_resnets_0_conv1_bias, (1, 1), (1, 1), (1, 1), 1); hidden_states_295 = l__self___up_blocks_2_resnets_0_conv1_weight = l__self___up_blocks_2_resnets_0_conv1_bias = None | |
temb_28 = self.L__self___time_embedding_act(emb_11) | |
l__self___up_blocks_2_resnets_0_time_emb_proj_weight = self.L__self___up_blocks_2_resnets_0_time_emb_proj_weight | |
l__self___up_blocks_2_resnets_0_time_emb_proj_bias = self.L__self___up_blocks_2_resnets_0_time_emb_proj_bias | |
out_40 = torch._C._nn.linear(temb_28, l__self___up_blocks_2_resnets_0_time_emb_proj_weight, l__self___up_blocks_2_resnets_0_time_emb_proj_bias); temb_28 = l__self___up_blocks_2_resnets_0_time_emb_proj_weight = l__self___up_blocks_2_resnets_0_time_emb_proj_bias = None | |
temb_29 = out_40[(slice(None, None, None), slice(None, None, None), None, None)]; out_40 = None | |
hidden_states_297 = hidden_states_296 + temb_29; hidden_states_296 = temb_29 = None | |
hidden_states_298 = self.L__self___up_blocks_2_resnets_0_norm2(hidden_states_297); hidden_states_297 = None | |
hidden_states_299 = self.L__self___time_embedding_act(hidden_states_298); hidden_states_298 = None | |
hidden_states_300 = self.L__self___up_blocks_2_resnets_0_dropout(hidden_states_299); hidden_states_299 = None | |
l__self___up_blocks_2_resnets_0_conv2_weight = self.L__self___up_blocks_2_resnets_0_conv2_weight | |
l__self___up_blocks_2_resnets_0_conv2_bias = self.L__self___up_blocks_2_resnets_0_conv2_bias | |
hidden_states_301 = torch.conv2d(hidden_states_300, l__self___up_blocks_2_resnets_0_conv2_weight, l__self___up_blocks_2_resnets_0_conv2_bias, (1, 1), (1, 1), (1, 1), 1); hidden_states_300 = l__self___up_blocks_2_resnets_0_conv2_weight = l__self___up_blocks_2_resnets_0_conv2_bias = None | |
l__self___up_blocks_2_resnets_0_conv_shortcut_weight = self.L__self___up_blocks_2_resnets_0_conv_shortcut_weight | |
l__self___up_blocks_2_resnets_0_conv_shortcut_bias = self.L__self___up_blocks_2_resnets_0_conv_shortcut_bias | |
input_tensor_8 = torch.conv2d(hidden_states_293, l__self___up_blocks_2_resnets_0_conv_shortcut_weight, l__self___up_blocks_2_resnets_0_conv_shortcut_bias, (1, 1), (0, 0), (1, 1), 1); hidden_states_293 = l__self___up_blocks_2_resnets_0_conv_shortcut_weight = l__self___up_blocks_2_resnets_0_conv_shortcut_bias = None | |
add_41 = input_tensor_8 + hidden_states_301; input_tensor_8 = hidden_states_301 = None | |
hidden_states_302 = add_41 / 1.0; add_41 = None | |
hidden_states_304 = torch.cat([hidden_states_302, res_hidden_states_7], dim = 1); hidden_states_302 = res_hidden_states_7 = None | |
hidden_states_305 = self.L__self___up_blocks_2_resnets_1_norm1(hidden_states_304) | |
hidden_states_306 = self.L__self___time_embedding_act(hidden_states_305); hidden_states_305 = None | |
l__self___up_blocks_2_resnets_1_conv1_weight = self.L__self___up_blocks_2_resnets_1_conv1_weight | |
l__self___up_blocks_2_resnets_1_conv1_bias = self.L__self___up_blocks_2_resnets_1_conv1_bias | |
hidden_states_307 = torch.conv2d(hidden_states_306, l__self___up_blocks_2_resnets_1_conv1_weight, l__self___up_blocks_2_resnets_1_conv1_bias, (1, 1), (1, 1), (1, 1), 1); hidden_states_306 = l__self___up_blocks_2_resnets_1_conv1_weight = l__self___up_blocks_2_resnets_1_conv1_bias = None | |
temb_30 = self.L__self___time_embedding_act(emb_11) | |
l__self___up_blocks_2_resnets_1_time_emb_proj_weight = self.L__self___up_blocks_2_resnets_1_time_emb_proj_weight | |
l__self___up_blocks_2_resnets_1_time_emb_proj_bias = self.L__self___up_blocks_2_resnets_1_time_emb_proj_bias | |
out_41 = torch._C._nn.linear(temb_30, l__self___up_blocks_2_resnets_1_time_emb_proj_weight, l__self___up_blocks_2_resnets_1_time_emb_proj_bias); temb_30 = l__self___up_blocks_2_resnets_1_time_emb_proj_weight = l__self___up_blocks_2_resnets_1_time_emb_proj_bias = None | |
temb_31 = out_41[(slice(None, None, None), slice(None, None, None), None, None)]; out_41 = None | |
hidden_states_308 = hidden_states_307 + temb_31; hidden_states_307 = temb_31 = None | |
hidden_states_309 = self.L__self___up_blocks_2_resnets_1_norm2(hidden_states_308); hidden_states_308 = None | |
hidden_states_310 = self.L__self___time_embedding_act(hidden_states_309); hidden_states_309 = None | |
hidden_states_311 = self.L__self___up_blocks_2_resnets_1_dropout(hidden_states_310); hidden_states_310 = None | |
l__self___up_blocks_2_resnets_1_conv2_weight = self.L__self___up_blocks_2_resnets_1_conv2_weight | |
l__self___up_blocks_2_resnets_1_conv2_bias = self.L__self___up_blocks_2_resnets_1_conv2_bias | |
hidden_states_312 = torch.conv2d(hidden_states_311, l__self___up_blocks_2_resnets_1_conv2_weight, l__self___up_blocks_2_resnets_1_conv2_bias, (1, 1), (1, 1), (1, 1), 1); hidden_states_311 = l__self___up_blocks_2_resnets_1_conv2_weight = l__self___up_blocks_2_resnets_1_conv2_bias = None | |
l__self___up_blocks_2_resnets_1_conv_shortcut_weight = self.L__self___up_blocks_2_resnets_1_conv_shortcut_weight | |
l__self___up_blocks_2_resnets_1_conv_shortcut_bias = self.L__self___up_blocks_2_resnets_1_conv_shortcut_bias | |
input_tensor_9 = torch.conv2d(hidden_states_304, l__self___up_blocks_2_resnets_1_conv_shortcut_weight, l__self___up_blocks_2_resnets_1_conv_shortcut_bias, (1, 1), (0, 0), (1, 1), 1); hidden_states_304 = l__self___up_blocks_2_resnets_1_conv_shortcut_weight = l__self___up_blocks_2_resnets_1_conv_shortcut_bias = None | |
add_43 = input_tensor_9 + hidden_states_312; input_tensor_9 = hidden_states_312 = None | |
hidden_states_313 = add_43 / 1.0; add_43 = None | |
hidden_states_315 = torch.cat([hidden_states_313, res_hidden_states_8], dim = 1); hidden_states_313 = res_hidden_states_8 = None | |
hidden_states_316 = self.L__self___up_blocks_2_resnets_2_norm1(hidden_states_315) | |
hidden_states_317 = self.L__self___time_embedding_act(hidden_states_316); hidden_states_316 = None | |
l__self___up_blocks_2_resnets_2_conv1_weight = self.L__self___up_blocks_2_resnets_2_conv1_weight | |
l__self___up_blocks_2_resnets_2_conv1_bias = self.L__self___up_blocks_2_resnets_2_conv1_bias | |
hidden_states_318 = torch.conv2d(hidden_states_317, l__self___up_blocks_2_resnets_2_conv1_weight, l__self___up_blocks_2_resnets_2_conv1_bias, (1, 1), (1, 1), (1, 1), 1); hidden_states_317 = l__self___up_blocks_2_resnets_2_conv1_weight = l__self___up_blocks_2_resnets_2_conv1_bias = None | |
temb_32 = self.L__self___time_embedding_act(emb_11); emb_11 = None | |
l__self___up_blocks_2_resnets_2_time_emb_proj_weight = self.L__self___up_blocks_2_resnets_2_time_emb_proj_weight | |
l__self___up_blocks_2_resnets_2_time_emb_proj_bias = self.L__self___up_blocks_2_resnets_2_time_emb_proj_bias | |
out_42 = torch._C._nn.linear(temb_32, l__self___up_blocks_2_resnets_2_time_emb_proj_weight, l__self___up_blocks_2_resnets_2_time_emb_proj_bias); temb_32 = l__self___up_blocks_2_resnets_2_time_emb_proj_weight = l__self___up_blocks_2_resnets_2_time_emb_proj_bias = None | |
temb_33 = out_42[(slice(None, None, None), slice(None, None, None), None, None)]; out_42 = None | |
hidden_states_319 = hidden_states_318 + temb_33; hidden_states_318 = temb_33 = None | |
hidden_states_320 = self.L__self___up_blocks_2_resnets_2_norm2(hidden_states_319); hidden_states_319 = None | |
hidden_states_321 = self.L__self___time_embedding_act(hidden_states_320); hidden_states_320 = None | |
hidden_states_322 = self.L__self___up_blocks_2_resnets_2_dropout(hidden_states_321); hidden_states_321 = None | |
l__self___up_blocks_2_resnets_2_conv2_weight = self.L__self___up_blocks_2_resnets_2_conv2_weight | |
l__self___up_blocks_2_resnets_2_conv2_bias = self.L__self___up_blocks_2_resnets_2_conv2_bias | |
hidden_states_323 = torch.conv2d(hidden_states_322, l__self___up_blocks_2_resnets_2_conv2_weight, l__self___up_blocks_2_resnets_2_conv2_bias, (1, 1), (1, 1), (1, 1), 1); hidden_states_322 = l__self___up_blocks_2_resnets_2_conv2_weight = l__self___up_blocks_2_resnets_2_conv2_bias = None | |
l__self___up_blocks_2_resnets_2_conv_shortcut_weight = self.L__self___up_blocks_2_resnets_2_conv_shortcut_weight | |
l__self___up_blocks_2_resnets_2_conv_shortcut_bias = self.L__self___up_blocks_2_resnets_2_conv_shortcut_bias | |
input_tensor_10 = torch.conv2d(hidden_states_315, l__self___up_blocks_2_resnets_2_conv_shortcut_weight, l__self___up_blocks_2_resnets_2_conv_shortcut_bias, (1, 1), (0, 0), (1, 1), 1); hidden_states_315 = l__self___up_blocks_2_resnets_2_conv_shortcut_weight = l__self___up_blocks_2_resnets_2_conv_shortcut_bias = None | |
add_45 = input_tensor_10 + hidden_states_323; input_tensor_10 = hidden_states_323 = None | |
sample_13 = add_45 / 1.0; add_45 = None | |
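# Output head: conv_norm_out (GroupNorm) -> SiLU -> conv_out produces the sample returned below.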
sample_14 = self.L__self___conv_norm_out(sample_13); sample_13 = None | |
sample_15 = self.L__self___time_embedding_act(sample_14); sample_14 = None | |
sample_16 = self.L__self___conv_out(sample_15); sample_15 = None | |
return (sample_16,) | |
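# Instantiate the repro module defined above.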
mod = Repro() | |
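# load_args rebuilds the five graph inputs (sample, timestep, added_cond_kwargs
# text_embeds / time_ids, encoder_hidden_states) from storages checkpointed by the
# minifier; the hex strings identify the saved tensor contents on disk.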
def load_args(reader): | |
buf0 = reader.storage('74c1111a8ba16b7a9e0692d1b22a3bb3f7a2de1e', 262144, device=device(type='cuda', index=0), dtype_hint=torch.float16) | |
reader.tensor(buf0, (2, 4, 128, 128), dtype=torch.float16, is_leaf=True) # L_sample_ | |
buf1 = reader.storage('1ef05ec122a136dbfeb7bca9c18d685e58284271', 160, device=device(type='cuda', index=0)) | |
reader.tensor(buf1, (), is_leaf=True) # L_timestep_ | |
buf2 = reader.storage('3c5868917b64af67510e0ccde816043463effeed', 5120, device=device(type='cuda', index=0), dtype_hint=torch.float16) | |
reader.tensor(buf2, (2, 1280), dtype=torch.float16, is_leaf=True) # L_added_cond_kwargs_text_embeds_ | |
buf3 = reader.storage('53b60f9d6318b06eab596e8e3412589052cadb30', 24, device=device(type='cuda', index=0), dtype_hint=torch.float16) | |
reader.tensor(buf3, (2, 6), dtype=torch.float16, is_leaf=True) # L_added_cond_kwargs_time_ids_ | |
buf4 = reader.storage('8887258b000c59dd41b243f7282424a00d7d421b', 630784, device=device(type='cuda', index=0), dtype_hint=torch.float16) | |
reader.tensor(buf4, (2, 77, 2048), dtype=torch.float16, is_leaf=True) # L_encoder_hidden_states_ | |
load_args._version = 0 | |
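# Entry point: run_repro replays the captured graph with the saved inputs;
# command='minify' requests further minification, writing checkpoints to save_dir,
# with accuracy checking disabled (accuracy=False).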
if __name__ == '__main__': | |
from torch._dynamo.repro.after_dynamo import run_repro | |
run_repro(mod, load_args, accuracy=False, command='minify', | |
save_dir='/mnt/zeph/home/emil/Projects/rocm_test/torch_compile_debug/run_2023_11_14_07_11_57_623143-pid_9406/minifier/checkpoints', autocast=False, backend=None) |