Minified repro
# Auto-generated minified repro (torch._dynamo / TorchInductor debug output).
from math import inf
import torch
from torch import tensor, device
import torch.fx as fx
import torch._dynamo
from torch._dynamo.testing import rand_strided
from torch._dynamo.debug_utils import run_fwd_maybe_bwd
import torch._dynamo.config
import torch._inductor.config
import torch._functorch.config
import torch.fx.experimental._config
from torch.nn import *
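# NOTE: a minified repro like this is normally followed by a short driver that
# materialises the inputs with rand_strided and calls run_fwd_maybe_bwd on the
# module; that tail is not included in this excerpt. A minimal sketch, assuming
# a hypothetical `input_specs` list of (shape, stride, dtype) tuples standing in
# for whatever argument shapes the omitted driver records:
#
#   mod = Repro().cuda()
#   args = [rand_strided(shape, stride, dtype, device="cuda")
#           for shape, stride, dtype in input_specs]
#   run_fwd_maybe_bwd(mod, args)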
class Repro(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.L__self___time_embedding_act = SiLU()
        self.L__self___conv_in = Conv2d(4, 320, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)).cuda()
        self.L__self___down_blocks_0_resnets_0_norm1 = GroupNorm(32, 320, eps=1e-05, affine=True).cuda()
        self.L__self___down_blocks_0_resnets_0_norm2 = GroupNorm(32, 320, eps=1e-05, affine=True).cuda()
        self.L__self___down_blocks_0_resnets_0_dropout = Dropout(p=0.0, inplace=False)
        self.L__self___down_blocks_0_resnets_1_norm1 = GroupNorm(32, 320, eps=1e-05, affine=True).cuda()
        self.L__self___down_blocks_0_resnets_1_norm2 = GroupNorm(32, 320, eps=1e-05, affine=True).cuda()
        self.L__self___down_blocks_0_resnets_1_dropout = Dropout(p=0.0, inplace=False)
        self.L__self___down_blocks_1_resnets_0_norm1 = GroupNorm(32, 320, eps=1e-05, affine=True).cuda()
        self.L__self___down_blocks_1_resnets_0_norm2 = GroupNorm(32, 640, eps=1e-05, affine=True).cuda()
        self.L__self___down_blocks_1_resnets_0_dropout = Dropout(p=0.0, inplace=False)
        self.L__self___down_blocks_1_attentions_0_norm = GroupNorm(32, 640, eps=1e-06, affine=True).cuda()
        # The BasicTransformerBlock entries below appear as module reprs emitted by the
        # minifier rather than as constructor calls, so they are not directly runnable Python.
        self.L__self___down_blocks_1_attentions_0_transformer_blocks_0 = BasicTransformerBlock(
          (norm1): LayerNorm((640,), eps=1e-05, elementwise_affine=True)
          (attn1): Attention(
            (to_q): LoRACompatibleLinear(in_features=640, out_features=640, bias=False)
            (to_k): LoRACompatibleLinear(in_features=640, out_features=640, bias=False)
            (to_v): LoRACompatibleLinear(in_features=640, out_features=640, bias=False)
            (to_out): ModuleList(
              (0): LoRACompatibleLinear(in_features=640, out_features=640, bias=True)
              (1): Dropout(p=0.0, inplace=False)
            )
          )
          (norm2): LayerNorm((640,), eps=1e-05, elementwise_affine=True)
          (attn2): Attention(
            (to_q): LoRACompatibleLinear(in_features=640, out_features=640, bias=False)
            (to_k): LoRACompatibleLinear(in_features=2048, out_features=640, bias=False)
            (to_v): LoRACompatibleLinear(in_features=2048, out_features=640, bias=False)
            (to_out): ModuleList(
              (0): LoRACompatibleLinear(in_features=640, out_features=640, bias=True)
              (1): Dropout(p=0.0, inplace=False)
            )
          )
          (norm3): LayerNorm((640,), eps=1e-05, elementwise_affine=True)
          (ff): FeedForward(
            (net): ModuleList(
              (0): GEGLU(
                (proj): LoRACompatibleLinear(in_features=640, out_features=5120, bias=True)
              )
              (1): Dropout(p=0.0, inplace=False)
              (2): LoRACompatibleLinear(in_features=2560, out_features=640, bias=True)
            )
          )
        ).cuda()
        self.L__self___down_blocks_1_attentions_0_transformer_blocks_1 = BasicTransformerBlock(
          (norm1): LayerNorm((640,), eps=1e-05, elementwise_affine=True)
          (attn1): Attention(
            (to_q): LoRACompatibleLinear(in_features=640, out_features=640, bias=False)
            (to_k): LoRACompatibleLinear(in_features=640, out_features=640, bias=False)
            (to_v): LoRACompatibleLinear(in_features=640, out_features=640, bias=False)
            (to_out): ModuleList(
              (0): LoRACompatibleLinear(in_features=640, out_features=640, bias=True)
              (1): Dropout(p=0.0, inplace=False)
            )
          )
          (norm2): LayerNorm((640,), eps=1e-05, elementwise_affine=True)
          (attn2): Attention(
            (to_q): LoRACompatibleLinear(in_features=640, out_features=640, bias=False)
            (to_k): LoRACompatibleLinear(in_features=2048, out_features=640, bias=False)
            (to_v): LoRACompatibleLinear(in_features=2048, out_features=640, bias=False)
            (to_out): ModuleList(
              (0): LoRACompatibleLinear(in_features=640, out_features=640, bias=True)
              (1): Dropout(p=0.0, inplace=False)
            )
          )
          (norm3): LayerNorm((640,), eps=1e-05, elementwise_affine=True)
          (ff): FeedForward(
            (net): ModuleList(
              (0): GEGLU(
                (proj): LoRACompatibleLinear(in_features=640, out_features=5120, bias=True)
              )
              (1): Dropout(p=0.0, inplace=False)
              (2): LoRACompatibleLinear(in_features=2560, out_features=640, bias=True)
            )
          )
        ).cuda()
        self.L__self___down_blocks_1_resnets_1_norm1 = GroupNorm(32, 640, eps=1e-05, affine=True).cuda()
        self.L__self___down_blocks_1_resnets_1_norm2 = GroupNorm(32, 640, eps=1e-05, affine=True).cuda()
        self.L__self___down_blocks_1_resnets_1_dropout = Dropout(p=0.0, inplace=False)
        self.L__self___down_blocks_1_attentions_1_norm = GroupNorm(32, 640, eps=1e-06, affine=True).cuda()
self.L__self___down_blocks_1_attentions_1_transformer_blocks_0 = BasicTransformerBlock( | |
(norm1): LayerNorm((640,), eps=1e-05, elementwise_affine=True) | |
(attn1): Attention( | |
(to_q): LoRACompatibleLinear(in_features=640, out_features=640, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=640, out_features=640, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=640, out_features=640, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=640, out_features=640, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm2): LayerNorm((640,), eps=1e-05, elementwise_affine=True) | |
(attn2): Attention( | |
(to_q): LoRACompatibleLinear(in_features=640, out_features=640, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=2048, out_features=640, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=2048, out_features=640, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=640, out_features=640, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm3): LayerNorm((640,), eps=1e-05, elementwise_affine=True) | |
(ff): FeedForward( | |
(net): ModuleList( | |
(0): GEGLU( | |
(proj): LoRACompatibleLinear(in_features=640, out_features=5120, bias=True) | |
) | |
(1): Dropout(p=0.0, inplace=False) | |
(2): LoRACompatibleLinear(in_features=2560, out_features=640, bias=True) | |
) | |
) | |
).cuda() | |
self.L__self___down_blocks_1_attentions_1_transformer_blocks_1 = BasicTransformerBlock( | |
(norm1): LayerNorm((640,), eps=1e-05, elementwise_affine=True) | |
(attn1): Attention( | |
(to_q): LoRACompatibleLinear(in_features=640, out_features=640, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=640, out_features=640, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=640, out_features=640, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=640, out_features=640, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm2): LayerNorm((640,), eps=1e-05, elementwise_affine=True) | |
(attn2): Attention( | |
(to_q): LoRACompatibleLinear(in_features=640, out_features=640, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=2048, out_features=640, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=2048, out_features=640, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=640, out_features=640, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm3): LayerNorm((640,), eps=1e-05, elementwise_affine=True) | |
(ff): FeedForward( | |
(net): ModuleList( | |
(0): GEGLU( | |
(proj): LoRACompatibleLinear(in_features=640, out_features=5120, bias=True) | |
) | |
(1): Dropout(p=0.0, inplace=False) | |
(2): LoRACompatibleLinear(in_features=2560, out_features=640, bias=True) | |
) | |
) | |
).cuda() | |
self.L__self___down_blocks_2_resnets_0_norm1 = GroupNorm(32, 640, eps=1e-05, affine=True).cuda() | |
self.L__self___down_blocks_2_resnets_0_norm2 = GroupNorm(32, 1280, eps=1e-05, affine=True).cuda() | |
self.L__self___down_blocks_2_resnets_0_dropout = Dropout(p=0.0, inplace=False) | |
self.L__self___down_blocks_2_attentions_0_norm = GroupNorm(32, 1280, eps=1e-06, affine=True).cuda() | |
self.L__self___down_blocks_2_attentions_0_transformer_blocks_0 = BasicTransformerBlock( | |
(norm1): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(attn1): Attention( | |
(to_q): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm2): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(attn2): Attention( | |
(to_q): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=2048, out_features=1280, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=2048, out_features=1280, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm3): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(ff): FeedForward( | |
(net): ModuleList( | |
(0): GEGLU( | |
(proj): LoRACompatibleLinear(in_features=1280, out_features=10240, bias=True) | |
) | |
(1): Dropout(p=0.0, inplace=False) | |
(2): LoRACompatibleLinear(in_features=5120, out_features=1280, bias=True) | |
) | |
) | |
).cuda() | |
self.L__self___down_blocks_2_attentions_0_transformer_blocks_1 = BasicTransformerBlock( | |
(norm1): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(attn1): Attention( | |
(to_q): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm2): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(attn2): Attention( | |
(to_q): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=2048, out_features=1280, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=2048, out_features=1280, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm3): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(ff): FeedForward( | |
(net): ModuleList( | |
(0): GEGLU( | |
(proj): LoRACompatibleLinear(in_features=1280, out_features=10240, bias=True) | |
) | |
(1): Dropout(p=0.0, inplace=False) | |
(2): LoRACompatibleLinear(in_features=5120, out_features=1280, bias=True) | |
) | |
) | |
).cuda() | |
self.L__self___down_blocks_2_attentions_0_transformer_blocks_2 = BasicTransformerBlock( | |
(norm1): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(attn1): Attention( | |
(to_q): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm2): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(attn2): Attention( | |
(to_q): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=2048, out_features=1280, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=2048, out_features=1280, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm3): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(ff): FeedForward( | |
(net): ModuleList( | |
(0): GEGLU( | |
(proj): LoRACompatibleLinear(in_features=1280, out_features=10240, bias=True) | |
) | |
(1): Dropout(p=0.0, inplace=False) | |
(2): LoRACompatibleLinear(in_features=5120, out_features=1280, bias=True) | |
) | |
) | |
).cuda() | |
self.L__self___down_blocks_2_attentions_0_transformer_blocks_3 = BasicTransformerBlock( | |
(norm1): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(attn1): Attention( | |
(to_q): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm2): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(attn2): Attention( | |
(to_q): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=2048, out_features=1280, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=2048, out_features=1280, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm3): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(ff): FeedForward( | |
(net): ModuleList( | |
(0): GEGLU( | |
(proj): LoRACompatibleLinear(in_features=1280, out_features=10240, bias=True) | |
) | |
(1): Dropout(p=0.0, inplace=False) | |
(2): LoRACompatibleLinear(in_features=5120, out_features=1280, bias=True) | |
) | |
) | |
).cuda() | |
self.L__self___down_blocks_2_attentions_0_transformer_blocks_4 = BasicTransformerBlock( | |
(norm1): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(attn1): Attention( | |
(to_q): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm2): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(attn2): Attention( | |
(to_q): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=2048, out_features=1280, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=2048, out_features=1280, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm3): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(ff): FeedForward( | |
(net): ModuleList( | |
(0): GEGLU( | |
(proj): LoRACompatibleLinear(in_features=1280, out_features=10240, bias=True) | |
) | |
(1): Dropout(p=0.0, inplace=False) | |
(2): LoRACompatibleLinear(in_features=5120, out_features=1280, bias=True) | |
) | |
) | |
).cuda() | |
self.L__self___down_blocks_2_attentions_0_transformer_blocks_5 = BasicTransformerBlock( | |
(norm1): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(attn1): Attention( | |
(to_q): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm2): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(attn2): Attention( | |
(to_q): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=2048, out_features=1280, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=2048, out_features=1280, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm3): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(ff): FeedForward( | |
(net): ModuleList( | |
(0): GEGLU( | |
(proj): LoRACompatibleLinear(in_features=1280, out_features=10240, bias=True) | |
) | |
(1): Dropout(p=0.0, inplace=False) | |
(2): LoRACompatibleLinear(in_features=5120, out_features=1280, bias=True) | |
) | |
) | |
).cuda() | |
self.L__self___down_blocks_2_attentions_0_transformer_blocks_6 = BasicTransformerBlock( | |
(norm1): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(attn1): Attention( | |
(to_q): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm2): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(attn2): Attention( | |
(to_q): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=2048, out_features=1280, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=2048, out_features=1280, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm3): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(ff): FeedForward( | |
(net): ModuleList( | |
(0): GEGLU( | |
(proj): LoRACompatibleLinear(in_features=1280, out_features=10240, bias=True) | |
) | |
(1): Dropout(p=0.0, inplace=False) | |
(2): LoRACompatibleLinear(in_features=5120, out_features=1280, bias=True) | |
) | |
) | |
).cuda() | |
self.L__self___down_blocks_2_attentions_0_transformer_blocks_7 = BasicTransformerBlock( | |
(norm1): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(attn1): Attention( | |
(to_q): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm2): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(attn2): Attention( | |
(to_q): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=2048, out_features=1280, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=2048, out_features=1280, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm3): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(ff): FeedForward( | |
(net): ModuleList( | |
(0): GEGLU( | |
(proj): LoRACompatibleLinear(in_features=1280, out_features=10240, bias=True) | |
) | |
(1): Dropout(p=0.0, inplace=False) | |
(2): LoRACompatibleLinear(in_features=5120, out_features=1280, bias=True) | |
) | |
) | |
).cuda() | |
self.L__self___down_blocks_2_attentions_0_transformer_blocks_8 = BasicTransformerBlock( | |
(norm1): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(attn1): Attention( | |
(to_q): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm2): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(attn2): Attention( | |
(to_q): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=2048, out_features=1280, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=2048, out_features=1280, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm3): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(ff): FeedForward( | |
(net): ModuleList( | |
(0): GEGLU( | |
(proj): LoRACompatibleLinear(in_features=1280, out_features=10240, bias=True) | |
) | |
(1): Dropout(p=0.0, inplace=False) | |
(2): LoRACompatibleLinear(in_features=5120, out_features=1280, bias=True) | |
) | |
) | |
).cuda() | |
self.L__self___down_blocks_2_attentions_0_transformer_blocks_9 = BasicTransformerBlock( | |
(norm1): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(attn1): Attention( | |
(to_q): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm2): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(attn2): Attention( | |
(to_q): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=2048, out_features=1280, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=2048, out_features=1280, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm3): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(ff): FeedForward( | |
(net): ModuleList( | |
(0): GEGLU( | |
(proj): LoRACompatibleLinear(in_features=1280, out_features=10240, bias=True) | |
) | |
(1): Dropout(p=0.0, inplace=False) | |
(2): LoRACompatibleLinear(in_features=5120, out_features=1280, bias=True) | |
) | |
) | |
).cuda() | |
self.L__self___down_blocks_2_resnets_1_norm1 = GroupNorm(32, 1280, eps=1e-05, affine=True).cuda() | |
self.L__self___down_blocks_2_resnets_1_norm2 = GroupNorm(32, 1280, eps=1e-05, affine=True).cuda() | |
self.L__self___down_blocks_2_resnets_1_dropout = Dropout(p=0.0, inplace=False) | |
self.L__self___down_blocks_2_attentions_1_norm = GroupNorm(32, 1280, eps=1e-06, affine=True).cuda() | |
self.L__self___down_blocks_2_attentions_1_transformer_blocks_0 = BasicTransformerBlock( | |
(norm1): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(attn1): Attention( | |
(to_q): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm2): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(attn2): Attention( | |
(to_q): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=2048, out_features=1280, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=2048, out_features=1280, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm3): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(ff): FeedForward( | |
(net): ModuleList( | |
(0): GEGLU( | |
(proj): LoRACompatibleLinear(in_features=1280, out_features=10240, bias=True) | |
) | |
(1): Dropout(p=0.0, inplace=False) | |
(2): LoRACompatibleLinear(in_features=5120, out_features=1280, bias=True) | |
) | |
) | |
).cuda() | |
self.L__self___down_blocks_2_attentions_1_transformer_blocks_1 = BasicTransformerBlock( | |
(norm1): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(attn1): Attention( | |
(to_q): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm2): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(attn2): Attention( | |
(to_q): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=2048, out_features=1280, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=2048, out_features=1280, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm3): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(ff): FeedForward( | |
(net): ModuleList( | |
(0): GEGLU( | |
(proj): LoRACompatibleLinear(in_features=1280, out_features=10240, bias=True) | |
) | |
(1): Dropout(p=0.0, inplace=False) | |
(2): LoRACompatibleLinear(in_features=5120, out_features=1280, bias=True) | |
) | |
) | |
).cuda() | |
self.L__self___down_blocks_2_attentions_1_transformer_blocks_2 = BasicTransformerBlock( | |
(norm1): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(attn1): Attention( | |
(to_q): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm2): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(attn2): Attention( | |
(to_q): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=2048, out_features=1280, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=2048, out_features=1280, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm3): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(ff): FeedForward( | |
(net): ModuleList( | |
(0): GEGLU( | |
(proj): LoRACompatibleLinear(in_features=1280, out_features=10240, bias=True) | |
) | |
(1): Dropout(p=0.0, inplace=False) | |
(2): LoRACompatibleLinear(in_features=5120, out_features=1280, bias=True) | |
) | |
) | |
).cuda() | |
self.L__self___down_blocks_2_attentions_1_transformer_blocks_3 = BasicTransformerBlock( | |
(norm1): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(attn1): Attention( | |
(to_q): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm2): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(attn2): Attention( | |
(to_q): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=2048, out_features=1280, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=2048, out_features=1280, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm3): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(ff): FeedForward( | |
(net): ModuleList( | |
(0): GEGLU( | |
(proj): LoRACompatibleLinear(in_features=1280, out_features=10240, bias=True) | |
) | |
(1): Dropout(p=0.0, inplace=False) | |
(2): LoRACompatibleLinear(in_features=5120, out_features=1280, bias=True) | |
) | |
) | |
).cuda() | |
self.L__self___down_blocks_2_attentions_1_transformer_blocks_4 = BasicTransformerBlock( | |
(norm1): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(attn1): Attention( | |
(to_q): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm2): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(attn2): Attention( | |
(to_q): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=2048, out_features=1280, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=2048, out_features=1280, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm3): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(ff): FeedForward( | |
(net): ModuleList( | |
(0): GEGLU( | |
(proj): LoRACompatibleLinear(in_features=1280, out_features=10240, bias=True) | |
) | |
(1): Dropout(p=0.0, inplace=False) | |
(2): LoRACompatibleLinear(in_features=5120, out_features=1280, bias=True) | |
) | |
) | |
).cuda() | |
self.L__self___down_blocks_2_attentions_1_transformer_blocks_5 = BasicTransformerBlock( | |
(norm1): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(attn1): Attention( | |
(to_q): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm2): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(attn2): Attention( | |
(to_q): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=2048, out_features=1280, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=2048, out_features=1280, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm3): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(ff): FeedForward( | |
(net): ModuleList( | |
(0): GEGLU( | |
(proj): LoRACompatibleLinear(in_features=1280, out_features=10240, bias=True) | |
) | |
(1): Dropout(p=0.0, inplace=False) | |
(2): LoRACompatibleLinear(in_features=5120, out_features=1280, bias=True) | |
) | |
) | |
).cuda() | |
self.L__self___down_blocks_2_attentions_1_transformer_blocks_6 = BasicTransformerBlock( | |
(norm1): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(attn1): Attention( | |
(to_q): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm2): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(attn2): Attention( | |
(to_q): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=2048, out_features=1280, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=2048, out_features=1280, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm3): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(ff): FeedForward( | |
(net): ModuleList( | |
(0): GEGLU( | |
(proj): LoRACompatibleLinear(in_features=1280, out_features=10240, bias=True) | |
) | |
(1): Dropout(p=0.0, inplace=False) | |
(2): LoRACompatibleLinear(in_features=5120, out_features=1280, bias=True) | |
) | |
) | |
).cuda() | |
self.L__self___down_blocks_2_attentions_1_transformer_blocks_7 = BasicTransformerBlock( | |
(norm1): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(attn1): Attention( | |
(to_q): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm2): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(attn2): Attention( | |
(to_q): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=2048, out_features=1280, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=2048, out_features=1280, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm3): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(ff): FeedForward( | |
(net): ModuleList( | |
(0): GEGLU( | |
(proj): LoRACompatibleLinear(in_features=1280, out_features=10240, bias=True) | |
) | |
(1): Dropout(p=0.0, inplace=False) | |
(2): LoRACompatibleLinear(in_features=5120, out_features=1280, bias=True) | |
) | |
) | |
).cuda() | |
self.L__self___down_blocks_2_attentions_1_transformer_blocks_8 = BasicTransformerBlock( | |
(norm1): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(attn1): Attention( | |
(to_q): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm2): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(attn2): Attention( | |
(to_q): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=2048, out_features=1280, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=2048, out_features=1280, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm3): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(ff): FeedForward( | |
(net): ModuleList( | |
(0): GEGLU( | |
(proj): LoRACompatibleLinear(in_features=1280, out_features=10240, bias=True) | |
) | |
(1): Dropout(p=0.0, inplace=False) | |
(2): LoRACompatibleLinear(in_features=5120, out_features=1280, bias=True) | |
) | |
) | |
).cuda() | |
self.L__self___down_blocks_2_attentions_1_transformer_blocks_9 = BasicTransformerBlock( | |
(norm1): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(attn1): Attention( | |
(to_q): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm2): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(attn2): Attention( | |
(to_q): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=2048, out_features=1280, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=2048, out_features=1280, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm3): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(ff): FeedForward( | |
(net): ModuleList( | |
(0): GEGLU( | |
(proj): LoRACompatibleLinear(in_features=1280, out_features=10240, bias=True) | |
) | |
(1): Dropout(p=0.0, inplace=False) | |
(2): LoRACompatibleLinear(in_features=5120, out_features=1280, bias=True) | |
) | |
) | |
).cuda() | |
self.L__self___mid_block_resnets_0_norm1 = GroupNorm(32, 1280, eps=1e-05, affine=True).cuda() | |
self.L__self___mid_block_resnets_0_norm2 = GroupNorm(32, 1280, eps=1e-05, affine=True).cuda() | |
self.L__self___mid_block_resnets_0_dropout = Dropout(p=0.0, inplace=False) | |
self.L__self___mid_block_attentions_0_norm = GroupNorm(32, 1280, eps=1e-06, affine=True).cuda() | |
self.L__self___mid_block_attentions_0_transformer_blocks_0 = BasicTransformerBlock( | |
(norm1): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(attn1): Attention( | |
(to_q): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm2): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(attn2): Attention( | |
(to_q): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=2048, out_features=1280, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=2048, out_features=1280, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm3): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(ff): FeedForward( | |
(net): ModuleList( | |
(0): GEGLU( | |
(proj): LoRACompatibleLinear(in_features=1280, out_features=10240, bias=True) | |
) | |
(1): Dropout(p=0.0, inplace=False) | |
(2): LoRACompatibleLinear(in_features=5120, out_features=1280, bias=True) | |
) | |
) | |
).cuda() | |
self.L__self___mid_block_attentions_0_transformer_blocks_1 = BasicTransformerBlock( | |
(norm1): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(attn1): Attention( | |
(to_q): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm2): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(attn2): Attention( | |
(to_q): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=2048, out_features=1280, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=2048, out_features=1280, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm3): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(ff): FeedForward( | |
(net): ModuleList( | |
(0): GEGLU( | |
(proj): LoRACompatibleLinear(in_features=1280, out_features=10240, bias=True) | |
) | |
(1): Dropout(p=0.0, inplace=False) | |
(2): LoRACompatibleLinear(in_features=5120, out_features=1280, bias=True) | |
) | |
) | |
).cuda() | |
self.L__self___mid_block_attentions_0_transformer_blocks_2 = BasicTransformerBlock( | |
(norm1): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(attn1): Attention( | |
(to_q): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm2): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(attn2): Attention( | |
(to_q): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=2048, out_features=1280, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=2048, out_features=1280, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm3): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(ff): FeedForward( | |
(net): ModuleList( | |
(0): GEGLU( | |
(proj): LoRACompatibleLinear(in_features=1280, out_features=10240, bias=True) | |
) | |
(1): Dropout(p=0.0, inplace=False) | |
(2): LoRACompatibleLinear(in_features=5120, out_features=1280, bias=True) | |
) | |
) | |
).cuda() | |
self.L__self___mid_block_attentions_0_transformer_blocks_3 = BasicTransformerBlock( | |
(norm1): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(attn1): Attention( | |
(to_q): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm2): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(attn2): Attention( | |
(to_q): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=2048, out_features=1280, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=2048, out_features=1280, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm3): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(ff): FeedForward( | |
(net): ModuleList( | |
(0): GEGLU( | |
(proj): LoRACompatibleLinear(in_features=1280, out_features=10240, bias=True) | |
) | |
(1): Dropout(p=0.0, inplace=False) | |
(2): LoRACompatibleLinear(in_features=5120, out_features=1280, bias=True) | |
) | |
) | |
).cuda() | |
self.L__self___mid_block_attentions_0_transformer_blocks_4 = BasicTransformerBlock( | |
(norm1): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(attn1): Attention( | |
(to_q): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm2): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(attn2): Attention( | |
(to_q): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=2048, out_features=1280, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=2048, out_features=1280, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm3): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(ff): FeedForward( | |
(net): ModuleList( | |
(0): GEGLU( | |
(proj): LoRACompatibleLinear(in_features=1280, out_features=10240, bias=True) | |
) | |
(1): Dropout(p=0.0, inplace=False) | |
(2): LoRACompatibleLinear(in_features=5120, out_features=1280, bias=True) | |
) | |
) | |
).cuda() | |
self.L__self___mid_block_attentions_0_transformer_blocks_5 = BasicTransformerBlock( | |
(norm1): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(attn1): Attention( | |
(to_q): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm2): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(attn2): Attention( | |
(to_q): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=2048, out_features=1280, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=2048, out_features=1280, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm3): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(ff): FeedForward( | |
(net): ModuleList( | |
(0): GEGLU( | |
(proj): LoRACompatibleLinear(in_features=1280, out_features=10240, bias=True) | |
) | |
(1): Dropout(p=0.0, inplace=False) | |
(2): LoRACompatibleLinear(in_features=5120, out_features=1280, bias=True) | |
) | |
) | |
).cuda() | |
self.L__self___mid_block_attentions_0_transformer_blocks_6 = BasicTransformerBlock( | |
(norm1): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(attn1): Attention( | |
(to_q): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm2): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(attn2): Attention( | |
(to_q): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=2048, out_features=1280, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=2048, out_features=1280, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm3): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(ff): FeedForward( | |
(net): ModuleList( | |
(0): GEGLU( | |
(proj): LoRACompatibleLinear(in_features=1280, out_features=10240, bias=True) | |
) | |
(1): Dropout(p=0.0, inplace=False) | |
(2): LoRACompatibleLinear(in_features=5120, out_features=1280, bias=True) | |
) | |
) | |
).cuda() | |
self.L__self___mid_block_attentions_0_transformer_blocks_7 = BasicTransformerBlock( | |
(norm1): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(attn1): Attention( | |
(to_q): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm2): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(attn2): Attention( | |
(to_q): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=2048, out_features=1280, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=2048, out_features=1280, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm3): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(ff): FeedForward( | |
(net): ModuleList( | |
(0): GEGLU( | |
(proj): LoRACompatibleLinear(in_features=1280, out_features=10240, bias=True) | |
) | |
(1): Dropout(p=0.0, inplace=False) | |
(2): LoRACompatibleLinear(in_features=5120, out_features=1280, bias=True) | |
) | |
) | |
).cuda() | |
self.L__self___mid_block_attentions_0_transformer_blocks_8 = BasicTransformerBlock( | |
(norm1): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(attn1): Attention( | |
(to_q): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm2): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(attn2): Attention( | |
(to_q): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=2048, out_features=1280, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=2048, out_features=1280, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm3): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(ff): FeedForward( | |
(net): ModuleList( | |
(0): GEGLU( | |
(proj): LoRACompatibleLinear(in_features=1280, out_features=10240, bias=True) | |
) | |
(1): Dropout(p=0.0, inplace=False) | |
(2): LoRACompatibleLinear(in_features=5120, out_features=1280, bias=True) | |
) | |
) | |
).cuda() | |
self.L__self___mid_block_attentions_0_transformer_blocks_9 = BasicTransformerBlock( | |
(norm1): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(attn1): Attention( | |
(to_q): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm2): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(attn2): Attention( | |
(to_q): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=2048, out_features=1280, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=2048, out_features=1280, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm3): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(ff): FeedForward( | |
(net): ModuleList( | |
(0): GEGLU( | |
(proj): LoRACompatibleLinear(in_features=1280, out_features=10240, bias=True) | |
) | |
(1): Dropout(p=0.0, inplace=False) | |
(2): LoRACompatibleLinear(in_features=5120, out_features=1280, bias=True) | |
) | |
) | |
).cuda() | |
self.L__self___mid_block_resnets_slice_1__None__None___0_norm1 = GroupNorm(32, 1280, eps=1e-05, affine=True).cuda() | |
self.L__self___mid_block_resnets_slice_1__None__None___0_norm2 = GroupNorm(32, 1280, eps=1e-05, affine=True).cuda() | |
self.L__self___mid_block_resnets_slice_1__None__None___0_dropout = Dropout(p=0.0, inplace=False) | |
self.L__self___up_blocks_0_resnets_0_norm1 = GroupNorm(32, 2560, eps=1e-05, affine=True).cuda() | |
self.L__self___up_blocks_0_resnets_0_norm2 = GroupNorm(32, 1280, eps=1e-05, affine=True).cuda() | |
self.L__self___up_blocks_0_resnets_0_dropout = Dropout(p=0.0, inplace=False) | |
self.L__self___up_blocks_0_attentions_0_norm = GroupNorm(32, 1280, eps=1e-06, affine=True).cuda() | |
self.L__self___up_blocks_0_attentions_0_transformer_blocks_0 = BasicTransformerBlock( | |
(norm1): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(attn1): Attention( | |
(to_q): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm2): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(attn2): Attention( | |
(to_q): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=False) | |
(to_k): LoRACompatibleLinear(in_features=2048, out_features=1280, bias=False) | |
(to_v): LoRACompatibleLinear(in_features=2048, out_features=1280, bias=False) | |
(to_out): ModuleList( | |
(0): LoRACompatibleLinear(in_features=1280, out_features=1280, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm3): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(ff): FeedForward( | |
(net): ModuleList( | |
(0): GEGLU( | |
(proj): LoRACompatibleLinear(in_features=1280, out_features=10240, bias=True) | |
) | |
(1): Dropout(p=0.0, inplace=False) | |
(2): LoRACompatibleLinear(in_features=5120, out_features=1280, bias=True) | |
) | |
) | |
).cuda() | |
# up_blocks_0.attentions_0, transformer_blocks 1-9 (same layout as block 0 above):
# LayerNorm(1280) -> self-attention (1280 -> 1280) -> LayerNorm(1280) -> cross-attention over
# 2048-dim encoder states -> LayerNorm(1280) -> GEGLU feed-forward (inner dim 5120), dropout 0.0.
# NOTE: the 20 x 64 head split is an assumption (SDXL default) that the module dump does not record,
# and `BasicTransformerBlock` is assumed to be importable from `diffusers.models.attention`.
self.L__self___up_blocks_0_attentions_0_transformer_blocks_1 = BasicTransformerBlock(dim=1280, num_attention_heads=20, attention_head_dim=64, dropout=0.0, cross_attention_dim=2048).cuda()
self.L__self___up_blocks_0_attentions_0_transformer_blocks_2 = BasicTransformerBlock(dim=1280, num_attention_heads=20, attention_head_dim=64, dropout=0.0, cross_attention_dim=2048).cuda()
self.L__self___up_blocks_0_attentions_0_transformer_blocks_3 = BasicTransformerBlock(dim=1280, num_attention_heads=20, attention_head_dim=64, dropout=0.0, cross_attention_dim=2048).cuda()
self.L__self___up_blocks_0_attentions_0_transformer_blocks_4 = BasicTransformerBlock(dim=1280, num_attention_heads=20, attention_head_dim=64, dropout=0.0, cross_attention_dim=2048).cuda()
self.L__self___up_blocks_0_attentions_0_transformer_blocks_5 = BasicTransformerBlock(dim=1280, num_attention_heads=20, attention_head_dim=64, dropout=0.0, cross_attention_dim=2048).cuda()
self.L__self___up_blocks_0_attentions_0_transformer_blocks_6 = BasicTransformerBlock(dim=1280, num_attention_heads=20, attention_head_dim=64, dropout=0.0, cross_attention_dim=2048).cuda()
self.L__self___up_blocks_0_attentions_0_transformer_blocks_7 = BasicTransformerBlock(dim=1280, num_attention_heads=20, attention_head_dim=64, dropout=0.0, cross_attention_dim=2048).cuda()
self.L__self___up_blocks_0_attentions_0_transformer_blocks_8 = BasicTransformerBlock(dim=1280, num_attention_heads=20, attention_head_dim=64, dropout=0.0, cross_attention_dim=2048).cuda()
self.L__self___up_blocks_0_attentions_0_transformer_blocks_9 = BasicTransformerBlock(dim=1280, num_attention_heads=20, attention_head_dim=64, dropout=0.0, cross_attention_dim=2048).cuda()
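# up_blocks_0.resnets_1 (skip-concat input 2560 = 1280 + 1280 -> 1280) and the attentions_1 input GroupNorm.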
self.L__self___up_blocks_0_resnets_1_norm1 = GroupNorm(32, 2560, eps=1e-05, affine=True).cuda() | |
self.L__self___up_blocks_0_resnets_1_norm2 = GroupNorm(32, 1280, eps=1e-05, affine=True).cuda() | |
self.L__self___up_blocks_0_resnets_1_dropout = Dropout(p=0.0, inplace=False) | |
self.L__self___up_blocks_0_attentions_1_norm = GroupNorm(32, 1280, eps=1e-06, affine=True).cuda() | |
# up_blocks_0.attentions_1, transformer_blocks 0-9: same 1280-dim block layout as attentions_0
# (20 x 64 head split assumed, cross-attention over 2048-dim encoder states, GEGLU inner dim 5120).
self.L__self___up_blocks_0_attentions_1_transformer_blocks_0 = BasicTransformerBlock(dim=1280, num_attention_heads=20, attention_head_dim=64, dropout=0.0, cross_attention_dim=2048).cuda()
self.L__self___up_blocks_0_attentions_1_transformer_blocks_1 = BasicTransformerBlock(dim=1280, num_attention_heads=20, attention_head_dim=64, dropout=0.0, cross_attention_dim=2048).cuda()
self.L__self___up_blocks_0_attentions_1_transformer_blocks_2 = BasicTransformerBlock(dim=1280, num_attention_heads=20, attention_head_dim=64, dropout=0.0, cross_attention_dim=2048).cuda()
self.L__self___up_blocks_0_attentions_1_transformer_blocks_3 = BasicTransformerBlock(dim=1280, num_attention_heads=20, attention_head_dim=64, dropout=0.0, cross_attention_dim=2048).cuda()
self.L__self___up_blocks_0_attentions_1_transformer_blocks_4 = BasicTransformerBlock(dim=1280, num_attention_heads=20, attention_head_dim=64, dropout=0.0, cross_attention_dim=2048).cuda()
self.L__self___up_blocks_0_attentions_1_transformer_blocks_5 = BasicTransformerBlock(dim=1280, num_attention_heads=20, attention_head_dim=64, dropout=0.0, cross_attention_dim=2048).cuda()
self.L__self___up_blocks_0_attentions_1_transformer_blocks_6 = BasicTransformerBlock(dim=1280, num_attention_heads=20, attention_head_dim=64, dropout=0.0, cross_attention_dim=2048).cuda()
self.L__self___up_blocks_0_attentions_1_transformer_blocks_7 = BasicTransformerBlock(dim=1280, num_attention_heads=20, attention_head_dim=64, dropout=0.0, cross_attention_dim=2048).cuda()
self.L__self___up_blocks_0_attentions_1_transformer_blocks_8 = BasicTransformerBlock(dim=1280, num_attention_heads=20, attention_head_dim=64, dropout=0.0, cross_attention_dim=2048).cuda()
self.L__self___up_blocks_0_attentions_1_transformer_blocks_9 = BasicTransformerBlock(dim=1280, num_attention_heads=20, attention_head_dim=64, dropout=0.0, cross_attention_dim=2048).cuda()
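# up_blocks_0.resnets_2 (skip-concat input 1920 = 1280 + 640 -> 1280) and the attentions_2 input GroupNorm.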
self.L__self___up_blocks_0_resnets_2_norm1 = GroupNorm(32, 1920, eps=1e-05, affine=True).cuda() | |
self.L__self___up_blocks_0_resnets_2_norm2 = GroupNorm(32, 1280, eps=1e-05, affine=True).cuda() | |
self.L__self___up_blocks_0_resnets_2_dropout = Dropout(p=0.0, inplace=False) | |
self.L__self___up_blocks_0_attentions_2_norm = GroupNorm(32, 1280, eps=1e-06, affine=True).cuda() | |
# up_blocks_0.attentions_2, transformer_blocks 0-9: same 1280-dim block layout as attentions_0
# (20 x 64 head split assumed, cross-attention over 2048-dim encoder states, GEGLU inner dim 5120).
self.L__self___up_blocks_0_attentions_2_transformer_blocks_0 = BasicTransformerBlock(dim=1280, num_attention_heads=20, attention_head_dim=64, dropout=0.0, cross_attention_dim=2048).cuda()
self.L__self___up_blocks_0_attentions_2_transformer_blocks_1 = BasicTransformerBlock(dim=1280, num_attention_heads=20, attention_head_dim=64, dropout=0.0, cross_attention_dim=2048).cuda()
self.L__self___up_blocks_0_attentions_2_transformer_blocks_2 = BasicTransformerBlock(dim=1280, num_attention_heads=20, attention_head_dim=64, dropout=0.0, cross_attention_dim=2048).cuda()
self.L__self___up_blocks_0_attentions_2_transformer_blocks_3 = BasicTransformerBlock(dim=1280, num_attention_heads=20, attention_head_dim=64, dropout=0.0, cross_attention_dim=2048).cuda()
self.L__self___up_blocks_0_attentions_2_transformer_blocks_4 = BasicTransformerBlock(dim=1280, num_attention_heads=20, attention_head_dim=64, dropout=0.0, cross_attention_dim=2048).cuda()
self.L__self___up_blocks_0_attentions_2_transformer_blocks_5 = BasicTransformerBlock(dim=1280, num_attention_heads=20, attention_head_dim=64, dropout=0.0, cross_attention_dim=2048).cuda()
self.L__self___up_blocks_0_attentions_2_transformer_blocks_6 = BasicTransformerBlock(dim=1280, num_attention_heads=20, attention_head_dim=64, dropout=0.0, cross_attention_dim=2048).cuda()
self.L__self___up_blocks_0_attentions_2_transformer_blocks_7 = BasicTransformerBlock(dim=1280, num_attention_heads=20, attention_head_dim=64, dropout=0.0, cross_attention_dim=2048).cuda()
self.L__self___up_blocks_0_attentions_2_transformer_blocks_8 = BasicTransformerBlock(dim=1280, num_attention_heads=20, attention_head_dim=64, dropout=0.0, cross_attention_dim=2048).cuda()
self.L__self___up_blocks_0_attentions_2_transformer_blocks_9 = BasicTransformerBlock(dim=1280, num_attention_heads=20, attention_head_dim=64, dropout=0.0, cross_attention_dim=2048).cuda()
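# up_blocks_1.resnets_0 (skip-concat input 1920 = 1280 + 640 -> 640) and the attentions_0 input GroupNorm at 640 channels.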
self.L__self___up_blocks_1_resnets_0_norm1 = GroupNorm(32, 1920, eps=1e-05, affine=True).cuda() | |
self.L__self___up_blocks_1_resnets_0_norm2 = GroupNorm(32, 640, eps=1e-05, affine=True).cuda() | |
self.L__self___up_blocks_1_resnets_0_dropout = Dropout(p=0.0, inplace=False) | |
self.L__self___up_blocks_1_attentions_0_norm = GroupNorm(32, 640, eps=1e-06, affine=True).cuda() | |
# up_blocks_1.attentions_0, transformer_blocks 0-1: 640-dim blocks (10 x 64 head split assumed),
# cross-attention over 2048-dim encoder states, GEGLU feed-forward with inner dim 2560.
self.L__self___up_blocks_1_attentions_0_transformer_blocks_0 = BasicTransformerBlock(dim=640, num_attention_heads=10, attention_head_dim=64, dropout=0.0, cross_attention_dim=2048).cuda()
self.L__self___up_blocks_1_attentions_0_transformer_blocks_1 = BasicTransformerBlock(dim=640, num_attention_heads=10, attention_head_dim=64, dropout=0.0, cross_attention_dim=2048).cuda()
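# up_blocks_1.resnets_1 (input 1280 -> 640) and the attentions_1 input GroupNorm.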
self.L__self___up_blocks_1_resnets_1_norm1 = GroupNorm(32, 1280, eps=1e-05, affine=True).cuda() | |
self.L__self___up_blocks_1_resnets_1_norm2 = GroupNorm(32, 640, eps=1e-05, affine=True).cuda() | |
self.L__self___up_blocks_1_resnets_1_dropout = Dropout(p=0.0, inplace=False) | |
self.L__self___up_blocks_1_attentions_1_norm = GroupNorm(32, 640, eps=1e-06, affine=True).cuda() | |
# up_blocks_1.attentions_1, transformer_blocks 0-1: same 640-dim block layout as attentions_0
# (10 x 64 head split assumed, cross-attention over 2048-dim encoder states).
self.L__self___up_blocks_1_attentions_1_transformer_blocks_0 = BasicTransformerBlock(dim=640, num_attention_heads=10, attention_head_dim=64, dropout=0.0, cross_attention_dim=2048).cuda()
self.L__self___up_blocks_1_attentions_1_transformer_blocks_1 = BasicTransformerBlock(dim=640, num_attention_heads=10, attention_head_dim=64, dropout=0.0, cross_attention_dim=2048).cuda()
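# up_blocks_1.resnets_2 (skip-concat input 960 = 640 + 320 -> 640) and the attentions_2 input GroupNorm.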
self.L__self___up_blocks_1_resnets_2_norm1 = GroupNorm(32, 960, eps=1e-05, affine=True).cuda() | |
self.L__self___up_blocks_1_resnets_2_norm2 = GroupNorm(32, 640, eps=1e-05, affine=True).cuda() | |
self.L__self___up_blocks_1_resnets_2_dropout = Dropout(p=0.0, inplace=False) | |
self.L__self___up_blocks_1_attentions_2_norm = GroupNorm(32, 640, eps=1e-06, affine=True).cuda() | |
# up_blocks_1.attentions_2, transformer_blocks 0-1: same 640-dim block layout as attentions_0
# (10 x 64 head split assumed, cross-attention over 2048-dim encoder states).
self.L__self___up_blocks_1_attentions_2_transformer_blocks_0 = BasicTransformerBlock(dim=640, num_attention_heads=10, attention_head_dim=64, dropout=0.0, cross_attention_dim=2048).cuda()
self.L__self___up_blocks_1_attentions_2_transformer_blocks_1 = BasicTransformerBlock(dim=640, num_attention_heads=10, attention_head_dim=64, dropout=0.0, cross_attention_dim=2048).cuda()
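# up_blocks_2 resnet norms (960/640 -> 320; no attention blocks in this up block) and the output head (GroupNorm + 320 -> 4 conv).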
self.L__self___up_blocks_2_resnets_0_norm1 = GroupNorm(32, 960, eps=1e-05, affine=True).cuda() | |
self.L__self___up_blocks_2_resnets_0_norm2 = GroupNorm(32, 320, eps=1e-05, affine=True).cuda() | |
self.L__self___up_blocks_2_resnets_0_dropout = Dropout(p=0.0, inplace=False) | |
self.L__self___up_blocks_2_resnets_1_norm1 = GroupNorm(32, 640, eps=1e-05, affine=True).cuda() | |
self.L__self___up_blocks_2_resnets_1_norm2 = GroupNorm(32, 320, eps=1e-05, affine=True).cuda() | |
self.L__self___up_blocks_2_resnets_1_dropout = Dropout(p=0.0, inplace=False) | |
self.L__self___up_blocks_2_resnets_2_norm1 = GroupNorm(32, 640, eps=1e-05, affine=True).cuda() | |
self.L__self___up_blocks_2_resnets_2_norm2 = GroupNorm(32, 320, eps=1e-05, affine=True).cuda() | |
self.L__self___up_blocks_2_resnets_2_dropout = Dropout(p=0.0, inplace=False) | |
self.L__self___conv_norm_out = GroupNorm(32, 320, eps=1e-05, affine=True).cuda() | |
self.L__self___conv_out = Conv2d(320, 4, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)).cuda() | |
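# Flat fp16 CUDA weight/bias tensors consumed directly by the traced graph; they are randomly
# initialized, so presumably only their shapes, dtypes, and devices matter for reproducing the issue.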
self.L__self___time_embedding_linear_1_weight = torch.nn.Parameter(torch.randn([1280, 320], dtype=torch.float16, device="cuda")) | |
self.L__self___time_embedding_linear_1_bias = torch.nn.Parameter(torch.randn([1280], dtype=torch.float16, device="cuda")) | |
self.L__self___time_embedding_linear_2_weight = torch.nn.Parameter(torch.randn([1280, 1280], dtype=torch.float16, device="cuda")) | |
self.L__self___time_embedding_linear_2_bias = torch.nn.Parameter(torch.randn([1280], dtype=torch.float16, device="cuda")) | |
self.L__self___add_embedding_linear_1_weight = torch.nn.Parameter(torch.randn([1280, 2816], dtype=torch.float16, device="cuda")) | |
self.L__self___add_embedding_linear_1_bias = torch.nn.Parameter(torch.randn([1280], dtype=torch.float16, device="cuda")) | |
self.L__self___add_embedding_linear_2_weight = torch.nn.Parameter(torch.randn([1280, 1280], dtype=torch.float16, device="cuda")) | |
self.L__self___add_embedding_linear_2_bias = torch.nn.Parameter(torch.randn([1280], dtype=torch.float16, device="cuda")) | |
self.L__self___down_blocks_0_resnets_0_conv1_weight = torch.nn.Parameter(torch.randn([320, 320, 3, 3], dtype=torch.float16, device="cuda")) | |
self.L__self___down_blocks_0_resnets_0_conv1_bias = torch.nn.Parameter(torch.randn([320], dtype=torch.float16, device="cuda")) | |
self.L__self___down_blocks_0_resnets_0_time_emb_proj_weight = torch.nn.Parameter(torch.randn([320, 1280], dtype=torch.float16, device="cuda")) | |
self.L__self___down_blocks_0_resnets_0_time_emb_proj_bias = torch.nn.Parameter(torch.randn([320], dtype=torch.float16, device="cuda")) | |
self.L__self___down_blocks_0_resnets_0_conv2_weight = torch.nn.Parameter(torch.randn([320, 320, 3, 3], dtype=torch.float16, device="cuda")) | |
self.L__self___down_blocks_0_resnets_0_conv2_bias = torch.nn.Parameter(torch.randn([320], dtype=torch.float16, device="cuda")) | |
self.L__self___down_blocks_0_resnets_1_conv1_weight = torch.nn.Parameter(torch.randn([320, 320, 3, 3], dtype=torch.float16, device="cuda")) | |
self.L__self___down_blocks_0_resnets_1_conv1_bias = torch.nn.Parameter(torch.randn([320], dtype=torch.float16, device="cuda")) | |
self.L__self___down_blocks_0_resnets_1_time_emb_proj_weight = torch.nn.Parameter(torch.randn([320, 1280], dtype=torch.float16, device="cuda")) | |
self.L__self___down_blocks_0_resnets_1_time_emb_proj_bias = torch.nn.Parameter(torch.randn([320], dtype=torch.float16, device="cuda")) | |
self.L__self___down_blocks_0_resnets_1_conv2_weight = torch.nn.Parameter(torch.randn([320, 320, 3, 3], dtype=torch.float16, device="cuda")) | |
self.L__self___down_blocks_0_resnets_1_conv2_bias = torch.nn.Parameter(torch.randn([320], dtype=torch.float16, device="cuda")) | |
self.L__self___down_blocks_0_downsamplers_0_conv_weight = torch.nn.Parameter(torch.randn([320, 320, 3, 3], dtype=torch.float16, device="cuda")) | |
self.L__self___down_blocks_0_downsamplers_0_conv_bias = torch.nn.Parameter(torch.randn([320], dtype=torch.float16, device="cuda")) | |
self.L__self___down_blocks_1_resnets_0_conv1_weight = torch.nn.Parameter(torch.randn([640, 320, 3, 3], dtype=torch.float16, device="cuda")) | |
self.L__self___down_blocks_1_resnets_0_conv1_bias = torch.nn.Parameter(torch.randn([640], dtype=torch.float16, device="cuda")) | |
self.L__self___down_blocks_1_resnets_0_time_emb_proj_weight = torch.nn.Parameter(torch.randn([640, 1280], dtype=torch.float16, device="cuda")) | |
self.L__self___down_blocks_1_resnets_0_time_emb_proj_bias = torch.nn.Parameter(torch.randn([640], dtype=torch.float16, device="cuda")) | |
self.L__self___down_blocks_1_resnets_0_conv2_weight = torch.nn.Parameter(torch.randn([640, 640, 3, 3], dtype=torch.float16, device="cuda")) | |
self.L__self___down_blocks_1_resnets_0_conv2_bias = torch.nn.Parameter(torch.randn([640], dtype=torch.float16, device="cuda")) | |
self.L__self___down_blocks_1_resnets_0_conv_shortcut_weight = torch.nn.Parameter(torch.randn([640, 320, 1, 1], dtype=torch.float16, device="cuda")) | |
self.L__self___down_blocks_1_resnets_0_conv_shortcut_bias = torch.nn.Parameter(torch.randn([640], dtype=torch.float16, device="cuda")) | |
self.L__self___down_blocks_1_attentions_0_proj_in_weight = torch.nn.Parameter(torch.randn([640, 640], dtype=torch.float16, device="cuda")) | |
self.L__self___down_blocks_1_attentions_0_proj_in_bias = torch.nn.Parameter(torch.randn([640], dtype=torch.float16, device="cuda")) | |
self.L__self___down_blocks_1_attentions_0_proj_out_weight = torch.nn.Parameter(torch.randn([640, 640], dtype=torch.float16, device="cuda")) | |
self.L__self___down_blocks_1_attentions_0_proj_out_bias = torch.nn.Parameter(torch.randn([640], dtype=torch.float16, device="cuda")) | |
self.L__self___down_blocks_1_resnets_1_conv1_weight = torch.nn.Parameter(torch.randn([640, 640, 3, 3], dtype=torch.float16, device="cuda")) | |
self.L__self___down_blocks_1_resnets_1_conv1_bias = torch.nn.Parameter(torch.randn([640], dtype=torch.float16, device="cuda")) | |
self.L__self___down_blocks_1_resnets_1_time_emb_proj_weight = torch.nn.Parameter(torch.randn([640, 1280], dtype=torch.float16, device="cuda")) | |
self.L__self___down_blocks_1_resnets_1_time_emb_proj_bias = torch.nn.Parameter(torch.randn([640], dtype=torch.float16, device="cuda")) | |
self.L__self___down_blocks_1_resnets_1_conv2_weight = torch.nn.Parameter(torch.randn([640, 640, 3, 3], dtype=torch.float16, device="cuda")) | |
self.L__self___down_blocks_1_resnets_1_conv2_bias = torch.nn.Parameter(torch.randn([640], dtype=torch.float16, device="cuda")) | |
self.L__self___down_blocks_1_attentions_1_proj_in_weight = torch.nn.Parameter(torch.randn([640, 640], dtype=torch.float16, device="cuda")) | |
self.L__self___down_blocks_1_attentions_1_proj_in_bias = torch.nn.Parameter(torch.randn([640], dtype=torch.float16, device="cuda")) | |
self.L__self___down_blocks_1_attentions_1_proj_out_weight = torch.nn.Parameter(torch.randn([640, 640], dtype=torch.float16, device="cuda")) | |
self.L__self___down_blocks_1_attentions_1_proj_out_bias = torch.nn.Parameter(torch.randn([640], dtype=torch.float16, device="cuda")) | |
self.L__self___down_blocks_1_downsamplers_0_conv_weight = torch.nn.Parameter(torch.randn([640, 640, 3, 3], dtype=torch.float16, device="cuda")) | |
self.L__self___down_blocks_1_downsamplers_0_conv_bias = torch.nn.Parameter(torch.randn([640], dtype=torch.float16, device="cuda")) | |
self.L__self___down_blocks_2_resnets_0_conv1_weight = torch.nn.Parameter(torch.randn([1280, 640, 3, 3], dtype=torch.float16, device="cuda")) | |
self.L__self___down_blocks_2_resnets_0_conv1_bias = torch.nn.Parameter(torch.randn([1280], dtype=torch.float16, device="cuda")) | |
self.L__self___down_blocks_2_resnets_0_time_emb_proj_weight = torch.nn.Parameter(torch.randn([1280, 1280], dtype=torch.float16, device="cuda")) | |
self.L__self___down_blocks_2_resnets_0_time_emb_proj_bias = torch.nn.Parameter(torch.randn([1280], dtype=torch.float16, device="cuda")) | |
self.L__self___down_blocks_2_resnets_0_conv2_weight = torch.nn.Parameter(torch.randn([1280, 1280, 3, 3], dtype=torch.float16, device="cuda")) | |
self.L__self___down_blocks_2_resnets_0_conv2_bias = torch.nn.Parameter(torch.randn([1280], dtype=torch.float16, device="cuda")) | |
self.L__self___down_blocks_2_resnets_0_conv_shortcut_weight = torch.nn.Parameter(torch.randn([1280, 640, 1, 1], dtype=torch.float16, device="cuda")) | |
self.L__self___down_blocks_2_resnets_0_conv_shortcut_bias = torch.nn.Parameter(torch.randn([1280], dtype=torch.float16, device="cuda")) | |
self.L__self___down_blocks_2_attentions_0_proj_in_weight = torch.nn.Parameter(torch.randn([1280, 1280], dtype=torch.float16, device="cuda")) | |
self.L__self___down_blocks_2_attentions_0_proj_in_bias = torch.nn.Parameter(torch.randn([1280], dtype=torch.float16, device="cuda")) | |
self.L__self___down_blocks_2_attentions_0_proj_out_weight = torch.nn.Parameter(torch.randn([1280, 1280], dtype=torch.float16, device="cuda")) | |
self.L__self___down_blocks_2_attentions_0_proj_out_bias = torch.nn.Parameter(torch.randn([1280], dtype=torch.float16, device="cuda")) | |
self.L__self___down_blocks_2_resnets_1_conv1_weight = torch.nn.Parameter(torch.randn([1280, 1280, 3, 3], dtype=torch.float16, device="cuda")) | |
self.L__self___down_blocks_2_resnets_1_conv1_bias = torch.nn.Parameter(torch.randn([1280], dtype=torch.float16, device="cuda")) | |
self.L__self___down_blocks_2_resnets_1_time_emb_proj_weight = torch.nn.Parameter(torch.randn([1280, 1280], dtype=torch.float16, device="cuda")) | |
self.L__self___down_blocks_2_resnets_1_time_emb_proj_bias = torch.nn.Parameter(torch.randn([1280], dtype=torch.float16, device="cuda")) | |
self.L__self___down_blocks_2_resnets_1_conv2_weight = torch.nn.Parameter(torch.randn([1280, 1280, 3, 3], dtype=torch.float16, device="cuda")) | |
self.L__self___down_blocks_2_resnets_1_conv2_bias = torch.nn.Parameter(torch.randn([1280], dtype=torch.float16, device="cuda")) | |
self.L__self___down_blocks_2_attentions_1_proj_in_weight = torch.nn.Parameter(torch.randn([1280, 1280], dtype=torch.float16, device="cuda")) | |
self.L__self___down_blocks_2_attentions_1_proj_in_bias = torch.nn.Parameter(torch.randn([1280], dtype=torch.float16, device="cuda")) | |
self.L__self___down_blocks_2_attentions_1_proj_out_weight = torch.nn.Parameter(torch.randn([1280, 1280], dtype=torch.float16, device="cuda")) | |
self.L__self___down_blocks_2_attentions_1_proj_out_bias = torch.nn.Parameter(torch.randn([1280], dtype=torch.float16, device="cuda")) | |
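# mid_block parameters: resnet 0, the attention proj_in/proj_out, and a second resnet captured under the mangled resnets_slice_1__None__None___0 name.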
self.L__self___mid_block_resnets_0_conv1_weight = torch.nn.Parameter(torch.randn([1280, 1280, 3, 3], dtype=torch.float16, device="cuda")) | |
self.L__self___mid_block_resnets_0_conv1_bias = torch.nn.Parameter(torch.randn([1280], dtype=torch.float16, device="cuda")) | |
self.L__self___mid_block_resnets_0_time_emb_proj_weight = torch.nn.Parameter(torch.randn([1280, 1280], dtype=torch.float16, device="cuda")) | |
self.L__self___mid_block_resnets_0_time_emb_proj_bias = torch.nn.Parameter(torch.randn([1280], dtype=torch.float16, device="cuda")) | |
self.L__self___mid_block_resnets_0_conv2_weight = torch.nn.Parameter(torch.randn([1280, 1280, 3, 3], dtype=torch.float16, device="cuda")) | |
self.L__self___mid_block_resnets_0_conv2_bias = torch.nn.Parameter(torch.randn([1280], dtype=torch.float16, device="cuda")) | |
self.L__self___mid_block_attentions_0_proj_in_weight = torch.nn.Parameter(torch.randn([1280, 1280], dtype=torch.float16, device="cuda")) | |
self.L__self___mid_block_attentions_0_proj_in_bias = torch.nn.Parameter(torch.randn([1280], dtype=torch.float16, device="cuda")) | |
self.L__self___mid_block_attentions_0_proj_out_weight = torch.nn.Parameter(torch.randn([1280, 1280], dtype=torch.float16, device="cuda")) | |
self.L__self___mid_block_attentions_0_proj_out_bias = torch.nn.Parameter(torch.randn([1280], dtype=torch.float16, device="cuda")) | |
self.L__self___mid_block_resnets_slice_1__None__None___0_conv1_weight = torch.nn.Parameter(torch.randn([1280, 1280, 3, 3], dtype=torch.float16, device="cuda")) | |
self.L__self___mid_block_resnets_slice_1__None__None___0_conv1_bias = torch.nn.Parameter(torch.randn([1280], dtype=torch.float16, device="cuda")) | |
self.L__self___mid_block_resnets_slice_1__None__None___0_time_emb_proj_weight = torch.nn.Parameter(torch.randn([1280, 1280], dtype=torch.float16, device="cuda")) | |
self.L__self___mid_block_resnets_slice_1__None__None___0_time_emb_proj_bias = torch.nn.Parameter(torch.randn([1280], dtype=torch.float16, device="cuda")) | |
self.L__self___mid_block_resnets_slice_1__None__None___0_conv2_weight = torch.nn.Parameter(torch.randn([1280, 1280, 3, 3], dtype=torch.float16, device="cuda")) | |
self.L__self___mid_block_resnets_slice_1__None__None___0_conv2_bias = torch.nn.Parameter(torch.randn([1280], dtype=torch.float16, device="cuda")) | |
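# up_blocks[0] parameters (1280 channels); the 2560- and 1920-channel conv1/conv_shortcut inputs presumably correspond to skip connections concatenated from the down path.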
self.L__self___up_blocks_0_resnets_0_conv1_weight = torch.nn.Parameter(torch.randn([1280, 2560, 3, 3], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_0_resnets_0_conv1_bias = torch.nn.Parameter(torch.randn([1280], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_0_resnets_0_time_emb_proj_weight = torch.nn.Parameter(torch.randn([1280, 1280], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_0_resnets_0_time_emb_proj_bias = torch.nn.Parameter(torch.randn([1280], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_0_resnets_0_conv2_weight = torch.nn.Parameter(torch.randn([1280, 1280, 3, 3], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_0_resnets_0_conv2_bias = torch.nn.Parameter(torch.randn([1280], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_0_resnets_0_conv_shortcut_weight = torch.nn.Parameter(torch.randn([1280, 2560, 1, 1], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_0_resnets_0_conv_shortcut_bias = torch.nn.Parameter(torch.randn([1280], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_0_attentions_0_proj_in_weight = torch.nn.Parameter(torch.randn([1280, 1280], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_0_attentions_0_proj_in_bias = torch.nn.Parameter(torch.randn([1280], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_0_attentions_0_proj_out_weight = torch.nn.Parameter(torch.randn([1280, 1280], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_0_attentions_0_proj_out_bias = torch.nn.Parameter(torch.randn([1280], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_0_resnets_1_conv1_weight = torch.nn.Parameter(torch.randn([1280, 2560, 3, 3], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_0_resnets_1_conv1_bias = torch.nn.Parameter(torch.randn([1280], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_0_resnets_1_time_emb_proj_weight = torch.nn.Parameter(torch.randn([1280, 1280], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_0_resnets_1_time_emb_proj_bias = torch.nn.Parameter(torch.randn([1280], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_0_resnets_1_conv2_weight = torch.nn.Parameter(torch.randn([1280, 1280, 3, 3], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_0_resnets_1_conv2_bias = torch.nn.Parameter(torch.randn([1280], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_0_resnets_1_conv_shortcut_weight = torch.nn.Parameter(torch.randn([1280, 2560, 1, 1], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_0_resnets_1_conv_shortcut_bias = torch.nn.Parameter(torch.randn([1280], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_0_attentions_1_proj_in_weight = torch.nn.Parameter(torch.randn([1280, 1280], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_0_attentions_1_proj_in_bias = torch.nn.Parameter(torch.randn([1280], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_0_attentions_1_proj_out_weight = torch.nn.Parameter(torch.randn([1280, 1280], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_0_attentions_1_proj_out_bias = torch.nn.Parameter(torch.randn([1280], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_0_resnets_2_conv1_weight = torch.nn.Parameter(torch.randn([1280, 1920, 3, 3], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_0_resnets_2_conv1_bias = torch.nn.Parameter(torch.randn([1280], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_0_resnets_2_time_emb_proj_weight = torch.nn.Parameter(torch.randn([1280, 1280], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_0_resnets_2_time_emb_proj_bias = torch.nn.Parameter(torch.randn([1280], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_0_resnets_2_conv2_weight = torch.nn.Parameter(torch.randn([1280, 1280, 3, 3], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_0_resnets_2_conv2_bias = torch.nn.Parameter(torch.randn([1280], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_0_resnets_2_conv_shortcut_weight = torch.nn.Parameter(torch.randn([1280, 1920, 1, 1], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_0_resnets_2_conv_shortcut_bias = torch.nn.Parameter(torch.randn([1280], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_0_attentions_2_proj_in_weight = torch.nn.Parameter(torch.randn([1280, 1280], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_0_attentions_2_proj_in_bias = torch.nn.Parameter(torch.randn([1280], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_0_attentions_2_proj_out_weight = torch.nn.Parameter(torch.randn([1280, 1280], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_0_attentions_2_proj_out_bias = torch.nn.Parameter(torch.randn([1280], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_0_upsamplers_0_conv_weight = torch.nn.Parameter(torch.randn([1280, 1280, 3, 3], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_0_upsamplers_0_conv_bias = torch.nn.Parameter(torch.randn([1280], dtype=torch.float16, device="cuda")) | |
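# up_blocks[1] parameters (640 channels): resnets, attention projections, and the upsampler conv.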
self.L__self___up_blocks_1_resnets_0_conv1_weight = torch.nn.Parameter(torch.randn([640, 1920, 3, 3], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_1_resnets_0_conv1_bias = torch.nn.Parameter(torch.randn([640], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_1_resnets_0_time_emb_proj_weight = torch.nn.Parameter(torch.randn([640, 1280], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_1_resnets_0_time_emb_proj_bias = torch.nn.Parameter(torch.randn([640], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_1_resnets_0_conv2_weight = torch.nn.Parameter(torch.randn([640, 640, 3, 3], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_1_resnets_0_conv2_bias = torch.nn.Parameter(torch.randn([640], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_1_resnets_0_conv_shortcut_weight = torch.nn.Parameter(torch.randn([640, 1920, 1, 1], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_1_resnets_0_conv_shortcut_bias = torch.nn.Parameter(torch.randn([640], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_1_attentions_0_proj_in_weight = torch.nn.Parameter(torch.randn([640, 640], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_1_attentions_0_proj_in_bias = torch.nn.Parameter(torch.randn([640], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_1_attentions_0_proj_out_weight = torch.nn.Parameter(torch.randn([640, 640], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_1_attentions_0_proj_out_bias = torch.nn.Parameter(torch.randn([640], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_1_resnets_1_conv1_weight = torch.nn.Parameter(torch.randn([640, 1280, 3, 3], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_1_resnets_1_conv1_bias = torch.nn.Parameter(torch.randn([640], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_1_resnets_1_time_emb_proj_weight = torch.nn.Parameter(torch.randn([640, 1280], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_1_resnets_1_time_emb_proj_bias = torch.nn.Parameter(torch.randn([640], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_1_resnets_1_conv2_weight = torch.nn.Parameter(torch.randn([640, 640, 3, 3], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_1_resnets_1_conv2_bias = torch.nn.Parameter(torch.randn([640], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_1_resnets_1_conv_shortcut_weight = torch.nn.Parameter(torch.randn([640, 1280, 1, 1], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_1_resnets_1_conv_shortcut_bias = torch.nn.Parameter(torch.randn([640], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_1_attentions_1_proj_in_weight = torch.nn.Parameter(torch.randn([640, 640], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_1_attentions_1_proj_in_bias = torch.nn.Parameter(torch.randn([640], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_1_attentions_1_proj_out_weight = torch.nn.Parameter(torch.randn([640, 640], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_1_attentions_1_proj_out_bias = torch.nn.Parameter(torch.randn([640], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_1_resnets_2_conv1_weight = torch.nn.Parameter(torch.randn([640, 960, 3, 3], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_1_resnets_2_conv1_bias = torch.nn.Parameter(torch.randn([640], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_1_resnets_2_time_emb_proj_weight = torch.nn.Parameter(torch.randn([640, 1280], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_1_resnets_2_time_emb_proj_bias = torch.nn.Parameter(torch.randn([640], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_1_resnets_2_conv2_weight = torch.nn.Parameter(torch.randn([640, 640, 3, 3], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_1_resnets_2_conv2_bias = torch.nn.Parameter(torch.randn([640], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_1_resnets_2_conv_shortcut_weight = torch.nn.Parameter(torch.randn([640, 960, 1, 1], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_1_resnets_2_conv_shortcut_bias = torch.nn.Parameter(torch.randn([640], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_1_attentions_2_proj_in_weight = torch.nn.Parameter(torch.randn([640, 640], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_1_attentions_2_proj_in_bias = torch.nn.Parameter(torch.randn([640], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_1_attentions_2_proj_out_weight = torch.nn.Parameter(torch.randn([640, 640], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_1_attentions_2_proj_out_bias = torch.nn.Parameter(torch.randn([640], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_1_upsamplers_0_conv_weight = torch.nn.Parameter(torch.randn([640, 640, 3, 3], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_1_upsamplers_0_conv_bias = torch.nn.Parameter(torch.randn([640], dtype=torch.float16, device="cuda")) | |
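# up_blocks[2] parameters (320 channels); only resnet weights are listed for this block, no attention projections.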
self.L__self___up_blocks_2_resnets_0_conv1_weight = torch.nn.Parameter(torch.randn([320, 960, 3, 3], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_2_resnets_0_conv1_bias = torch.nn.Parameter(torch.randn([320], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_2_resnets_0_time_emb_proj_weight = torch.nn.Parameter(torch.randn([320, 1280], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_2_resnets_0_time_emb_proj_bias = torch.nn.Parameter(torch.randn([320], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_2_resnets_0_conv2_weight = torch.nn.Parameter(torch.randn([320, 320, 3, 3], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_2_resnets_0_conv2_bias = torch.nn.Parameter(torch.randn([320], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_2_resnets_0_conv_shortcut_weight = torch.nn.Parameter(torch.randn([320, 960, 1, 1], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_2_resnets_0_conv_shortcut_bias = torch.nn.Parameter(torch.randn([320], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_2_resnets_1_conv1_weight = torch.nn.Parameter(torch.randn([320, 640, 3, 3], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_2_resnets_1_conv1_bias = torch.nn.Parameter(torch.randn([320], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_2_resnets_1_time_emb_proj_weight = torch.nn.Parameter(torch.randn([320, 1280], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_2_resnets_1_time_emb_proj_bias = torch.nn.Parameter(torch.randn([320], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_2_resnets_1_conv2_weight = torch.nn.Parameter(torch.randn([320, 320, 3, 3], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_2_resnets_1_conv2_bias = torch.nn.Parameter(torch.randn([320], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_2_resnets_1_conv_shortcut_weight = torch.nn.Parameter(torch.randn([320, 640, 1, 1], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_2_resnets_1_conv_shortcut_bias = torch.nn.Parameter(torch.randn([320], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_2_resnets_2_conv1_weight = torch.nn.Parameter(torch.randn([320, 640, 3, 3], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_2_resnets_2_conv1_bias = torch.nn.Parameter(torch.randn([320], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_2_resnets_2_time_emb_proj_weight = torch.nn.Parameter(torch.randn([320, 1280], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_2_resnets_2_time_emb_proj_bias = torch.nn.Parameter(torch.randn([320], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_2_resnets_2_conv2_weight = torch.nn.Parameter(torch.randn([320, 320, 3, 3], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_2_resnets_2_conv2_bias = torch.nn.Parameter(torch.randn([320], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_2_resnets_2_conv_shortcut_weight = torch.nn.Parameter(torch.randn([320, 640, 1, 1], dtype=torch.float16, device="cuda")) | |
self.L__self___up_blocks_2_resnets_2_conv_shortcut_bias = torch.nn.Parameter(torch.randn([320], dtype=torch.float16, device="cuda")) | |
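
# Captured forward pass. Inputs: the latent sample, the diffusion timestep, the added-condition tensors (text_embeds, time_ids), and the encoder hidden states used for cross-attention. Note that L__self___time_embedding_act (SiLU) is reused as the activation throughout the graph.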
def forward(self, L_sample_ : torch.Tensor, L_timestep_ : torch.Tensor, L_added_cond_kwargs_text_embeds_ : torch.Tensor, L_added_cond_kwargs_time_ids_ : torch.Tensor, L_encoder_hidden_states_ : torch.Tensor): | |
l_sample_ = L_sample_ | |
timesteps = L_timestep_ | |
text_embeds = L_added_cond_kwargs_text_embeds_ | |
time_ids = L_added_cond_kwargs_time_ids_ | |
l_encoder_hidden_states_ = L_encoder_hidden_states_ | |
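# Timestep embedding: broadcast the scalar timestep to the batch of 2, build a 320-dim sinusoidal embedding (160 frequencies, with the sin/cos halves swapped), cast to fp16, and project it through time_embedding.linear_1 -> SiLU -> linear_2.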
getitem = timesteps[None]; timesteps = None | |
timesteps_1 = getitem.to(device(type='cuda', index=0)); getitem = None | |
timesteps_2 = timesteps_1.expand(2); timesteps_1 = None | |
arange = torch.arange(start = 0, end = 160, dtype = torch.float32, device = device(type='cuda', index=0)) | |
exponent = -9.210340371976184 * arange; arange = None | |
exponent_1 = exponent / 160; exponent = None | |
emb = torch.exp(exponent_1); exponent_1 = None | |
getitem_1 = timesteps_2[(slice(None, None, None), None)]; timesteps_2 = None | |
float_1 = getitem_1.float(); getitem_1 = None | |
getitem_2 = emb[(None, slice(None, None, None))]; emb = None | |
emb_1 = float_1 * getitem_2; float_1 = getitem_2 = None | |
emb_2 = 1 * emb_1; emb_1 = None | |
sin = torch.sin(emb_2) | |
cos = torch.cos(emb_2); emb_2 = None | |
emb_3 = torch.cat([sin, cos], dim = -1); sin = cos = None | |
getitem_3 = emb_3[(slice(None, None, None), slice(160, None, None))] | |
getitem_4 = emb_3[(slice(None, None, None), slice(None, 160, None))]; emb_3 = None | |
t_emb = torch.cat([getitem_3, getitem_4], dim = -1); getitem_3 = getitem_4 = None | |
t_emb_1 = t_emb.to(dtype = torch.float16); t_emb = None | |
l__self___time_embedding_linear_1_weight = self.L__self___time_embedding_linear_1_weight | |
l__self___time_embedding_linear_1_bias = self.L__self___time_embedding_linear_1_bias | |
sample = torch._C._nn.linear(t_emb_1, l__self___time_embedding_linear_1_weight, l__self___time_embedding_linear_1_bias); t_emb_1 = l__self___time_embedding_linear_1_weight = l__self___time_embedding_linear_1_bias = None | |
sample_1 = self.L__self___time_embedding_act(sample); sample = None | |
l__self___time_embedding_linear_2_weight = self.L__self___time_embedding_linear_2_weight | |
l__self___time_embedding_linear_2_bias = self.L__self___time_embedding_linear_2_bias | |
emb_5 = torch._C._nn.linear(sample_1, l__self___time_embedding_linear_2_weight, l__self___time_embedding_linear_2_bias); sample_1 = l__self___time_embedding_linear_2_weight = l__self___time_embedding_linear_2_bias = None | |
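# Added-condition embedding: flatten time_ids, embed each entry with 128 sin/cos frequencies (256 dims, halves swapped), reshape back to the batch of 2, concatenate with text_embeds, cast to fp16, and project through add_embedding.linear_1 -> SiLU -> linear_2.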
flatten = time_ids.flatten(); time_ids = None | |
arange_1 = torch.arange(start = 0, end = 128, dtype = torch.float32, device = device(type='cuda', index=0)) | |
exponent_2 = -9.210340371976184 * arange_1; arange_1 = None | |
exponent_3 = exponent_2 / 128; exponent_2 = None | |
emb_6 = torch.exp(exponent_3); exponent_3 = None | |
getitem_5 = flatten[(slice(None, None, None), None)]; flatten = None | |
float_2 = getitem_5.float(); getitem_5 = None | |
getitem_6 = emb_6[(None, slice(None, None, None))]; emb_6 = None | |
emb_7 = float_2 * getitem_6; float_2 = getitem_6 = None | |
emb_8 = 1 * emb_7; emb_7 = None | |
sin_1 = torch.sin(emb_8) | |
cos_1 = torch.cos(emb_8); emb_8 = None | |
emb_9 = torch.cat([sin_1, cos_1], dim = -1); sin_1 = cos_1 = None | |
getitem_7 = emb_9[(slice(None, None, None), slice(128, None, None))] | |
getitem_8 = emb_9[(slice(None, None, None), slice(None, 128, None))]; emb_9 = None | |
time_embeds = torch.cat([getitem_7, getitem_8], dim = -1); getitem_7 = getitem_8 = None | |
time_embeds_1 = time_embeds.reshape((2, -1)); time_embeds = None | |
add_embeds = torch.concat([text_embeds, time_embeds_1], dim = -1); text_embeds = time_embeds_1 = None | |
add_embeds_1 = add_embeds.to(torch.float16); add_embeds = None | |
l__self___add_embedding_linear_1_weight = self.L__self___add_embedding_linear_1_weight | |
l__self___add_embedding_linear_1_bias = self.L__self___add_embedding_linear_1_bias | |
sample_3 = torch._C._nn.linear(add_embeds_1, l__self___add_embedding_linear_1_weight, l__self___add_embedding_linear_1_bias); add_embeds_1 = l__self___add_embedding_linear_1_weight = l__self___add_embedding_linear_1_bias = None | |
sample_4 = self.L__self___time_embedding_act(sample_3); sample_3 = None | |
l__self___add_embedding_linear_2_weight = self.L__self___add_embedding_linear_2_weight | |
l__self___add_embedding_linear_2_bias = self.L__self___add_embedding_linear_2_bias | |
aug_emb = torch._C._nn.linear(sample_4, l__self___add_embedding_linear_2_weight, l__self___add_embedding_linear_2_bias); sample_4 = l__self___add_embedding_linear_2_weight = l__self___add_embedding_linear_2_bias = None | |
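# The conditioning embedding used by every resnet below is the sum of the timestep embedding and the augmented (text + time-ids) embedding.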
emb_11 = emb_5 + aug_emb; emb_5 = aug_emb = None | |
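# conv_in (4 -> 320 channels) followed by down_blocks[0]: two plain 320-channel resnet blocks (GroupNorm -> SiLU -> conv1, add the projected time embedding, GroupNorm -> SiLU -> dropout -> conv2, residual add) and a stride-2 downsampling conv.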
res_hidden_states_8 = self.L__self___conv_in(l_sample_); l_sample_ = None | |
hidden_states_1 = self.L__self___down_blocks_0_resnets_0_norm1(res_hidden_states_8) | |
hidden_states_2 = self.L__self___time_embedding_act(hidden_states_1); hidden_states_1 = None | |
l__self___down_blocks_0_resnets_0_conv1_weight = self.L__self___down_blocks_0_resnets_0_conv1_weight | |
l__self___down_blocks_0_resnets_0_conv1_bias = self.L__self___down_blocks_0_resnets_0_conv1_bias | |
hidden_states_3 = torch.conv2d(hidden_states_2, l__self___down_blocks_0_resnets_0_conv1_weight, l__self___down_blocks_0_resnets_0_conv1_bias, (1, 1), (1, 1), (1, 1), 1); hidden_states_2 = l__self___down_blocks_0_resnets_0_conv1_weight = l__self___down_blocks_0_resnets_0_conv1_bias = None | |
temb = self.L__self___time_embedding_act(emb_11) | |
l__self___down_blocks_0_resnets_0_time_emb_proj_weight = self.L__self___down_blocks_0_resnets_0_time_emb_proj_weight | |
l__self___down_blocks_0_resnets_0_time_emb_proj_bias = self.L__self___down_blocks_0_resnets_0_time_emb_proj_bias | |
out_4 = torch._C._nn.linear(temb, l__self___down_blocks_0_resnets_0_time_emb_proj_weight, l__self___down_blocks_0_resnets_0_time_emb_proj_bias); temb = l__self___down_blocks_0_resnets_0_time_emb_proj_weight = l__self___down_blocks_0_resnets_0_time_emb_proj_bias = None | |
temb_1 = out_4[(slice(None, None, None), slice(None, None, None), None, None)]; out_4 = None | |
hidden_states_4 = hidden_states_3 + temb_1; hidden_states_3 = temb_1 = None | |
hidden_states_5 = self.L__self___down_blocks_0_resnets_0_norm2(hidden_states_4); hidden_states_4 = None | |
hidden_states_6 = self.L__self___time_embedding_act(hidden_states_5); hidden_states_5 = None | |
hidden_states_7 = self.L__self___down_blocks_0_resnets_0_dropout(hidden_states_6); hidden_states_6 = None | |
l__self___down_blocks_0_resnets_0_conv2_weight = self.L__self___down_blocks_0_resnets_0_conv2_weight | |
l__self___down_blocks_0_resnets_0_conv2_bias = self.L__self___down_blocks_0_resnets_0_conv2_bias | |
hidden_states_8 = torch.conv2d(hidden_states_7, l__self___down_blocks_0_resnets_0_conv2_weight, l__self___down_blocks_0_resnets_0_conv2_bias, (1, 1), (1, 1), (1, 1), 1); hidden_states_7 = l__self___down_blocks_0_resnets_0_conv2_weight = l__self___down_blocks_0_resnets_0_conv2_bias = None | |
add_2 = res_hidden_states_8 + hidden_states_8; hidden_states_8 = None | |
res_hidden_states_7 = add_2 / 1.0; add_2 = None | |
hidden_states_11 = self.L__self___down_blocks_0_resnets_1_norm1(res_hidden_states_7) | |
hidden_states_12 = self.L__self___time_embedding_act(hidden_states_11); hidden_states_11 = None | |
l__self___down_blocks_0_resnets_1_conv1_weight = self.L__self___down_blocks_0_resnets_1_conv1_weight | |
l__self___down_blocks_0_resnets_1_conv1_bias = self.L__self___down_blocks_0_resnets_1_conv1_bias | |
hidden_states_13 = torch.conv2d(hidden_states_12, l__self___down_blocks_0_resnets_1_conv1_weight, l__self___down_blocks_0_resnets_1_conv1_bias, (1, 1), (1, 1), (1, 1), 1); hidden_states_12 = l__self___down_blocks_0_resnets_1_conv1_weight = l__self___down_blocks_0_resnets_1_conv1_bias = None | |
temb_2 = self.L__self___time_embedding_act(emb_11) | |
l__self___down_blocks_0_resnets_1_time_emb_proj_weight = self.L__self___down_blocks_0_resnets_1_time_emb_proj_weight | |
l__self___down_blocks_0_resnets_1_time_emb_proj_bias = self.L__self___down_blocks_0_resnets_1_time_emb_proj_bias | |
out_5 = torch._C._nn.linear(temb_2, l__self___down_blocks_0_resnets_1_time_emb_proj_weight, l__self___down_blocks_0_resnets_1_time_emb_proj_bias); temb_2 = l__self___down_blocks_0_resnets_1_time_emb_proj_weight = l__self___down_blocks_0_resnets_1_time_emb_proj_bias = None | |
temb_3 = out_5[(slice(None, None, None), slice(None, None, None), None, None)]; out_5 = None | |
hidden_states_14 = hidden_states_13 + temb_3; hidden_states_13 = temb_3 = None | |
hidden_states_15 = self.L__self___down_blocks_0_resnets_1_norm2(hidden_states_14); hidden_states_14 = None | |
hidden_states_16 = self.L__self___time_embedding_act(hidden_states_15); hidden_states_15 = None | |
hidden_states_17 = self.L__self___down_blocks_0_resnets_1_dropout(hidden_states_16); hidden_states_16 = None | |
l__self___down_blocks_0_resnets_1_conv2_weight = self.L__self___down_blocks_0_resnets_1_conv2_weight | |
l__self___down_blocks_0_resnets_1_conv2_bias = self.L__self___down_blocks_0_resnets_1_conv2_bias | |
hidden_states_18 = torch.conv2d(hidden_states_17, l__self___down_blocks_0_resnets_1_conv2_weight, l__self___down_blocks_0_resnets_1_conv2_bias, (1, 1), (1, 1), (1, 1), 1); hidden_states_17 = l__self___down_blocks_0_resnets_1_conv2_weight = l__self___down_blocks_0_resnets_1_conv2_bias = None | |
add_4 = res_hidden_states_7 + hidden_states_18; hidden_states_18 = None | |
res_hidden_states_6 = add_4 / 1.0; add_4 = None | |
l__self___down_blocks_0_downsamplers_0_conv_weight = self.L__self___down_blocks_0_downsamplers_0_conv_weight | |
l__self___down_blocks_0_downsamplers_0_conv_bias = self.L__self___down_blocks_0_downsamplers_0_conv_bias | |
res_hidden_states_5 = torch.conv2d(res_hidden_states_6, l__self___down_blocks_0_downsamplers_0_conv_weight, l__self___down_blocks_0_downsamplers_0_conv_bias, (2, 2), (1, 1), (1, 1), 1); l__self___down_blocks_0_downsamplers_0_conv_weight = l__self___down_blocks_0_downsamplers_0_conv_bias = None | |
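# down_blocks[1]: the first resnet widens 320 -> 640 via a 1x1 conv_shortcut; each resnet is followed by a Transformer2D stage (GroupNorm, linear proj_in, two BasicTransformerBlocks cross-attending to l_encoder_hidden_states_, linear proj_out, residual add on the 64x64 feature map), and the block ends with a stride-2 downsampler.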
hidden_states_23 = self.L__self___down_blocks_1_resnets_0_norm1(res_hidden_states_5) | |
hidden_states_24 = self.L__self___time_embedding_act(hidden_states_23); hidden_states_23 = None | |
l__self___down_blocks_1_resnets_0_conv1_weight = self.L__self___down_blocks_1_resnets_0_conv1_weight | |
l__self___down_blocks_1_resnets_0_conv1_bias = self.L__self___down_blocks_1_resnets_0_conv1_bias | |
hidden_states_25 = torch.conv2d(hidden_states_24, l__self___down_blocks_1_resnets_0_conv1_weight, l__self___down_blocks_1_resnets_0_conv1_bias, (1, 1), (1, 1), (1, 1), 1); hidden_states_24 = l__self___down_blocks_1_resnets_0_conv1_weight = l__self___down_blocks_1_resnets_0_conv1_bias = None | |
temb_4 = self.L__self___time_embedding_act(emb_11) | |
l__self___down_blocks_1_resnets_0_time_emb_proj_weight = self.L__self___down_blocks_1_resnets_0_time_emb_proj_weight | |
l__self___down_blocks_1_resnets_0_time_emb_proj_bias = self.L__self___down_blocks_1_resnets_0_time_emb_proj_bias | |
out_6 = torch._C._nn.linear(temb_4, l__self___down_blocks_1_resnets_0_time_emb_proj_weight, l__self___down_blocks_1_resnets_0_time_emb_proj_bias); temb_4 = l__self___down_blocks_1_resnets_0_time_emb_proj_weight = l__self___down_blocks_1_resnets_0_time_emb_proj_bias = None | |
temb_5 = out_6[(slice(None, None, None), slice(None, None, None), None, None)]; out_6 = None | |
hidden_states_26 = hidden_states_25 + temb_5; hidden_states_25 = temb_5 = None | |
hidden_states_27 = self.L__self___down_blocks_1_resnets_0_norm2(hidden_states_26); hidden_states_26 = None | |
hidden_states_28 = self.L__self___time_embedding_act(hidden_states_27); hidden_states_27 = None | |
hidden_states_29 = self.L__self___down_blocks_1_resnets_0_dropout(hidden_states_28); hidden_states_28 = None | |
l__self___down_blocks_1_resnets_0_conv2_weight = self.L__self___down_blocks_1_resnets_0_conv2_weight | |
l__self___down_blocks_1_resnets_0_conv2_bias = self.L__self___down_blocks_1_resnets_0_conv2_bias | |
hidden_states_30 = torch.conv2d(hidden_states_29, l__self___down_blocks_1_resnets_0_conv2_weight, l__self___down_blocks_1_resnets_0_conv2_bias, (1, 1), (1, 1), (1, 1), 1); hidden_states_29 = l__self___down_blocks_1_resnets_0_conv2_weight = l__self___down_blocks_1_resnets_0_conv2_bias = None | |
l__self___down_blocks_1_resnets_0_conv_shortcut_weight = self.L__self___down_blocks_1_resnets_0_conv_shortcut_weight | |
l__self___down_blocks_1_resnets_0_conv_shortcut_bias = self.L__self___down_blocks_1_resnets_0_conv_shortcut_bias | |
input_tensor = torch.conv2d(res_hidden_states_5, l__self___down_blocks_1_resnets_0_conv_shortcut_weight, l__self___down_blocks_1_resnets_0_conv_shortcut_bias, (1, 1), (0, 0), (1, 1), 1); l__self___down_blocks_1_resnets_0_conv_shortcut_weight = l__self___down_blocks_1_resnets_0_conv_shortcut_bias = None | |
add_6 = input_tensor + hidden_states_30; input_tensor = hidden_states_30 = None | |
residual = add_6 / 1.0; add_6 = None | |
hidden_states_32 = self.L__self___down_blocks_1_attentions_0_norm(residual) | |
permute = hidden_states_32.permute(0, 2, 3, 1); hidden_states_32 = None | |
hidden_states_33 = permute.reshape(2, 4096, 640); permute = None | |
l__self___down_blocks_1_attentions_0_proj_in_weight = self.L__self___down_blocks_1_attentions_0_proj_in_weight | |
l__self___down_blocks_1_attentions_0_proj_in_bias = self.L__self___down_blocks_1_attentions_0_proj_in_bias | |
hidden_states_34 = torch._C._nn.linear(hidden_states_33, l__self___down_blocks_1_attentions_0_proj_in_weight, l__self___down_blocks_1_attentions_0_proj_in_bias); hidden_states_33 = l__self___down_blocks_1_attentions_0_proj_in_weight = l__self___down_blocks_1_attentions_0_proj_in_bias = None | |
hidden_states_35 = self.L__self___down_blocks_1_attentions_0_transformer_blocks_0(hidden_states_34, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_34 = None | |
hidden_states_36 = self.L__self___down_blocks_1_attentions_0_transformer_blocks_1(hidden_states_35, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_35 = None | |
l__self___down_blocks_1_attentions_0_proj_out_weight = self.L__self___down_blocks_1_attentions_0_proj_out_weight | |
l__self___down_blocks_1_attentions_0_proj_out_bias = self.L__self___down_blocks_1_attentions_0_proj_out_bias | |
hidden_states_37 = torch._C._nn.linear(hidden_states_36, l__self___down_blocks_1_attentions_0_proj_out_weight, l__self___down_blocks_1_attentions_0_proj_out_bias); hidden_states_36 = l__self___down_blocks_1_attentions_0_proj_out_weight = l__self___down_blocks_1_attentions_0_proj_out_bias = None | |
reshape_2 = hidden_states_37.reshape(2, 64, 64, 640); hidden_states_37 = None | |
permute_1 = reshape_2.permute(0, 3, 1, 2); reshape_2 = None | |
hidden_states_38 = permute_1.contiguous(); permute_1 = None | |
res_hidden_states_4 = hidden_states_38 + residual; hidden_states_38 = residual = None | |
hidden_states_41 = self.L__self___down_blocks_1_resnets_1_norm1(res_hidden_states_4) | |
hidden_states_42 = self.L__self___time_embedding_act(hidden_states_41); hidden_states_41 = None | |
l__self___down_blocks_1_resnets_1_conv1_weight = self.L__self___down_blocks_1_resnets_1_conv1_weight | |
l__self___down_blocks_1_resnets_1_conv1_bias = self.L__self___down_blocks_1_resnets_1_conv1_bias | |
hidden_states_43 = torch.conv2d(hidden_states_42, l__self___down_blocks_1_resnets_1_conv1_weight, l__self___down_blocks_1_resnets_1_conv1_bias, (1, 1), (1, 1), (1, 1), 1); hidden_states_42 = l__self___down_blocks_1_resnets_1_conv1_weight = l__self___down_blocks_1_resnets_1_conv1_bias = None | |
temb_6 = self.L__self___time_embedding_act(emb_11) | |
l__self___down_blocks_1_resnets_1_time_emb_proj_weight = self.L__self___down_blocks_1_resnets_1_time_emb_proj_weight | |
l__self___down_blocks_1_resnets_1_time_emb_proj_bias = self.L__self___down_blocks_1_resnets_1_time_emb_proj_bias | |
out_9 = torch._C._nn.linear(temb_6, l__self___down_blocks_1_resnets_1_time_emb_proj_weight, l__self___down_blocks_1_resnets_1_time_emb_proj_bias); temb_6 = l__self___down_blocks_1_resnets_1_time_emb_proj_weight = l__self___down_blocks_1_resnets_1_time_emb_proj_bias = None | |
temb_7 = out_9[(slice(None, None, None), slice(None, None, None), None, None)]; out_9 = None | |
hidden_states_44 = hidden_states_43 + temb_7; hidden_states_43 = temb_7 = None | |
hidden_states_45 = self.L__self___down_blocks_1_resnets_1_norm2(hidden_states_44); hidden_states_44 = None | |
hidden_states_46 = self.L__self___time_embedding_act(hidden_states_45); hidden_states_45 = None | |
hidden_states_47 = self.L__self___down_blocks_1_resnets_1_dropout(hidden_states_46); hidden_states_46 = None | |
l__self___down_blocks_1_resnets_1_conv2_weight = self.L__self___down_blocks_1_resnets_1_conv2_weight | |
l__self___down_blocks_1_resnets_1_conv2_bias = self.L__self___down_blocks_1_resnets_1_conv2_bias | |
hidden_states_48 = torch.conv2d(hidden_states_47, l__self___down_blocks_1_resnets_1_conv2_weight, l__self___down_blocks_1_resnets_1_conv2_bias, (1, 1), (1, 1), (1, 1), 1); hidden_states_47 = l__self___down_blocks_1_resnets_1_conv2_weight = l__self___down_blocks_1_resnets_1_conv2_bias = None | |
add_9 = res_hidden_states_4 + hidden_states_48; hidden_states_48 = None | |
residual_1 = add_9 / 1.0; add_9 = None | |
hidden_states_50 = self.L__self___down_blocks_1_attentions_1_norm(residual_1) | |
permute_2 = hidden_states_50.permute(0, 2, 3, 1); hidden_states_50 = None | |
hidden_states_51 = permute_2.reshape(2, 4096, 640); permute_2 = None | |
l__self___down_blocks_1_attentions_1_proj_in_weight = self.L__self___down_blocks_1_attentions_1_proj_in_weight | |
l__self___down_blocks_1_attentions_1_proj_in_bias = self.L__self___down_blocks_1_attentions_1_proj_in_bias | |
hidden_states_52 = torch._C._nn.linear(hidden_states_51, l__self___down_blocks_1_attentions_1_proj_in_weight, l__self___down_blocks_1_attentions_1_proj_in_bias); hidden_states_51 = l__self___down_blocks_1_attentions_1_proj_in_weight = l__self___down_blocks_1_attentions_1_proj_in_bias = None | |
hidden_states_53 = self.L__self___down_blocks_1_attentions_1_transformer_blocks_0(hidden_states_52, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_52 = None | |
hidden_states_54 = self.L__self___down_blocks_1_attentions_1_transformer_blocks_1(hidden_states_53, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_53 = None | |
l__self___down_blocks_1_attentions_1_proj_out_weight = self.L__self___down_blocks_1_attentions_1_proj_out_weight | |
l__self___down_blocks_1_attentions_1_proj_out_bias = self.L__self___down_blocks_1_attentions_1_proj_out_bias | |
hidden_states_55 = torch._C._nn.linear(hidden_states_54, l__self___down_blocks_1_attentions_1_proj_out_weight, l__self___down_blocks_1_attentions_1_proj_out_bias); hidden_states_54 = l__self___down_blocks_1_attentions_1_proj_out_weight = l__self___down_blocks_1_attentions_1_proj_out_bias = None | |
reshape_4 = hidden_states_55.reshape(2, 64, 64, 640); hidden_states_55 = None | |
permute_3 = reshape_4.permute(0, 3, 1, 2); reshape_4 = None | |
hidden_states_56 = permute_3.contiguous(); permute_3 = None | |
res_hidden_states_3 = hidden_states_56 + residual_1; hidden_states_56 = residual_1 = None | |
l__self___down_blocks_1_downsamplers_0_conv_weight = self.L__self___down_blocks_1_downsamplers_0_conv_weight | |
l__self___down_blocks_1_downsamplers_0_conv_bias = self.L__self___down_blocks_1_downsamplers_0_conv_bias | |
res_hidden_states_2 = torch.conv2d(res_hidden_states_3, l__self___down_blocks_1_downsamplers_0_conv_weight, l__self___down_blocks_1_downsamplers_0_conv_bias, (2, 2), (1, 1), (1, 1), 1); l__self___down_blocks_1_downsamplers_0_conv_weight = l__self___down_blocks_1_downsamplers_0_conv_bias = None | |
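# down_blocks[2]: same structure at 1280 channels on the 32x32 feature map (the first resnet uses a 1x1 conv_shortcut 640 -> 1280); each Transformer2D stage here stacks ten BasicTransformerBlocks, and there is no downsampler after this block.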
hidden_states_61 = self.L__self___down_blocks_2_resnets_0_norm1(res_hidden_states_2) | |
hidden_states_62 = self.L__self___time_embedding_act(hidden_states_61); hidden_states_61 = None | |
l__self___down_blocks_2_resnets_0_conv1_weight = self.L__self___down_blocks_2_resnets_0_conv1_weight | |
l__self___down_blocks_2_resnets_0_conv1_bias = self.L__self___down_blocks_2_resnets_0_conv1_bias | |
hidden_states_63 = torch.conv2d(hidden_states_62, l__self___down_blocks_2_resnets_0_conv1_weight, l__self___down_blocks_2_resnets_0_conv1_bias, (1, 1), (1, 1), (1, 1), 1); hidden_states_62 = l__self___down_blocks_2_resnets_0_conv1_weight = l__self___down_blocks_2_resnets_0_conv1_bias = None | |
temb_8 = self.L__self___time_embedding_act(emb_11) | |
l__self___down_blocks_2_resnets_0_time_emb_proj_weight = self.L__self___down_blocks_2_resnets_0_time_emb_proj_weight | |
l__self___down_blocks_2_resnets_0_time_emb_proj_bias = self.L__self___down_blocks_2_resnets_0_time_emb_proj_bias | |
out_12 = torch._C._nn.linear(temb_8, l__self___down_blocks_2_resnets_0_time_emb_proj_weight, l__self___down_blocks_2_resnets_0_time_emb_proj_bias); temb_8 = l__self___down_blocks_2_resnets_0_time_emb_proj_weight = l__self___down_blocks_2_resnets_0_time_emb_proj_bias = None | |
temb_9 = out_12[(slice(None, None, None), slice(None, None, None), None, None)]; out_12 = None | |
hidden_states_64 = hidden_states_63 + temb_9; hidden_states_63 = temb_9 = None | |
hidden_states_65 = self.L__self___down_blocks_2_resnets_0_norm2(hidden_states_64); hidden_states_64 = None | |
hidden_states_66 = self.L__self___time_embedding_act(hidden_states_65); hidden_states_65 = None | |
hidden_states_67 = self.L__self___down_blocks_2_resnets_0_dropout(hidden_states_66); hidden_states_66 = None | |
l__self___down_blocks_2_resnets_0_conv2_weight = self.L__self___down_blocks_2_resnets_0_conv2_weight | |
l__self___down_blocks_2_resnets_0_conv2_bias = self.L__self___down_blocks_2_resnets_0_conv2_bias | |
hidden_states_68 = torch.conv2d(hidden_states_67, l__self___down_blocks_2_resnets_0_conv2_weight, l__self___down_blocks_2_resnets_0_conv2_bias, (1, 1), (1, 1), (1, 1), 1); hidden_states_67 = l__self___down_blocks_2_resnets_0_conv2_weight = l__self___down_blocks_2_resnets_0_conv2_bias = None | |
l__self___down_blocks_2_resnets_0_conv_shortcut_weight = self.L__self___down_blocks_2_resnets_0_conv_shortcut_weight | |
l__self___down_blocks_2_resnets_0_conv_shortcut_bias = self.L__self___down_blocks_2_resnets_0_conv_shortcut_bias | |
input_tensor_1 = torch.conv2d(res_hidden_states_2, l__self___down_blocks_2_resnets_0_conv_shortcut_weight, l__self___down_blocks_2_resnets_0_conv_shortcut_bias, (1, 1), (0, 0), (1, 1), 1); l__self___down_blocks_2_resnets_0_conv_shortcut_weight = l__self___down_blocks_2_resnets_0_conv_shortcut_bias = None | |
add_12 = input_tensor_1 + hidden_states_68; input_tensor_1 = hidden_states_68 = None | |
residual_2 = add_12 / 1.0; add_12 = None | |
hidden_states_70 = self.L__self___down_blocks_2_attentions_0_norm(residual_2) | |
permute_4 = hidden_states_70.permute(0, 2, 3, 1); hidden_states_70 = None | |
hidden_states_71 = permute_4.reshape(2, 1024, 1280); permute_4 = None | |
l__self___down_blocks_2_attentions_0_proj_in_weight = self.L__self___down_blocks_2_attentions_0_proj_in_weight | |
l__self___down_blocks_2_attentions_0_proj_in_bias = self.L__self___down_blocks_2_attentions_0_proj_in_bias | |
hidden_states_72 = torch._C._nn.linear(hidden_states_71, l__self___down_blocks_2_attentions_0_proj_in_weight, l__self___down_blocks_2_attentions_0_proj_in_bias); hidden_states_71 = l__self___down_blocks_2_attentions_0_proj_in_weight = l__self___down_blocks_2_attentions_0_proj_in_bias = None | |
hidden_states_73 = self.L__self___down_blocks_2_attentions_0_transformer_blocks_0(hidden_states_72, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_72 = None | |
hidden_states_74 = self.L__self___down_blocks_2_attentions_0_transformer_blocks_1(hidden_states_73, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_73 = None | |
hidden_states_75 = self.L__self___down_blocks_2_attentions_0_transformer_blocks_2(hidden_states_74, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_74 = None | |
hidden_states_76 = self.L__self___down_blocks_2_attentions_0_transformer_blocks_3(hidden_states_75, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_75 = None | |
hidden_states_77 = self.L__self___down_blocks_2_attentions_0_transformer_blocks_4(hidden_states_76, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_76 = None | |
hidden_states_78 = self.L__self___down_blocks_2_attentions_0_transformer_blocks_5(hidden_states_77, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_77 = None | |
hidden_states_79 = self.L__self___down_blocks_2_attentions_0_transformer_blocks_6(hidden_states_78, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_78 = None | |
hidden_states_80 = self.L__self___down_blocks_2_attentions_0_transformer_blocks_7(hidden_states_79, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_79 = None | |
hidden_states_81 = self.L__self___down_blocks_2_attentions_0_transformer_blocks_8(hidden_states_80, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_80 = None | |
hidden_states_82 = self.L__self___down_blocks_2_attentions_0_transformer_blocks_9(hidden_states_81, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_81 = None | |
l__self___down_blocks_2_attentions_0_proj_out_weight = self.L__self___down_blocks_2_attentions_0_proj_out_weight | |
l__self___down_blocks_2_attentions_0_proj_out_bias = self.L__self___down_blocks_2_attentions_0_proj_out_bias | |
hidden_states_83 = torch._C._nn.linear(hidden_states_82, l__self___down_blocks_2_attentions_0_proj_out_weight, l__self___down_blocks_2_attentions_0_proj_out_bias); hidden_states_82 = l__self___down_blocks_2_attentions_0_proj_out_weight = l__self___down_blocks_2_attentions_0_proj_out_bias = None | |
reshape_6 = hidden_states_83.reshape(2, 32, 32, 1280); hidden_states_83 = None | |
permute_5 = reshape_6.permute(0, 3, 1, 2); reshape_6 = None | |
hidden_states_84 = permute_5.contiguous(); permute_5 = None | |
res_hidden_states_1 = hidden_states_84 + residual_2; hidden_states_84 = residual_2 = None | |
hidden_states_87 = self.L__self___down_blocks_2_resnets_1_norm1(res_hidden_states_1) | |
hidden_states_88 = self.L__self___time_embedding_act(hidden_states_87); hidden_states_87 = None | |
l__self___down_blocks_2_resnets_1_conv1_weight = self.L__self___down_blocks_2_resnets_1_conv1_weight | |
l__self___down_blocks_2_resnets_1_conv1_bias = self.L__self___down_blocks_2_resnets_1_conv1_bias | |
hidden_states_89 = torch.conv2d(hidden_states_88, l__self___down_blocks_2_resnets_1_conv1_weight, l__self___down_blocks_2_resnets_1_conv1_bias, (1, 1), (1, 1), (1, 1), 1); hidden_states_88 = l__self___down_blocks_2_resnets_1_conv1_weight = l__self___down_blocks_2_resnets_1_conv1_bias = None | |
temb_10 = self.L__self___time_embedding_act(emb_11) | |
l__self___down_blocks_2_resnets_1_time_emb_proj_weight = self.L__self___down_blocks_2_resnets_1_time_emb_proj_weight | |
l__self___down_blocks_2_resnets_1_time_emb_proj_bias = self.L__self___down_blocks_2_resnets_1_time_emb_proj_bias | |
out_15 = torch._C._nn.linear(temb_10, l__self___down_blocks_2_resnets_1_time_emb_proj_weight, l__self___down_blocks_2_resnets_1_time_emb_proj_bias); temb_10 = l__self___down_blocks_2_resnets_1_time_emb_proj_weight = l__self___down_blocks_2_resnets_1_time_emb_proj_bias = None | |
temb_11 = out_15[(slice(None, None, None), slice(None, None, None), None, None)]; out_15 = None | |
hidden_states_90 = hidden_states_89 + temb_11; hidden_states_89 = temb_11 = None | |
hidden_states_91 = self.L__self___down_blocks_2_resnets_1_norm2(hidden_states_90); hidden_states_90 = None | |
hidden_states_92 = self.L__self___time_embedding_act(hidden_states_91); hidden_states_91 = None | |
hidden_states_93 = self.L__self___down_blocks_2_resnets_1_dropout(hidden_states_92); hidden_states_92 = None | |
l__self___down_blocks_2_resnets_1_conv2_weight = self.L__self___down_blocks_2_resnets_1_conv2_weight | |
l__self___down_blocks_2_resnets_1_conv2_bias = self.L__self___down_blocks_2_resnets_1_conv2_bias | |
hidden_states_94 = torch.conv2d(hidden_states_93, l__self___down_blocks_2_resnets_1_conv2_weight, l__self___down_blocks_2_resnets_1_conv2_bias, (1, 1), (1, 1), (1, 1), 1); hidden_states_93 = l__self___down_blocks_2_resnets_1_conv2_weight = l__self___down_blocks_2_resnets_1_conv2_bias = None | |
add_15 = res_hidden_states_1 + hidden_states_94; hidden_states_94 = None | |
residual_3 = add_15 / 1.0; add_15 = None | |
hidden_states_96 = self.L__self___down_blocks_2_attentions_1_norm(residual_3) | |
permute_6 = hidden_states_96.permute(0, 2, 3, 1); hidden_states_96 = None | |
hidden_states_97 = permute_6.reshape(2, 1024, 1280); permute_6 = None | |
l__self___down_blocks_2_attentions_1_proj_in_weight = self.L__self___down_blocks_2_attentions_1_proj_in_weight | |
l__self___down_blocks_2_attentions_1_proj_in_bias = self.L__self___down_blocks_2_attentions_1_proj_in_bias | |
hidden_states_98 = torch._C._nn.linear(hidden_states_97, l__self___down_blocks_2_attentions_1_proj_in_weight, l__self___down_blocks_2_attentions_1_proj_in_bias); hidden_states_97 = l__self___down_blocks_2_attentions_1_proj_in_weight = l__self___down_blocks_2_attentions_1_proj_in_bias = None | |
hidden_states_99 = self.L__self___down_blocks_2_attentions_1_transformer_blocks_0(hidden_states_98, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_98 = None | |
hidden_states_100 = self.L__self___down_blocks_2_attentions_1_transformer_blocks_1(hidden_states_99, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_99 = None | |
hidden_states_101 = self.L__self___down_blocks_2_attentions_1_transformer_blocks_2(hidden_states_100, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_100 = None | |
hidden_states_102 = self.L__self___down_blocks_2_attentions_1_transformer_blocks_3(hidden_states_101, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_101 = None | |
hidden_states_103 = self.L__self___down_blocks_2_attentions_1_transformer_blocks_4(hidden_states_102, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_102 = None | |
hidden_states_104 = self.L__self___down_blocks_2_attentions_1_transformer_blocks_5(hidden_states_103, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_103 = None | |
hidden_states_105 = self.L__self___down_blocks_2_attentions_1_transformer_blocks_6(hidden_states_104, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_104 = None | |
hidden_states_106 = self.L__self___down_blocks_2_attentions_1_transformer_blocks_7(hidden_states_105, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_105 = None | |
hidden_states_107 = self.L__self___down_blocks_2_attentions_1_transformer_blocks_8(hidden_states_106, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_106 = None | |
hidden_states_108 = self.L__self___down_blocks_2_attentions_1_transformer_blocks_9(hidden_states_107, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_107 = None | |
l__self___down_blocks_2_attentions_1_proj_out_weight = self.L__self___down_blocks_2_attentions_1_proj_out_weight | |
l__self___down_blocks_2_attentions_1_proj_out_bias = self.L__self___down_blocks_2_attentions_1_proj_out_bias | |
hidden_states_109 = torch._C._nn.linear(hidden_states_108, l__self___down_blocks_2_attentions_1_proj_out_weight, l__self___down_blocks_2_attentions_1_proj_out_bias); hidden_states_108 = l__self___down_blocks_2_attentions_1_proj_out_weight = l__self___down_blocks_2_attentions_1_proj_out_bias = None | |
reshape_8 = hidden_states_109.reshape(2, 32, 32, 1280); hidden_states_109 = None | |
permute_7 = reshape_8.permute(0, 3, 1, 2); reshape_8 = None | |
hidden_states_110 = permute_7.contiguous(); permute_7 = None | |
res_hidden_states = hidden_states_110 + residual_3; hidden_states_110 = residual_3 = None | |
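# mid_block: a 1280-channel resnet, a Transformer2D stage with ten BasicTransformerBlocks, and then the second mid-block resnet (the resnets_slice_1__None__None___0 parameters above).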
hidden_states_113 = self.L__self___mid_block_resnets_0_norm1(res_hidden_states) | |
hidden_states_114 = self.L__self___time_embedding_act(hidden_states_113); hidden_states_113 = None | |
l__self___mid_block_resnets_0_conv1_weight = self.L__self___mid_block_resnets_0_conv1_weight | |
l__self___mid_block_resnets_0_conv1_bias = self.L__self___mid_block_resnets_0_conv1_bias | |
hidden_states_115 = torch.conv2d(hidden_states_114, l__self___mid_block_resnets_0_conv1_weight, l__self___mid_block_resnets_0_conv1_bias, (1, 1), (1, 1), (1, 1), 1); hidden_states_114 = l__self___mid_block_resnets_0_conv1_weight = l__self___mid_block_resnets_0_conv1_bias = None | |
temb_12 = self.L__self___time_embedding_act(emb_11) | |
l__self___mid_block_resnets_0_time_emb_proj_weight = self.L__self___mid_block_resnets_0_time_emb_proj_weight | |
l__self___mid_block_resnets_0_time_emb_proj_bias = self.L__self___mid_block_resnets_0_time_emb_proj_bias | |
out_18 = torch._C._nn.linear(temb_12, l__self___mid_block_resnets_0_time_emb_proj_weight, l__self___mid_block_resnets_0_time_emb_proj_bias); temb_12 = l__self___mid_block_resnets_0_time_emb_proj_weight = l__self___mid_block_resnets_0_time_emb_proj_bias = None | |
temb_13 = out_18[(slice(None, None, None), slice(None, None, None), None, None)]; out_18 = None | |
hidden_states_116 = hidden_states_115 + temb_13; hidden_states_115 = temb_13 = None | |
hidden_states_117 = self.L__self___mid_block_resnets_0_norm2(hidden_states_116); hidden_states_116 = None | |
hidden_states_118 = self.L__self___time_embedding_act(hidden_states_117); hidden_states_117 = None | |
hidden_states_119 = self.L__self___mid_block_resnets_0_dropout(hidden_states_118); hidden_states_118 = None | |
l__self___mid_block_resnets_0_conv2_weight = self.L__self___mid_block_resnets_0_conv2_weight | |
l__self___mid_block_resnets_0_conv2_bias = self.L__self___mid_block_resnets_0_conv2_bias | |
hidden_states_120 = torch.conv2d(hidden_states_119, l__self___mid_block_resnets_0_conv2_weight, l__self___mid_block_resnets_0_conv2_bias, (1, 1), (1, 1), (1, 1), 1); hidden_states_119 = l__self___mid_block_resnets_0_conv2_weight = l__self___mid_block_resnets_0_conv2_bias = None | |
add_18 = res_hidden_states + hidden_states_120; hidden_states_120 = None | |
residual_4 = add_18 / 1; add_18 = None | |
hidden_states_122 = self.L__self___mid_block_attentions_0_norm(residual_4) | |
permute_8 = hidden_states_122.permute(0, 2, 3, 1); hidden_states_122 = None | |
hidden_states_123 = permute_8.reshape(2, 1024, 1280); permute_8 = None | |
l__self___mid_block_attentions_0_proj_in_weight = self.L__self___mid_block_attentions_0_proj_in_weight | |
l__self___mid_block_attentions_0_proj_in_bias = self.L__self___mid_block_attentions_0_proj_in_bias | |
hidden_states_124 = torch._C._nn.linear(hidden_states_123, l__self___mid_block_attentions_0_proj_in_weight, l__self___mid_block_attentions_0_proj_in_bias); hidden_states_123 = l__self___mid_block_attentions_0_proj_in_weight = l__self___mid_block_attentions_0_proj_in_bias = None | |
hidden_states_125 = self.L__self___mid_block_attentions_0_transformer_blocks_0(hidden_states_124, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_124 = None | |
hidden_states_126 = self.L__self___mid_block_attentions_0_transformer_blocks_1(hidden_states_125, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_125 = None | |
hidden_states_127 = self.L__self___mid_block_attentions_0_transformer_blocks_2(hidden_states_126, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_126 = None | |
hidden_states_128 = self.L__self___mid_block_attentions_0_transformer_blocks_3(hidden_states_127, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_127 = None | |
hidden_states_129 = self.L__self___mid_block_attentions_0_transformer_blocks_4(hidden_states_128, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_128 = None | |
hidden_states_130 = self.L__self___mid_block_attentions_0_transformer_blocks_5(hidden_states_129, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_129 = None | |
hidden_states_131 = self.L__self___mid_block_attentions_0_transformer_blocks_6(hidden_states_130, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_130 = None | |
hidden_states_132 = self.L__self___mid_block_attentions_0_transformer_blocks_7(hidden_states_131, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_131 = None | |
hidden_states_133 = self.L__self___mid_block_attentions_0_transformer_blocks_8(hidden_states_132, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_132 = None | |
hidden_states_134 = self.L__self___mid_block_attentions_0_transformer_blocks_9(hidden_states_133, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_133 = None | |
l__self___mid_block_attentions_0_proj_out_weight = self.L__self___mid_block_attentions_0_proj_out_weight | |
l__self___mid_block_attentions_0_proj_out_bias = self.L__self___mid_block_attentions_0_proj_out_bias | |
hidden_states_135 = torch._C._nn.linear(hidden_states_134, l__self___mid_block_attentions_0_proj_out_weight, l__self___mid_block_attentions_0_proj_out_bias); hidden_states_134 = l__self___mid_block_attentions_0_proj_out_weight = l__self___mid_block_attentions_0_proj_out_bias = None | |
reshape_10 = hidden_states_135.reshape(2, 32, 32, 1280); hidden_states_135 = None | |
permute_9 = reshape_10.permute(0, 3, 1, 2); reshape_10 = None | |
hidden_states_136 = permute_9.contiguous(); permute_9 = None | |
hidden_states_138 = hidden_states_136 + residual_4; hidden_states_136 = residual_4 = None | |
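# mid_block.resnets[1]: Dynamo traced this module through a slice, so the attribute
# names below encode resnets[slice(1, None, None)][0], i.e. resnets[1:][0].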
hidden_states_139 = self.L__self___mid_block_resnets_slice_1__None__None___0_norm1(hidden_states_138) | |
hidden_states_140 = self.L__self___time_embedding_act(hidden_states_139); hidden_states_139 = None | |
l__self___mid_block_resnets_slice_1__none__none___0_conv1_weight = self.L__self___mid_block_resnets_slice_1__None__None___0_conv1_weight | |
l__self___mid_block_resnets_slice_1__none__none___0_conv1_bias = self.L__self___mid_block_resnets_slice_1__None__None___0_conv1_bias | |
hidden_states_141 = torch.conv2d(hidden_states_140, l__self___mid_block_resnets_slice_1__none__none___0_conv1_weight, l__self___mid_block_resnets_slice_1__none__none___0_conv1_bias, (1, 1), (1, 1), (1, 1), 1); hidden_states_140 = l__self___mid_block_resnets_slice_1__none__none___0_conv1_weight = l__self___mid_block_resnets_slice_1__none__none___0_conv1_bias = None | |
temb_14 = self.L__self___time_embedding_act(emb_11) | |
l__self___mid_block_resnets_slice_1__none__none___0_time_emb_proj_weight = self.L__self___mid_block_resnets_slice_1__None__None___0_time_emb_proj_weight | |
l__self___mid_block_resnets_slice_1__none__none___0_time_emb_proj_bias = self.L__self___mid_block_resnets_slice_1__None__None___0_time_emb_proj_bias | |
out_21 = torch._C._nn.linear(temb_14, l__self___mid_block_resnets_slice_1__none__none___0_time_emb_proj_weight, l__self___mid_block_resnets_slice_1__none__none___0_time_emb_proj_bias); temb_14 = l__self___mid_block_resnets_slice_1__none__none___0_time_emb_proj_weight = l__self___mid_block_resnets_slice_1__none__none___0_time_emb_proj_bias = None | |
temb_15 = out_21[(slice(None, None, None), slice(None, None, None), None, None)]; out_21 = None | |
hidden_states_142 = hidden_states_141 + temb_15; hidden_states_141 = temb_15 = None | |
hidden_states_143 = self.L__self___mid_block_resnets_slice_1__None__None___0_norm2(hidden_states_142); hidden_states_142 = None | |
hidden_states_144 = self.L__self___time_embedding_act(hidden_states_143); hidden_states_143 = None | |
hidden_states_145 = self.L__self___mid_block_resnets_slice_1__None__None___0_dropout(hidden_states_144); hidden_states_144 = None | |
l__self___mid_block_resnets_slice_1__none__none___0_conv2_weight = self.L__self___mid_block_resnets_slice_1__None__None___0_conv2_weight | |
l__self___mid_block_resnets_slice_1__none__none___0_conv2_bias = self.L__self___mid_block_resnets_slice_1__None__None___0_conv2_bias | |
hidden_states_146 = torch.conv2d(hidden_states_145, l__self___mid_block_resnets_slice_1__none__none___0_conv2_weight, l__self___mid_block_resnets_slice_1__none__none___0_conv2_bias, (1, 1), (1, 1), (1, 1), 1); hidden_states_145 = l__self___mid_block_resnets_slice_1__none__none___0_conv2_weight = l__self___mid_block_resnets_slice_1__none__none___0_conv2_bias = None | |
add_21 = hidden_states_138 + hidden_states_146; hidden_states_138 = hidden_states_146 = None | |
sample_10 = add_21 / 1; add_21 = None | |
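# up_blocks[0], resnets[0]: concatenate with the saved skip connection
# (res_hidden_states) along channels; the conv_shortcut projects the concatenated
# input back to 1280 channels before the residual add.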
hidden_states_149 = torch.cat([sample_10, res_hidden_states], dim = 1); sample_10 = res_hidden_states = None | |
hidden_states_150 = self.L__self___up_blocks_0_resnets_0_norm1(hidden_states_149) | |
hidden_states_151 = self.L__self___time_embedding_act(hidden_states_150); hidden_states_150 = None | |
l__self___up_blocks_0_resnets_0_conv1_weight = self.L__self___up_blocks_0_resnets_0_conv1_weight | |
l__self___up_blocks_0_resnets_0_conv1_bias = self.L__self___up_blocks_0_resnets_0_conv1_bias | |
hidden_states_152 = torch.conv2d(hidden_states_151, l__self___up_blocks_0_resnets_0_conv1_weight, l__self___up_blocks_0_resnets_0_conv1_bias, (1, 1), (1, 1), (1, 1), 1); hidden_states_151 = l__self___up_blocks_0_resnets_0_conv1_weight = l__self___up_blocks_0_resnets_0_conv1_bias = None | |
temb_16 = self.L__self___time_embedding_act(emb_11) | |
l__self___up_blocks_0_resnets_0_time_emb_proj_weight = self.L__self___up_blocks_0_resnets_0_time_emb_proj_weight | |
l__self___up_blocks_0_resnets_0_time_emb_proj_bias = self.L__self___up_blocks_0_resnets_0_time_emb_proj_bias | |
out_22 = torch._C._nn.linear(temb_16, l__self___up_blocks_0_resnets_0_time_emb_proj_weight, l__self___up_blocks_0_resnets_0_time_emb_proj_bias); temb_16 = l__self___up_blocks_0_resnets_0_time_emb_proj_weight = l__self___up_blocks_0_resnets_0_time_emb_proj_bias = None | |
temb_17 = out_22[(slice(None, None, None), slice(None, None, None), None, None)]; out_22 = None | |
hidden_states_153 = hidden_states_152 + temb_17; hidden_states_152 = temb_17 = None | |
hidden_states_154 = self.L__self___up_blocks_0_resnets_0_norm2(hidden_states_153); hidden_states_153 = None | |
hidden_states_155 = self.L__self___time_embedding_act(hidden_states_154); hidden_states_154 = None | |
hidden_states_156 = self.L__self___up_blocks_0_resnets_0_dropout(hidden_states_155); hidden_states_155 = None | |
l__self___up_blocks_0_resnets_0_conv2_weight = self.L__self___up_blocks_0_resnets_0_conv2_weight | |
l__self___up_blocks_0_resnets_0_conv2_bias = self.L__self___up_blocks_0_resnets_0_conv2_bias | |
hidden_states_157 = torch.conv2d(hidden_states_156, l__self___up_blocks_0_resnets_0_conv2_weight, l__self___up_blocks_0_resnets_0_conv2_bias, (1, 1), (1, 1), (1, 1), 1); hidden_states_156 = l__self___up_blocks_0_resnets_0_conv2_weight = l__self___up_blocks_0_resnets_0_conv2_bias = None | |
l__self___up_blocks_0_resnets_0_conv_shortcut_weight = self.L__self___up_blocks_0_resnets_0_conv_shortcut_weight | |
l__self___up_blocks_0_resnets_0_conv_shortcut_bias = self.L__self___up_blocks_0_resnets_0_conv_shortcut_bias | |
input_tensor_2 = torch.conv2d(hidden_states_149, l__self___up_blocks_0_resnets_0_conv_shortcut_weight, l__self___up_blocks_0_resnets_0_conv_shortcut_bias, (1, 1), (0, 0), (1, 1), 1); hidden_states_149 = l__self___up_blocks_0_resnets_0_conv_shortcut_weight = l__self___up_blocks_0_resnets_0_conv_shortcut_bias = None | |
add_23 = input_tensor_2 + hidden_states_157; input_tensor_2 = hidden_states_157 = None | |
residual_5 = add_23 / 1.0; add_23 = None | |
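# up_blocks[0].attentions[0]: same proj_in -> ten transformer blocks -> proj_out
# pattern as the mid-block attention, at 32x32 resolution with 1280 channels.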
hidden_states_159 = self.L__self___up_blocks_0_attentions_0_norm(residual_5) | |
permute_10 = hidden_states_159.permute(0, 2, 3, 1); hidden_states_159 = None | |
hidden_states_160 = permute_10.reshape(2, 1024, 1280); permute_10 = None | |
l__self___up_blocks_0_attentions_0_proj_in_weight = self.L__self___up_blocks_0_attentions_0_proj_in_weight | |
l__self___up_blocks_0_attentions_0_proj_in_bias = self.L__self___up_blocks_0_attentions_0_proj_in_bias | |
hidden_states_161 = torch._C._nn.linear(hidden_states_160, l__self___up_blocks_0_attentions_0_proj_in_weight, l__self___up_blocks_0_attentions_0_proj_in_bias); hidden_states_160 = l__self___up_blocks_0_attentions_0_proj_in_weight = l__self___up_blocks_0_attentions_0_proj_in_bias = None | |
hidden_states_162 = self.L__self___up_blocks_0_attentions_0_transformer_blocks_0(hidden_states_161, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_161 = None | |
hidden_states_163 = self.L__self___up_blocks_0_attentions_0_transformer_blocks_1(hidden_states_162, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_162 = None | |
hidden_states_164 = self.L__self___up_blocks_0_attentions_0_transformer_blocks_2(hidden_states_163, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_163 = None | |
hidden_states_165 = self.L__self___up_blocks_0_attentions_0_transformer_blocks_3(hidden_states_164, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_164 = None | |
hidden_states_166 = self.L__self___up_blocks_0_attentions_0_transformer_blocks_4(hidden_states_165, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_165 = None | |
hidden_states_167 = self.L__self___up_blocks_0_attentions_0_transformer_blocks_5(hidden_states_166, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_166 = None | |
hidden_states_168 = self.L__self___up_blocks_0_attentions_0_transformer_blocks_6(hidden_states_167, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_167 = None | |
hidden_states_169 = self.L__self___up_blocks_0_attentions_0_transformer_blocks_7(hidden_states_168, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_168 = None | |
hidden_states_170 = self.L__self___up_blocks_0_attentions_0_transformer_blocks_8(hidden_states_169, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_169 = None | |
hidden_states_171 = self.L__self___up_blocks_0_attentions_0_transformer_blocks_9(hidden_states_170, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_170 = None | |
l__self___up_blocks_0_attentions_0_proj_out_weight = self.L__self___up_blocks_0_attentions_0_proj_out_weight | |
l__self___up_blocks_0_attentions_0_proj_out_bias = self.L__self___up_blocks_0_attentions_0_proj_out_bias | |
hidden_states_172 = torch._C._nn.linear(hidden_states_171, l__self___up_blocks_0_attentions_0_proj_out_weight, l__self___up_blocks_0_attentions_0_proj_out_bias); hidden_states_171 = l__self___up_blocks_0_attentions_0_proj_out_weight = l__self___up_blocks_0_attentions_0_proj_out_bias = None | |
reshape_12 = hidden_states_172.reshape(2, 32, 32, 1280); hidden_states_172 = None | |
permute_11 = reshape_12.permute(0, 3, 1, 2); reshape_12 = None | |
hidden_states_173 = permute_11.contiguous(); permute_11 = None | |
hidden_states_174 = hidden_states_173 + residual_5; hidden_states_173 = residual_5 = None | |
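# up_blocks[0], second resnet/attention pair: concatenate with res_hidden_states_1 and repeat.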
hidden_states_176 = torch.cat([hidden_states_174, res_hidden_states_1], dim = 1); hidden_states_174 = res_hidden_states_1 = None | |
hidden_states_177 = self.L__self___up_blocks_0_resnets_1_norm1(hidden_states_176) | |
hidden_states_178 = self.L__self___time_embedding_act(hidden_states_177); hidden_states_177 = None | |
l__self___up_blocks_0_resnets_1_conv1_weight = self.L__self___up_blocks_0_resnets_1_conv1_weight | |
l__self___up_blocks_0_resnets_1_conv1_bias = self.L__self___up_blocks_0_resnets_1_conv1_bias | |
hidden_states_179 = torch.conv2d(hidden_states_178, l__self___up_blocks_0_resnets_1_conv1_weight, l__self___up_blocks_0_resnets_1_conv1_bias, (1, 1), (1, 1), (1, 1), 1); hidden_states_178 = l__self___up_blocks_0_resnets_1_conv1_weight = l__self___up_blocks_0_resnets_1_conv1_bias = None | |
temb_18 = self.L__self___time_embedding_act(emb_11) | |
l__self___up_blocks_0_resnets_1_time_emb_proj_weight = self.L__self___up_blocks_0_resnets_1_time_emb_proj_weight | |
l__self___up_blocks_0_resnets_1_time_emb_proj_bias = self.L__self___up_blocks_0_resnets_1_time_emb_proj_bias | |
out_25 = torch._C._nn.linear(temb_18, l__self___up_blocks_0_resnets_1_time_emb_proj_weight, l__self___up_blocks_0_resnets_1_time_emb_proj_bias); temb_18 = l__self___up_blocks_0_resnets_1_time_emb_proj_weight = l__self___up_blocks_0_resnets_1_time_emb_proj_bias = None | |
temb_19 = out_25[(slice(None, None, None), slice(None, None, None), None, None)]; out_25 = None | |
hidden_states_180 = hidden_states_179 + temb_19; hidden_states_179 = temb_19 = None | |
hidden_states_181 = self.L__self___up_blocks_0_resnets_1_norm2(hidden_states_180); hidden_states_180 = None | |
hidden_states_182 = self.L__self___time_embedding_act(hidden_states_181); hidden_states_181 = None | |
hidden_states_183 = self.L__self___up_blocks_0_resnets_1_dropout(hidden_states_182); hidden_states_182 = None | |
l__self___up_blocks_0_resnets_1_conv2_weight = self.L__self___up_blocks_0_resnets_1_conv2_weight | |
l__self___up_blocks_0_resnets_1_conv2_bias = self.L__self___up_blocks_0_resnets_1_conv2_bias | |
hidden_states_184 = torch.conv2d(hidden_states_183, l__self___up_blocks_0_resnets_1_conv2_weight, l__self___up_blocks_0_resnets_1_conv2_bias, (1, 1), (1, 1), (1, 1), 1); hidden_states_183 = l__self___up_blocks_0_resnets_1_conv2_weight = l__self___up_blocks_0_resnets_1_conv2_bias = None | |
l__self___up_blocks_0_resnets_1_conv_shortcut_weight = self.L__self___up_blocks_0_resnets_1_conv_shortcut_weight | |
l__self___up_blocks_0_resnets_1_conv_shortcut_bias = self.L__self___up_blocks_0_resnets_1_conv_shortcut_bias | |
input_tensor_3 = torch.conv2d(hidden_states_176, l__self___up_blocks_0_resnets_1_conv_shortcut_weight, l__self___up_blocks_0_resnets_1_conv_shortcut_bias, (1, 1), (0, 0), (1, 1), 1); hidden_states_176 = l__self___up_blocks_0_resnets_1_conv_shortcut_weight = l__self___up_blocks_0_resnets_1_conv_shortcut_bias = None | |
add_26 = input_tensor_3 + hidden_states_184; input_tensor_3 = hidden_states_184 = None | |
residual_6 = add_26 / 1.0; add_26 = None | |
hidden_states_186 = self.L__self___up_blocks_0_attentions_1_norm(residual_6) | |
permute_12 = hidden_states_186.permute(0, 2, 3, 1); hidden_states_186 = None | |
hidden_states_187 = permute_12.reshape(2, 1024, 1280); permute_12 = None | |
l__self___up_blocks_0_attentions_1_proj_in_weight = self.L__self___up_blocks_0_attentions_1_proj_in_weight | |
l__self___up_blocks_0_attentions_1_proj_in_bias = self.L__self___up_blocks_0_attentions_1_proj_in_bias | |
hidden_states_188 = torch._C._nn.linear(hidden_states_187, l__self___up_blocks_0_attentions_1_proj_in_weight, l__self___up_blocks_0_attentions_1_proj_in_bias); hidden_states_187 = l__self___up_blocks_0_attentions_1_proj_in_weight = l__self___up_blocks_0_attentions_1_proj_in_bias = None | |
hidden_states_189 = self.L__self___up_blocks_0_attentions_1_transformer_blocks_0(hidden_states_188, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_188 = None | |
hidden_states_190 = self.L__self___up_blocks_0_attentions_1_transformer_blocks_1(hidden_states_189, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_189 = None | |
hidden_states_191 = self.L__self___up_blocks_0_attentions_1_transformer_blocks_2(hidden_states_190, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_190 = None | |
hidden_states_192 = self.L__self___up_blocks_0_attentions_1_transformer_blocks_3(hidden_states_191, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_191 = None | |
hidden_states_193 = self.L__self___up_blocks_0_attentions_1_transformer_blocks_4(hidden_states_192, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_192 = None | |
hidden_states_194 = self.L__self___up_blocks_0_attentions_1_transformer_blocks_5(hidden_states_193, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_193 = None | |
hidden_states_195 = self.L__self___up_blocks_0_attentions_1_transformer_blocks_6(hidden_states_194, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_194 = None | |
hidden_states_196 = self.L__self___up_blocks_0_attentions_1_transformer_blocks_7(hidden_states_195, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_195 = None | |
hidden_states_197 = self.L__self___up_blocks_0_attentions_1_transformer_blocks_8(hidden_states_196, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_196 = None | |
hidden_states_198 = self.L__self___up_blocks_0_attentions_1_transformer_blocks_9(hidden_states_197, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_197 = None | |
l__self___up_blocks_0_attentions_1_proj_out_weight = self.L__self___up_blocks_0_attentions_1_proj_out_weight | |
l__self___up_blocks_0_attentions_1_proj_out_bias = self.L__self___up_blocks_0_attentions_1_proj_out_bias | |
hidden_states_199 = torch._C._nn.linear(hidden_states_198, l__self___up_blocks_0_attentions_1_proj_out_weight, l__self___up_blocks_0_attentions_1_proj_out_bias); hidden_states_198 = l__self___up_blocks_0_attentions_1_proj_out_weight = l__self___up_blocks_0_attentions_1_proj_out_bias = None | |
reshape_14 = hidden_states_199.reshape(2, 32, 32, 1280); hidden_states_199 = None | |
permute_13 = reshape_14.permute(0, 3, 1, 2); reshape_14 = None | |
hidden_states_200 = permute_13.contiguous(); permute_13 = None | |
hidden_states_201 = hidden_states_200 + residual_6; hidden_states_200 = residual_6 = None | |
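# up_blocks[0], third resnet/attention pair: concatenate with res_hidden_states_2 and repeat.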
hidden_states_203 = torch.cat([hidden_states_201, res_hidden_states_2], dim = 1); hidden_states_201 = res_hidden_states_2 = None | |
hidden_states_204 = self.L__self___up_blocks_0_resnets_2_norm1(hidden_states_203) | |
hidden_states_205 = self.L__self___time_embedding_act(hidden_states_204); hidden_states_204 = None | |
l__self___up_blocks_0_resnets_2_conv1_weight = self.L__self___up_blocks_0_resnets_2_conv1_weight | |
l__self___up_blocks_0_resnets_2_conv1_bias = self.L__self___up_blocks_0_resnets_2_conv1_bias | |
hidden_states_206 = torch.conv2d(hidden_states_205, l__self___up_blocks_0_resnets_2_conv1_weight, l__self___up_blocks_0_resnets_2_conv1_bias, (1, 1), (1, 1), (1, 1), 1); hidden_states_205 = l__self___up_blocks_0_resnets_2_conv1_weight = l__self___up_blocks_0_resnets_2_conv1_bias = None | |
temb_20 = self.L__self___time_embedding_act(emb_11) | |
l__self___up_blocks_0_resnets_2_time_emb_proj_weight = self.L__self___up_blocks_0_resnets_2_time_emb_proj_weight | |
l__self___up_blocks_0_resnets_2_time_emb_proj_bias = self.L__self___up_blocks_0_resnets_2_time_emb_proj_bias | |
out_28 = torch._C._nn.linear(temb_20, l__self___up_blocks_0_resnets_2_time_emb_proj_weight, l__self___up_blocks_0_resnets_2_time_emb_proj_bias); temb_20 = l__self___up_blocks_0_resnets_2_time_emb_proj_weight = l__self___up_blocks_0_resnets_2_time_emb_proj_bias = None | |
temb_21 = out_28[(slice(None, None, None), slice(None, None, None), None, None)]; out_28 = None | |
hidden_states_207 = hidden_states_206 + temb_21; hidden_states_206 = temb_21 = None | |
hidden_states_208 = self.L__self___up_blocks_0_resnets_2_norm2(hidden_states_207); hidden_states_207 = None | |
hidden_states_209 = self.L__self___time_embedding_act(hidden_states_208); hidden_states_208 = None | |
hidden_states_210 = self.L__self___up_blocks_0_resnets_2_dropout(hidden_states_209); hidden_states_209 = None | |
l__self___up_blocks_0_resnets_2_conv2_weight = self.L__self___up_blocks_0_resnets_2_conv2_weight | |
l__self___up_blocks_0_resnets_2_conv2_bias = self.L__self___up_blocks_0_resnets_2_conv2_bias | |
hidden_states_211 = torch.conv2d(hidden_states_210, l__self___up_blocks_0_resnets_2_conv2_weight, l__self___up_blocks_0_resnets_2_conv2_bias, (1, 1), (1, 1), (1, 1), 1); hidden_states_210 = l__self___up_blocks_0_resnets_2_conv2_weight = l__self___up_blocks_0_resnets_2_conv2_bias = None | |
l__self___up_blocks_0_resnets_2_conv_shortcut_weight = self.L__self___up_blocks_0_resnets_2_conv_shortcut_weight | |
l__self___up_blocks_0_resnets_2_conv_shortcut_bias = self.L__self___up_blocks_0_resnets_2_conv_shortcut_bias | |
input_tensor_4 = torch.conv2d(hidden_states_203, l__self___up_blocks_0_resnets_2_conv_shortcut_weight, l__self___up_blocks_0_resnets_2_conv_shortcut_bias, (1, 1), (0, 0), (1, 1), 1); hidden_states_203 = l__self___up_blocks_0_resnets_2_conv_shortcut_weight = l__self___up_blocks_0_resnets_2_conv_shortcut_bias = None | |
add_29 = input_tensor_4 + hidden_states_211; input_tensor_4 = hidden_states_211 = None | |
residual_7 = add_29 / 1.0; add_29 = None | |
hidden_states_213 = self.L__self___up_blocks_0_attentions_2_norm(residual_7) | |
permute_14 = hidden_states_213.permute(0, 2, 3, 1); hidden_states_213 = None | |
hidden_states_214 = permute_14.reshape(2, 1024, 1280); permute_14 = None | |
l__self___up_blocks_0_attentions_2_proj_in_weight = self.L__self___up_blocks_0_attentions_2_proj_in_weight | |
l__self___up_blocks_0_attentions_2_proj_in_bias = self.L__self___up_blocks_0_attentions_2_proj_in_bias | |
hidden_states_215 = torch._C._nn.linear(hidden_states_214, l__self___up_blocks_0_attentions_2_proj_in_weight, l__self___up_blocks_0_attentions_2_proj_in_bias); hidden_states_214 = l__self___up_blocks_0_attentions_2_proj_in_weight = l__self___up_blocks_0_attentions_2_proj_in_bias = None | |
hidden_states_216 = self.L__self___up_blocks_0_attentions_2_transformer_blocks_0(hidden_states_215, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_215 = None | |
hidden_states_217 = self.L__self___up_blocks_0_attentions_2_transformer_blocks_1(hidden_states_216, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_216 = None | |
hidden_states_218 = self.L__self___up_blocks_0_attentions_2_transformer_blocks_2(hidden_states_217, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_217 = None | |
hidden_states_219 = self.L__self___up_blocks_0_attentions_2_transformer_blocks_3(hidden_states_218, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_218 = None | |
hidden_states_220 = self.L__self___up_blocks_0_attentions_2_transformer_blocks_4(hidden_states_219, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_219 = None | |
hidden_states_221 = self.L__self___up_blocks_0_attentions_2_transformer_blocks_5(hidden_states_220, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_220 = None | |
hidden_states_222 = self.L__self___up_blocks_0_attentions_2_transformer_blocks_6(hidden_states_221, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_221 = None | |
hidden_states_223 = self.L__self___up_blocks_0_attentions_2_transformer_blocks_7(hidden_states_222, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_222 = None | |
hidden_states_224 = self.L__self___up_blocks_0_attentions_2_transformer_blocks_8(hidden_states_223, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_223 = None | |
hidden_states_225 = self.L__self___up_blocks_0_attentions_2_transformer_blocks_9(hidden_states_224, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_224 = None | |
l__self___up_blocks_0_attentions_2_proj_out_weight = self.L__self___up_blocks_0_attentions_2_proj_out_weight | |
l__self___up_blocks_0_attentions_2_proj_out_bias = self.L__self___up_blocks_0_attentions_2_proj_out_bias | |
hidden_states_226 = torch._C._nn.linear(hidden_states_225, l__self___up_blocks_0_attentions_2_proj_out_weight, l__self___up_blocks_0_attentions_2_proj_out_bias); hidden_states_225 = l__self___up_blocks_0_attentions_2_proj_out_weight = l__self___up_blocks_0_attentions_2_proj_out_bias = None | |
reshape_16 = hidden_states_226.reshape(2, 32, 32, 1280); hidden_states_226 = None | |
permute_15 = reshape_16.permute(0, 3, 1, 2); reshape_16 = None | |
hidden_states_227 = permute_15.contiguous(); permute_15 = None | |
hidden_states_228 = hidden_states_227 + residual_7; hidden_states_227 = residual_7 = None | |
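# up_blocks[0].upsamplers[0]: 2x nearest-neighbor upsampling followed by a padded conv.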
hidden_states_229 = torch.nn.functional.interpolate(hidden_states_228, scale_factor = 2.0, mode = 'nearest'); hidden_states_228 = None | |
l__self___up_blocks_0_upsamplers_0_conv_weight = self.L__self___up_blocks_0_upsamplers_0_conv_weight | |
l__self___up_blocks_0_upsamplers_0_conv_bias = self.L__self___up_blocks_0_upsamplers_0_conv_bias | |
sample_11 = torch.conv2d(hidden_states_229, l__self___up_blocks_0_upsamplers_0_conv_weight, l__self___up_blocks_0_upsamplers_0_conv_bias, (1, 1), (1, 1), (1, 1), 1); hidden_states_229 = l__self___up_blocks_0_upsamplers_0_conv_weight = l__self___up_blocks_0_upsamplers_0_conv_bias = None | |
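# up_blocks[1]: three resnet + transformer pairs at 640 channels and 64x64 resolution
# (sequence length 4096), each with two BasicTransformerBlocks, consuming
# res_hidden_states_3 through _5.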
hidden_states_233 = torch.cat([sample_11, res_hidden_states_3], dim = 1); sample_11 = res_hidden_states_3 = None | |
hidden_states_234 = self.L__self___up_blocks_1_resnets_0_norm1(hidden_states_233) | |
hidden_states_235 = self.L__self___time_embedding_act(hidden_states_234); hidden_states_234 = None | |
l__self___up_blocks_1_resnets_0_conv1_weight = self.L__self___up_blocks_1_resnets_0_conv1_weight | |
l__self___up_blocks_1_resnets_0_conv1_bias = self.L__self___up_blocks_1_resnets_0_conv1_bias | |
hidden_states_236 = torch.conv2d(hidden_states_235, l__self___up_blocks_1_resnets_0_conv1_weight, l__self___up_blocks_1_resnets_0_conv1_bias, (1, 1), (1, 1), (1, 1), 1); hidden_states_235 = l__self___up_blocks_1_resnets_0_conv1_weight = l__self___up_blocks_1_resnets_0_conv1_bias = None | |
temb_22 = self.L__self___time_embedding_act(emb_11) | |
l__self___up_blocks_1_resnets_0_time_emb_proj_weight = self.L__self___up_blocks_1_resnets_0_time_emb_proj_weight | |
l__self___up_blocks_1_resnets_0_time_emb_proj_bias = self.L__self___up_blocks_1_resnets_0_time_emb_proj_bias | |
out_31 = torch._C._nn.linear(temb_22, l__self___up_blocks_1_resnets_0_time_emb_proj_weight, l__self___up_blocks_1_resnets_0_time_emb_proj_bias); temb_22 = l__self___up_blocks_1_resnets_0_time_emb_proj_weight = l__self___up_blocks_1_resnets_0_time_emb_proj_bias = None | |
temb_23 = out_31[(slice(None, None, None), slice(None, None, None), None, None)]; out_31 = None | |
hidden_states_237 = hidden_states_236 + temb_23; hidden_states_236 = temb_23 = None | |
hidden_states_238 = self.L__self___up_blocks_1_resnets_0_norm2(hidden_states_237); hidden_states_237 = None | |
hidden_states_239 = self.L__self___time_embedding_act(hidden_states_238); hidden_states_238 = None | |
hidden_states_240 = self.L__self___up_blocks_1_resnets_0_dropout(hidden_states_239); hidden_states_239 = None | |
l__self___up_blocks_1_resnets_0_conv2_weight = self.L__self___up_blocks_1_resnets_0_conv2_weight | |
l__self___up_blocks_1_resnets_0_conv2_bias = self.L__self___up_blocks_1_resnets_0_conv2_bias | |
hidden_states_241 = torch.conv2d(hidden_states_240, l__self___up_blocks_1_resnets_0_conv2_weight, l__self___up_blocks_1_resnets_0_conv2_bias, (1, 1), (1, 1), (1, 1), 1); hidden_states_240 = l__self___up_blocks_1_resnets_0_conv2_weight = l__self___up_blocks_1_resnets_0_conv2_bias = None | |
l__self___up_blocks_1_resnets_0_conv_shortcut_weight = self.L__self___up_blocks_1_resnets_0_conv_shortcut_weight | |
l__self___up_blocks_1_resnets_0_conv_shortcut_bias = self.L__self___up_blocks_1_resnets_0_conv_shortcut_bias | |
input_tensor_5 = torch.conv2d(hidden_states_233, l__self___up_blocks_1_resnets_0_conv_shortcut_weight, l__self___up_blocks_1_resnets_0_conv_shortcut_bias, (1, 1), (0, 0), (1, 1), 1); hidden_states_233 = l__self___up_blocks_1_resnets_0_conv_shortcut_weight = l__self___up_blocks_1_resnets_0_conv_shortcut_bias = None | |
add_32 = input_tensor_5 + hidden_states_241; input_tensor_5 = hidden_states_241 = None | |
residual_8 = add_32 / 1.0; add_32 = None | |
hidden_states_243 = self.L__self___up_blocks_1_attentions_0_norm(residual_8) | |
permute_16 = hidden_states_243.permute(0, 2, 3, 1); hidden_states_243 = None | |
hidden_states_244 = permute_16.reshape(2, 4096, 640); permute_16 = None | |
l__self___up_blocks_1_attentions_0_proj_in_weight = self.L__self___up_blocks_1_attentions_0_proj_in_weight | |
l__self___up_blocks_1_attentions_0_proj_in_bias = self.L__self___up_blocks_1_attentions_0_proj_in_bias | |
hidden_states_245 = torch._C._nn.linear(hidden_states_244, l__self___up_blocks_1_attentions_0_proj_in_weight, l__self___up_blocks_1_attentions_0_proj_in_bias); hidden_states_244 = l__self___up_blocks_1_attentions_0_proj_in_weight = l__self___up_blocks_1_attentions_0_proj_in_bias = None | |
hidden_states_246 = self.L__self___up_blocks_1_attentions_0_transformer_blocks_0(hidden_states_245, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_245 = None | |
hidden_states_247 = self.L__self___up_blocks_1_attentions_0_transformer_blocks_1(hidden_states_246, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_246 = None | |
l__self___up_blocks_1_attentions_0_proj_out_weight = self.L__self___up_blocks_1_attentions_0_proj_out_weight | |
l__self___up_blocks_1_attentions_0_proj_out_bias = self.L__self___up_blocks_1_attentions_0_proj_out_bias | |
hidden_states_248 = torch._C._nn.linear(hidden_states_247, l__self___up_blocks_1_attentions_0_proj_out_weight, l__self___up_blocks_1_attentions_0_proj_out_bias); hidden_states_247 = l__self___up_blocks_1_attentions_0_proj_out_weight = l__self___up_blocks_1_attentions_0_proj_out_bias = None | |
reshape_18 = hidden_states_248.reshape(2, 64, 64, 640); hidden_states_248 = None | |
permute_17 = reshape_18.permute(0, 3, 1, 2); reshape_18 = None | |
hidden_states_249 = permute_17.contiguous(); permute_17 = None | |
hidden_states_250 = hidden_states_249 + residual_8; hidden_states_249 = residual_8 = None | |
hidden_states_252 = torch.cat([hidden_states_250, res_hidden_states_4], dim = 1); hidden_states_250 = res_hidden_states_4 = None | |
hidden_states_253 = self.L__self___up_blocks_1_resnets_1_norm1(hidden_states_252) | |
hidden_states_254 = self.L__self___time_embedding_act(hidden_states_253); hidden_states_253 = None | |
l__self___up_blocks_1_resnets_1_conv1_weight = self.L__self___up_blocks_1_resnets_1_conv1_weight | |
l__self___up_blocks_1_resnets_1_conv1_bias = self.L__self___up_blocks_1_resnets_1_conv1_bias | |
hidden_states_255 = torch.conv2d(hidden_states_254, l__self___up_blocks_1_resnets_1_conv1_weight, l__self___up_blocks_1_resnets_1_conv1_bias, (1, 1), (1, 1), (1, 1), 1); hidden_states_254 = l__self___up_blocks_1_resnets_1_conv1_weight = l__self___up_blocks_1_resnets_1_conv1_bias = None | |
temb_24 = self.L__self___time_embedding_act(emb_11) | |
l__self___up_blocks_1_resnets_1_time_emb_proj_weight = self.L__self___up_blocks_1_resnets_1_time_emb_proj_weight | |
l__self___up_blocks_1_resnets_1_time_emb_proj_bias = self.L__self___up_blocks_1_resnets_1_time_emb_proj_bias | |
out_34 = torch._C._nn.linear(temb_24, l__self___up_blocks_1_resnets_1_time_emb_proj_weight, l__self___up_blocks_1_resnets_1_time_emb_proj_bias); temb_24 = l__self___up_blocks_1_resnets_1_time_emb_proj_weight = l__self___up_blocks_1_resnets_1_time_emb_proj_bias = None | |
temb_25 = out_34[(slice(None, None, None), slice(None, None, None), None, None)]; out_34 = None | |
hidden_states_256 = hidden_states_255 + temb_25; hidden_states_255 = temb_25 = None | |
hidden_states_257 = self.L__self___up_blocks_1_resnets_1_norm2(hidden_states_256); hidden_states_256 = None | |
hidden_states_258 = self.L__self___time_embedding_act(hidden_states_257); hidden_states_257 = None | |
hidden_states_259 = self.L__self___up_blocks_1_resnets_1_dropout(hidden_states_258); hidden_states_258 = None | |
l__self___up_blocks_1_resnets_1_conv2_weight = self.L__self___up_blocks_1_resnets_1_conv2_weight | |
l__self___up_blocks_1_resnets_1_conv2_bias = self.L__self___up_blocks_1_resnets_1_conv2_bias | |
hidden_states_260 = torch.conv2d(hidden_states_259, l__self___up_blocks_1_resnets_1_conv2_weight, l__self___up_blocks_1_resnets_1_conv2_bias, (1, 1), (1, 1), (1, 1), 1); hidden_states_259 = l__self___up_blocks_1_resnets_1_conv2_weight = l__self___up_blocks_1_resnets_1_conv2_bias = None | |
l__self___up_blocks_1_resnets_1_conv_shortcut_weight = self.L__self___up_blocks_1_resnets_1_conv_shortcut_weight | |
l__self___up_blocks_1_resnets_1_conv_shortcut_bias = self.L__self___up_blocks_1_resnets_1_conv_shortcut_bias | |
input_tensor_6 = torch.conv2d(hidden_states_252, l__self___up_blocks_1_resnets_1_conv_shortcut_weight, l__self___up_blocks_1_resnets_1_conv_shortcut_bias, (1, 1), (0, 0), (1, 1), 1); hidden_states_252 = l__self___up_blocks_1_resnets_1_conv_shortcut_weight = l__self___up_blocks_1_resnets_1_conv_shortcut_bias = None | |
add_35 = input_tensor_6 + hidden_states_260; input_tensor_6 = hidden_states_260 = None | |
residual_9 = add_35 / 1.0; add_35 = None | |
hidden_states_262 = self.L__self___up_blocks_1_attentions_1_norm(residual_9) | |
permute_18 = hidden_states_262.permute(0, 2, 3, 1); hidden_states_262 = None | |
hidden_states_263 = permute_18.reshape(2, 4096, 640); permute_18 = None | |
l__self___up_blocks_1_attentions_1_proj_in_weight = self.L__self___up_blocks_1_attentions_1_proj_in_weight | |
l__self___up_blocks_1_attentions_1_proj_in_bias = self.L__self___up_blocks_1_attentions_1_proj_in_bias | |
hidden_states_264 = torch._C._nn.linear(hidden_states_263, l__self___up_blocks_1_attentions_1_proj_in_weight, l__self___up_blocks_1_attentions_1_proj_in_bias); hidden_states_263 = l__self___up_blocks_1_attentions_1_proj_in_weight = l__self___up_blocks_1_attentions_1_proj_in_bias = None | |
hidden_states_265 = self.L__self___up_blocks_1_attentions_1_transformer_blocks_0(hidden_states_264, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_264 = None | |
hidden_states_266 = self.L__self___up_blocks_1_attentions_1_transformer_blocks_1(hidden_states_265, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_265 = None | |
l__self___up_blocks_1_attentions_1_proj_out_weight = self.L__self___up_blocks_1_attentions_1_proj_out_weight | |
l__self___up_blocks_1_attentions_1_proj_out_bias = self.L__self___up_blocks_1_attentions_1_proj_out_bias | |
hidden_states_267 = torch._C._nn.linear(hidden_states_266, l__self___up_blocks_1_attentions_1_proj_out_weight, l__self___up_blocks_1_attentions_1_proj_out_bias); hidden_states_266 = l__self___up_blocks_1_attentions_1_proj_out_weight = l__self___up_blocks_1_attentions_1_proj_out_bias = None | |
reshape_20 = hidden_states_267.reshape(2, 64, 64, 640); hidden_states_267 = None | |
permute_19 = reshape_20.permute(0, 3, 1, 2); reshape_20 = None | |
hidden_states_268 = permute_19.contiguous(); permute_19 = None | |
hidden_states_269 = hidden_states_268 + residual_9; hidden_states_268 = residual_9 = None | |
hidden_states_271 = torch.cat([hidden_states_269, res_hidden_states_5], dim = 1); hidden_states_269 = res_hidden_states_5 = None | |
hidden_states_272 = self.L__self___up_blocks_1_resnets_2_norm1(hidden_states_271) | |
hidden_states_273 = self.L__self___time_embedding_act(hidden_states_272); hidden_states_272 = None | |
l__self___up_blocks_1_resnets_2_conv1_weight = self.L__self___up_blocks_1_resnets_2_conv1_weight | |
l__self___up_blocks_1_resnets_2_conv1_bias = self.L__self___up_blocks_1_resnets_2_conv1_bias | |
hidden_states_274 = torch.conv2d(hidden_states_273, l__self___up_blocks_1_resnets_2_conv1_weight, l__self___up_blocks_1_resnets_2_conv1_bias, (1, 1), (1, 1), (1, 1), 1); hidden_states_273 = l__self___up_blocks_1_resnets_2_conv1_weight = l__self___up_blocks_1_resnets_2_conv1_bias = None | |
temb_26 = self.L__self___time_embedding_act(emb_11) | |
l__self___up_blocks_1_resnets_2_time_emb_proj_weight = self.L__self___up_blocks_1_resnets_2_time_emb_proj_weight | |
l__self___up_blocks_1_resnets_2_time_emb_proj_bias = self.L__self___up_blocks_1_resnets_2_time_emb_proj_bias | |
out_37 = torch._C._nn.linear(temb_26, l__self___up_blocks_1_resnets_2_time_emb_proj_weight, l__self___up_blocks_1_resnets_2_time_emb_proj_bias); temb_26 = l__self___up_blocks_1_resnets_2_time_emb_proj_weight = l__self___up_blocks_1_resnets_2_time_emb_proj_bias = None | |
temb_27 = out_37[(slice(None, None, None), slice(None, None, None), None, None)]; out_37 = None | |
hidden_states_275 = hidden_states_274 + temb_27; hidden_states_274 = temb_27 = None | |
hidden_states_276 = self.L__self___up_blocks_1_resnets_2_norm2(hidden_states_275); hidden_states_275 = None | |
hidden_states_277 = self.L__self___time_embedding_act(hidden_states_276); hidden_states_276 = None | |
hidden_states_278 = self.L__self___up_blocks_1_resnets_2_dropout(hidden_states_277); hidden_states_277 = None | |
l__self___up_blocks_1_resnets_2_conv2_weight = self.L__self___up_blocks_1_resnets_2_conv2_weight | |
l__self___up_blocks_1_resnets_2_conv2_bias = self.L__self___up_blocks_1_resnets_2_conv2_bias | |
hidden_states_279 = torch.conv2d(hidden_states_278, l__self___up_blocks_1_resnets_2_conv2_weight, l__self___up_blocks_1_resnets_2_conv2_bias, (1, 1), (1, 1), (1, 1), 1); hidden_states_278 = l__self___up_blocks_1_resnets_2_conv2_weight = l__self___up_blocks_1_resnets_2_conv2_bias = None | |
l__self___up_blocks_1_resnets_2_conv_shortcut_weight = self.L__self___up_blocks_1_resnets_2_conv_shortcut_weight | |
l__self___up_blocks_1_resnets_2_conv_shortcut_bias = self.L__self___up_blocks_1_resnets_2_conv_shortcut_bias | |
input_tensor_7 = torch.conv2d(hidden_states_271, l__self___up_blocks_1_resnets_2_conv_shortcut_weight, l__self___up_blocks_1_resnets_2_conv_shortcut_bias, (1, 1), (0, 0), (1, 1), 1); hidden_states_271 = l__self___up_blocks_1_resnets_2_conv_shortcut_weight = l__self___up_blocks_1_resnets_2_conv_shortcut_bias = None | |
add_38 = input_tensor_7 + hidden_states_279; input_tensor_7 = hidden_states_279 = None | |
residual_10 = add_38 / 1.0; add_38 = None | |
hidden_states_281 = self.L__self___up_blocks_1_attentions_2_norm(residual_10) | |
permute_20 = hidden_states_281.permute(0, 2, 3, 1); hidden_states_281 = None | |
hidden_states_282 = permute_20.reshape(2, 4096, 640); permute_20 = None | |
l__self___up_blocks_1_attentions_2_proj_in_weight = self.L__self___up_blocks_1_attentions_2_proj_in_weight | |
l__self___up_blocks_1_attentions_2_proj_in_bias = self.L__self___up_blocks_1_attentions_2_proj_in_bias | |
hidden_states_283 = torch._C._nn.linear(hidden_states_282, l__self___up_blocks_1_attentions_2_proj_in_weight, l__self___up_blocks_1_attentions_2_proj_in_bias); hidden_states_282 = l__self___up_blocks_1_attentions_2_proj_in_weight = l__self___up_blocks_1_attentions_2_proj_in_bias = None | |
hidden_states_284 = self.L__self___up_blocks_1_attentions_2_transformer_blocks_0(hidden_states_283, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_283 = None | |
hidden_states_285 = self.L__self___up_blocks_1_attentions_2_transformer_blocks_1(hidden_states_284, attention_mask = None, encoder_hidden_states = l_encoder_hidden_states_, encoder_attention_mask = None, timestep = None, cross_attention_kwargs = None, class_labels = None); hidden_states_284 = l_encoder_hidden_states_ = None | |
l__self___up_blocks_1_attentions_2_proj_out_weight = self.L__self___up_blocks_1_attentions_2_proj_out_weight | |
l__self___up_blocks_1_attentions_2_proj_out_bias = self.L__self___up_blocks_1_attentions_2_proj_out_bias | |
hidden_states_286 = torch._C._nn.linear(hidden_states_285, l__self___up_blocks_1_attentions_2_proj_out_weight, l__self___up_blocks_1_attentions_2_proj_out_bias); hidden_states_285 = l__self___up_blocks_1_attentions_2_proj_out_weight = l__self___up_blocks_1_attentions_2_proj_out_bias = None | |
reshape_22 = hidden_states_286.reshape(2, 64, 64, 640); hidden_states_286 = None | |
permute_21 = reshape_22.permute(0, 3, 1, 2); reshape_22 = None | |
hidden_states_287 = permute_21.contiguous(); permute_21 = None | |
hidden_states_288 = hidden_states_287 + residual_10; hidden_states_287 = residual_10 = None | |
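# up_blocks[1].upsamplers[0]: 2x nearest upsample + conv, feeding the final up block.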
hidden_states_289 = torch.nn.functional.interpolate(hidden_states_288, scale_factor = 2.0, mode = 'nearest'); hidden_states_288 = None | |
l__self___up_blocks_1_upsamplers_0_conv_weight = self.L__self___up_blocks_1_upsamplers_0_conv_weight | |
l__self___up_blocks_1_upsamplers_0_conv_bias = self.L__self___up_blocks_1_upsamplers_0_conv_bias | |
sample_12 = torch.conv2d(hidden_states_289, l__self___up_blocks_1_upsamplers_0_conv_weight, l__self___up_blocks_1_upsamplers_0_conv_bias, (1, 1), (1, 1), (1, 1), 1); hidden_states_289 = l__self___up_blocks_1_upsamplers_0_conv_weight = l__self___up_blocks_1_upsamplers_0_conv_bias = None | |
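# up_blocks[2]: resnet-only up block (no attention layers), consuming the remaining
# skip connections res_hidden_states_6 through _8.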
hidden_states_293 = torch.cat([sample_12, res_hidden_states_6], dim = 1); sample_12 = res_hidden_states_6 = None | |
hidden_states_294 = self.L__self___up_blocks_2_resnets_0_norm1(hidden_states_293) | |
hidden_states_295 = self.L__self___time_embedding_act(hidden_states_294); hidden_states_294 = None | |
l__self___up_blocks_2_resnets_0_conv1_weight = self.L__self___up_blocks_2_resnets_0_conv1_weight | |
l__self___up_blocks_2_resnets_0_conv1_bias = self.L__self___up_blocks_2_resnets_0_conv1_bias | |
hidden_states_296 = torch.conv2d(hidden_states_295, l__self___up_blocks_2_resnets_0_conv1_weight, l__self___up_blocks_2_resnets_0_conv1_bias, (1, 1), (1, 1), (1, 1), 1); hidden_states_295 = l__self___up_blocks_2_resnets_0_conv1_weight = l__self___up_blocks_2_resnets_0_conv1_bias = None | |
temb_28 = self.L__self___time_embedding_act(emb_11) | |
l__self___up_blocks_2_resnets_0_time_emb_proj_weight = self.L__self___up_blocks_2_resnets_0_time_emb_proj_weight | |
l__self___up_blocks_2_resnets_0_time_emb_proj_bias = self.L__self___up_blocks_2_resnets_0_time_emb_proj_bias | |
out_40 = torch._C._nn.linear(temb_28, l__self___up_blocks_2_resnets_0_time_emb_proj_weight, l__self___up_blocks_2_resnets_0_time_emb_proj_bias); temb_28 = l__self___up_blocks_2_resnets_0_time_emb_proj_weight = l__self___up_blocks_2_resnets_0_time_emb_proj_bias = None | |
temb_29 = out_40[(slice(None, None, None), slice(None, None, None), None, None)]; out_40 = None | |
hidden_states_297 = hidden_states_296 + temb_29; hidden_states_296 = temb_29 = None | |
hidden_states_298 = self.L__self___up_blocks_2_resnets_0_norm2(hidden_states_297); hidden_states_297 = None | |
hidden_states_299 = self.L__self___time_embedding_act(hidden_states_298); hidden_states_298 = None | |
hidden_states_300 = self.L__self___up_blocks_2_resnets_0_dropout(hidden_states_299); hidden_states_299 = None | |
l__self___up_blocks_2_resnets_0_conv2_weight = self.L__self___up_blocks_2_resnets_0_conv2_weight | |
l__self___up_blocks_2_resnets_0_conv2_bias = self.L__self___up_blocks_2_resnets_0_conv2_bias | |
hidden_states_301 = torch.conv2d(hidden_states_300, l__self___up_blocks_2_resnets_0_conv2_weight, l__self___up_blocks_2_resnets_0_conv2_bias, (1, 1), (1, 1), (1, 1), 1); hidden_states_300 = l__self___up_blocks_2_resnets_0_conv2_weight = l__self___up_blocks_2_resnets_0_conv2_bias = None | |
l__self___up_blocks_2_resnets_0_conv_shortcut_weight = self.L__self___up_blocks_2_resnets_0_conv_shortcut_weight | |
l__self___up_blocks_2_resnets_0_conv_shortcut_bias = self.L__self___up_blocks_2_resnets_0_conv_shortcut_bias | |
input_tensor_8 = torch.conv2d(hidden_states_293, l__self___up_blocks_2_resnets_0_conv_shortcut_weight, l__self___up_blocks_2_resnets_0_conv_shortcut_bias, (1, 1), (0, 0), (1, 1), 1); hidden_states_293 = l__self___up_blocks_2_resnets_0_conv_shortcut_weight = l__self___up_blocks_2_resnets_0_conv_shortcut_bias = None | |
add_41 = input_tensor_8 + hidden_states_301; input_tensor_8 = hidden_states_301 = None | |
hidden_states_302 = add_41 / 1.0; add_41 = None | |
hidden_states_304 = torch.cat([hidden_states_302, res_hidden_states_7], dim = 1); hidden_states_302 = res_hidden_states_7 = None | |
hidden_states_305 = self.L__self___up_blocks_2_resnets_1_norm1(hidden_states_304) | |
hidden_states_306 = self.L__self___time_embedding_act(hidden_states_305); hidden_states_305 = None | |
l__self___up_blocks_2_resnets_1_conv1_weight = self.L__self___up_blocks_2_resnets_1_conv1_weight | |
l__self___up_blocks_2_resnets_1_conv1_bias = self.L__self___up_blocks_2_resnets_1_conv1_bias | |
hidden_states_307 = torch.conv2d(hidden_states_306, l__self___up_blocks_2_resnets_1_conv1_weight, l__self___up_blocks_2_resnets_1_conv1_bias, (1, 1), (1, 1), (1, 1), 1); hidden_states_306 = l__self___up_blocks_2_resnets_1_conv1_weight = l__self___up_blocks_2_resnets_1_conv1_bias = None | |
temb_30 = self.L__self___time_embedding_act(emb_11) | |
l__self___up_blocks_2_resnets_1_time_emb_proj_weight = self.L__self___up_blocks_2_resnets_1_time_emb_proj_weight | |
l__self___up_blocks_2_resnets_1_time_emb_proj_bias = self.L__self___up_blocks_2_resnets_1_time_emb_proj_bias | |
out_41 = torch._C._nn.linear(temb_30, l__self___up_blocks_2_resnets_1_time_emb_proj_weight, l__self___up_blocks_2_resnets_1_time_emb_proj_bias); temb_30 = l__self___up_blocks_2_resnets_1_time_emb_proj_weight = l__self___up_blocks_2_resnets_1_time_emb_proj_bias = None | |
temb_31 = out_41[(slice(None, None, None), slice(None, None, None), None, None)]; out_41 = None | |
hidden_states_308 = hidden_states_307 + temb_31; hidden_states_307 = temb_31 = None | |
hidden_states_309 = self.L__self___up_blocks_2_resnets_1_norm2(hidden_states_308); hidden_states_308 = None | |
hidden_states_310 = self.L__self___time_embedding_act(hidden_states_309); hidden_states_309 = None | |
hidden_states_311 = self.L__self___up_blocks_2_resnets_1_dropout(hidden_states_310); hidden_states_310 = None | |
l__self___up_blocks_2_resnets_1_conv2_weight = self.L__self___up_blocks_2_resnets_1_conv2_weight | |
l__self___up_blocks_2_resnets_1_conv2_bias = self.L__self___up_blocks_2_resnets_1_conv2_bias | |
hidden_states_312 = torch.conv2d(hidden_states_311, l__self___up_blocks_2_resnets_1_conv2_weight, l__self___up_blocks_2_resnets_1_conv2_bias, (1, 1), (1, 1), (1, 1), 1); hidden_states_311 = l__self___up_blocks_2_resnets_1_conv2_weight = l__self___up_blocks_2_resnets_1_conv2_bias = None | |
l__self___up_blocks_2_resnets_1_conv_shortcut_weight = self.L__self___up_blocks_2_resnets_1_conv_shortcut_weight | |
l__self___up_blocks_2_resnets_1_conv_shortcut_bias = self.L__self___up_blocks_2_resnets_1_conv_shortcut_bias | |
input_tensor_9 = torch.conv2d(hidden_states_304, l__self___up_blocks_2_resnets_1_conv_shortcut_weight, l__self___up_blocks_2_resnets_1_conv_shortcut_bias, (1, 1), (0, 0), (1, 1), 1); hidden_states_304 = l__self___up_blocks_2_resnets_1_conv_shortcut_weight = l__self___up_blocks_2_resnets_1_conv_shortcut_bias = None | |
add_43 = input_tensor_9 + hidden_states_312; input_tensor_9 = hidden_states_312 = None | |
hidden_states_313 = add_43 / 1.0; add_43 = None | |
hidden_states_315 = torch.cat([hidden_states_313, res_hidden_states_8], dim = 1); hidden_states_313 = res_hidden_states_8 = None | |
hidden_states_316 = self.L__self___up_blocks_2_resnets_2_norm1(hidden_states_315) | |
hidden_states_317 = self.L__self___time_embedding_act(hidden_states_316); hidden_states_316 = None | |
l__self___up_blocks_2_resnets_2_conv1_weight = self.L__self___up_blocks_2_resnets_2_conv1_weight | |
l__self___up_blocks_2_resnets_2_conv1_bias = self.L__self___up_blocks_2_resnets_2_conv1_bias | |
hidden_states_318 = torch.conv2d(hidden_states_317, l__self___up_blocks_2_resnets_2_conv1_weight, l__self___up_blocks_2_resnets_2_conv1_bias, (1, 1), (1, 1), (1, 1), 1); hidden_states_317 = l__self___up_blocks_2_resnets_2_conv1_weight = l__self___up_blocks_2_resnets_2_conv1_bias = None | |
temb_32 = self.L__self___time_embedding_act(emb_11); emb_11 = None | |
l__self___up_blocks_2_resnets_2_time_emb_proj_weight = self.L__self___up_blocks_2_resnets_2_time_emb_proj_weight | |
l__self___up_blocks_2_resnets_2_time_emb_proj_bias = self.L__self___up_blocks_2_resnets_2_time_emb_proj_bias | |
out_42 = torch._C._nn.linear(temb_32, l__self___up_blocks_2_resnets_2_time_emb_proj_weight, l__self___up_blocks_2_resnets_2_time_emb_proj_bias); temb_32 = l__self___up_blocks_2_resnets_2_time_emb_proj_weight = l__self___up_blocks_2_resnets_2_time_emb_proj_bias = None | |
temb_33 = out_42[(slice(None, None, None), slice(None, None, None), None, None)]; out_42 = None | |
hidden_states_319 = hidden_states_318 + temb_33; hidden_states_318 = temb_33 = None | |
hidden_states_320 = self.L__self___up_blocks_2_resnets_2_norm2(hidden_states_319); hidden_states_319 = None | |
hidden_states_321 = self.L__self___time_embedding_act(hidden_states_320); hidden_states_320 = None | |
hidden_states_322 = self.L__self___up_blocks_2_resnets_2_dropout(hidden_states_321); hidden_states_321 = None | |
l__self___up_blocks_2_resnets_2_conv2_weight = self.L__self___up_blocks_2_resnets_2_conv2_weight | |
l__self___up_blocks_2_resnets_2_conv2_bias = self.L__self___up_blocks_2_resnets_2_conv2_bias | |
hidden_states_323 = torch.conv2d(hidden_states_322, l__self___up_blocks_2_resnets_2_conv2_weight, l__self___up_blocks_2_resnets_2_conv2_bias, (1, 1), (1, 1), (1, 1), 1); hidden_states_322 = l__self___up_blocks_2_resnets_2_conv2_weight = l__self___up_blocks_2_resnets_2_conv2_bias = None | |
l__self___up_blocks_2_resnets_2_conv_shortcut_weight = self.L__self___up_blocks_2_resnets_2_conv_shortcut_weight | |
l__self___up_blocks_2_resnets_2_conv_shortcut_bias = self.L__self___up_blocks_2_resnets_2_conv_shortcut_bias | |
input_tensor_10 = torch.conv2d(hidden_states_315, l__self___up_blocks_2_resnets_2_conv_shortcut_weight, l__self___up_blocks_2_resnets_2_conv_shortcut_bias, (1, 1), (0, 0), (1, 1), 1); hidden_states_315 = l__self___up_blocks_2_resnets_2_conv_shortcut_weight = l__self___up_blocks_2_resnets_2_conv_shortcut_bias = None | |
add_45 = input_tensor_10 + hidden_states_323; input_tensor_10 = hidden_states_323 = None | |
sample_13 = add_45 / 1.0; add_45 = None | |
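# Output head: conv_norm_out (GroupNorm) -> SiLU -> conv_out produces the sample returned below.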
sample_14 = self.L__self___conv_norm_out(sample_13); sample_13 = None | |
sample_15 = self.L__self___time_embedding_act(sample_14); sample_14 = None | |
sample_16 = self.L__self___conv_out(sample_15); sample_15 = None | |
return (sample_16,) | |
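# Instantiate the repro module defined above.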
mod = Repro() | |
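# load_args rebuilds the five graph inputs (sample, timestep, added_cond_kwargs
# text_embeds / time_ids, encoder_hidden_states) from storages checkpointed by the
# minifier; the hex strings identify the saved tensor contents on disk.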
def load_args(reader): | |
buf0 = reader.storage('74c1111a8ba16b7a9e0692d1b22a3bb3f7a2de1e', 262144, device=device(type='cuda', index=0), dtype_hint=torch.float16) | |
reader.tensor(buf0, (2, 4, 128, 128), dtype=torch.float16, is_leaf=True) # L_sample_ | |
buf1 = reader.storage('1ef05ec122a136dbfeb7bca9c18d685e58284271', 160, device=device(type='cuda', index=0)) | |
reader.tensor(buf1, (), is_leaf=True) # L_timestep_ | |
buf2 = reader.storage('3c5868917b64af67510e0ccde816043463effeed', 5120, device=device(type='cuda', index=0), dtype_hint=torch.float16) | |
reader.tensor(buf2, (2, 1280), dtype=torch.float16, is_leaf=True) # L_added_cond_kwargs_text_embeds_ | |
buf3 = reader.storage('53b60f9d6318b06eab596e8e3412589052cadb30', 24, device=device(type='cuda', index=0), dtype_hint=torch.float16) | |
reader.tensor(buf3, (2, 6), dtype=torch.float16, is_leaf=True) # L_added_cond_kwargs_time_ids_ | |
buf4 = reader.storage('8887258b000c59dd41b243f7282424a00d7d421b', 630784, device=device(type='cuda', index=0), dtype_hint=torch.float16) | |
reader.tensor(buf4, (2, 77, 2048), dtype=torch.float16, is_leaf=True) # L_encoder_hidden_states_ | |
load_args._version = 0 | |
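# Entry point: run_repro replays the captured graph with the saved inputs;
# command='minify' requests further minification, writing checkpoints to save_dir,
# with accuracy checking disabled (accuracy=False).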
if __name__ == '__main__': | |
from torch._dynamo.repro.after_dynamo import run_repro | |
run_repro(mod, load_args, accuracy=False, command='minify', | |
save_dir='/mnt/zeph/home/emil/Projects/rocm_test/torch_compile_debug/run_2023_11_14_07_11_57_623143-pid_9406/minifier/checkpoints', autocast=False, backend=None) |