Skip to content

Instantly share code, notes, and snippets.

@pashu123
Created October 20, 2022 15:35
Show Gist options
  • Save pashu123/86694a8ac755b11f5a235ef515495c9a to your computer and use it in GitHub Desktop.
module attributes {torch.debug_module_name = "_lambda"} {
func.func @forward(%arg0: !torch.vtensor<[1,4,64,64],f16>, %arg1: !torch.vtensor<[1],f16>, %arg2: !torch.vtensor<[2,77,768],f16>, %arg3: !torch.vtensor<[],f32>) -> !torch.vtensor<[1,4,64,64],f16> {
%int64 = torch.constant.int 64
%int320 = torch.constant.int 320
%int2 = torch.constant.int 2
%int40960 = torch.constant.int 40960
%int4096 = torch.constant.int 4096
%int10 = torch.constant.int 10
%int32 = torch.constant.int 32
%int640 = torch.constant.int 640
%int81920 = torch.constant.int 81920
%int20 = torch.constant.int 20
%int960 = torch.constant.int 960
%int122880 = torch.constant.int 122880
%int30 = torch.constant.int 30
%int1024 = torch.constant.int 1024
%int20480 = torch.constant.int 20480
%int30720 = torch.constant.int 30720
%int1280 = torch.constant.int 1280
%int40 = torch.constant.int 40
%int1920 = torch.constant.int 1920
%int61440 = torch.constant.int 61440
%int60 = torch.constant.int 60
%int256 = torch.constant.int 256
%int16 = torch.constant.int 16
%int10240 = torch.constant.int 10240
%int15360 = torch.constant.int 15360
%int2560 = torch.constant.int 2560
%int80 = torch.constant.int 80
%int8 = torch.constant.int 8
%int5120 = torch.constant.int 5120
%int1 = torch.constant.int 1
%float1.000000e00 = torch.constant.float 1.000000e+00
%int7 = torch.constant.int 7
%float0.000000e00 = torch.constant.float 0.000000e+00
%int160 = torch.constant.int 160
%0 = torch.vtensor.literal(dense<7.500000e+00> : tensor<f64>) : !torch.vtensor<[],f64>
%1 = torch.vtensor.literal(dense<0.079056941504209485> : tensor<f64>) : !torch.vtensor<[],f64>
%2 = torch.vtensor.literal(dense<0.11180339887498948> : tensor<f64>) : !torch.vtensor<[],f64>
%3 = torch.vtensor.literal(dense<0.15811388300841897> : tensor<f64>) : !torch.vtensor<[],f64>
%4 = torch.vtensor.literal(dense<9.9999999999999995E-7> : tensor<f64>) : !torch.vtensor<[],f64>
%5 = torch.vtensor.literal(dense<1.000000e+00> : tensor<f64>) : !torch.vtensor<[],f64>
%6 = torch.vtensor.literal(dense<1.000000e-05> : tensor<f64>) : !torch.vtensor<[],f64>
%7 = torch.vtensor.literal(dense<160> : tensor<si64>) : !torch.vtensor<[],si64>
%8 = torch.vtensor.literal(dense<-9.2103403719761836> : tensor<f64>) : !torch.vtensor<[],f64>
%9 = torch.vtensor.literal(dense<1> : tensor<si64>) : !torch.vtensor<[],si64>
%10 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x320xf16>) : !torch.vtensor<[1280,320],f16>
%11 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%12 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16>
%13 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%14 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x4x3x3xf16>) : !torch.vtensor<[320,4,3,3],f16>
%15 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%16 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%17 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%18 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320x3x3xf16>) : !torch.vtensor<[320,320,3,3],f16>
%19 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%20 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x1280xf16>) : !torch.vtensor<[320,1280],f16>
%21 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%22 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%23 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%24 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320x3x3xf16>) : !torch.vtensor<[320,320,3,3],f16>
%25 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%26 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%27 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%28 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320x1x1xf16>) : !torch.vtensor<[320,320,1,1],f16>
%29 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%30 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%31 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%32 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xf16>) : !torch.vtensor<[320,320],f16>
%33 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xf16>) : !torch.vtensor<[320,320],f16>
%34 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xf16>) : !torch.vtensor<[320,320],f16>
%35 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xf16>) : !torch.vtensor<[320,320],f16>
%36 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%37 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%38 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%39 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xf16>) : !torch.vtensor<[320,320],f16>
%40 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x768xf16>) : !torch.vtensor<[320,768],f16>
%41 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x768xf16>) : !torch.vtensor<[320,768],f16>
%42 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xf16>) : !torch.vtensor<[320,320],f16>
%43 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%44 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%45 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%46 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560x320xf16>) : !torch.vtensor<[2560,320],f16>
%47 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560xf16>) : !torch.vtensor<[2560],f16>
%48 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x1280xf16>) : !torch.vtensor<[320,1280],f16>
%49 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%50 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320x1x1xf16>) : !torch.vtensor<[320,320,1,1],f16>
%51 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%52 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%53 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%54 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320x3x3xf16>) : !torch.vtensor<[320,320,3,3],f16>
%55 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%56 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x1280xf16>) : !torch.vtensor<[320,1280],f16>
%57 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%58 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%59 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%60 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320x3x3xf16>) : !torch.vtensor<[320,320,3,3],f16>
%61 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%62 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%63 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%64 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320x1x1xf16>) : !torch.vtensor<[320,320,1,1],f16>
%65 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%66 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%67 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%68 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xf16>) : !torch.vtensor<[320,320],f16>
%69 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xf16>) : !torch.vtensor<[320,320],f16>
%70 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xf16>) : !torch.vtensor<[320,320],f16>
%71 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xf16>) : !torch.vtensor<[320,320],f16>
%72 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%73 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%74 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%75 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xf16>) : !torch.vtensor<[320,320],f16>
%76 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x768xf16>) : !torch.vtensor<[320,768],f16>
%77 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x768xf16>) : !torch.vtensor<[320,768],f16>
%78 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xf16>) : !torch.vtensor<[320,320],f16>
%79 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%80 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%81 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%82 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560x320xf16>) : !torch.vtensor<[2560,320],f16>
%83 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560xf16>) : !torch.vtensor<[2560],f16>
%84 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x1280xf16>) : !torch.vtensor<[320,1280],f16>
%85 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%86 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320x1x1xf16>) : !torch.vtensor<[320,320,1,1],f16>
%87 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%88 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320x3x3xf16>) : !torch.vtensor<[320,320,3,3],f16>
%89 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%90 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%91 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%92 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x320x3x3xf16>) : !torch.vtensor<[640,320,3,3],f16>
%93 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%94 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x1280xf16>) : !torch.vtensor<[640,1280],f16>
%95 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%96 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%97 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%98 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640x3x3xf16>) : !torch.vtensor<[640,640,3,3],f16>
%99 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%100 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x320x1x1xf16>) : !torch.vtensor<[640,320,1,1],f16>
%101 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%102 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%103 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%104 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640x1x1xf16>) : !torch.vtensor<[640,640,1,1],f16>
%105 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%106 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%107 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%108 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xf16>) : !torch.vtensor<[640,640],f16>
%109 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xf16>) : !torch.vtensor<[640,640],f16>
%110 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xf16>) : !torch.vtensor<[640,640],f16>
%111 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xf16>) : !torch.vtensor<[640,640],f16>
%112 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%113 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%114 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%115 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xf16>) : !torch.vtensor<[640,640],f16>
%116 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x768xf16>) : !torch.vtensor<[640,768],f16>
%117 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x768xf16>) : !torch.vtensor<[640,768],f16>
%118 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xf16>) : !torch.vtensor<[640,640],f16>
%119 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%120 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%121 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%122 = torch.vtensor.literal(dense_resource<__elided__> : tensor<5120x640xf16>) : !torch.vtensor<[5120,640],f16>
%123 = torch.vtensor.literal(dense_resource<__elided__> : tensor<5120xf16>) : !torch.vtensor<[5120],f16>
%124 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x2560xf16>) : !torch.vtensor<[640,2560],f16>
%125 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%126 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640x1x1xf16>) : !torch.vtensor<[640,640,1,1],f16>
%127 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%128 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%129 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%130 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640x3x3xf16>) : !torch.vtensor<[640,640,3,3],f16>
%131 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%132 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x1280xf16>) : !torch.vtensor<[640,1280],f16>
%133 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%134 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%135 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%136 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640x3x3xf16>) : !torch.vtensor<[640,640,3,3],f16>
%137 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%138 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%139 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%140 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640x1x1xf16>) : !torch.vtensor<[640,640,1,1],f16>
%141 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%142 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%143 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%144 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xf16>) : !torch.vtensor<[640,640],f16>
%145 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xf16>) : !torch.vtensor<[640,640],f16>
%146 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xf16>) : !torch.vtensor<[640,640],f16>
%147 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xf16>) : !torch.vtensor<[640,640],f16>
%148 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%149 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%150 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%151 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xf16>) : !torch.vtensor<[640,640],f16>
%152 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x768xf16>) : !torch.vtensor<[640,768],f16>
%153 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x768xf16>) : !torch.vtensor<[640,768],f16>
%154 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xf16>) : !torch.vtensor<[640,640],f16>
%155 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%156 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%157 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%158 = torch.vtensor.literal(dense_resource<__elided__> : tensor<5120x640xf16>) : !torch.vtensor<[5120,640],f16>
%159 = torch.vtensor.literal(dense_resource<__elided__> : tensor<5120xf16>) : !torch.vtensor<[5120],f16>
%160 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x2560xf16>) : !torch.vtensor<[640,2560],f16>
%161 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%162 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640x1x1xf16>) : !torch.vtensor<[640,640,1,1],f16>
%163 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%164 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640x3x3xf16>) : !torch.vtensor<[640,640,3,3],f16>
%165 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%166 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%167 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%168 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x640x3x3xf16>) : !torch.vtensor<[1280,640,3,3],f16>
%169 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%170 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16>
%171 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%172 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%173 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%174 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x3x3xf16>) : !torch.vtensor<[1280,1280,3,3],f16>
%175 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%176 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x640x1x1xf16>) : !torch.vtensor<[1280,640,1,1],f16>
%177 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%178 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%179 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%180 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x1x1xf16>) : !torch.vtensor<[1280,1280,1,1],f16>
%181 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%182 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%183 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%184 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16>
%185 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16>
%186 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16>
%187 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16>
%188 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%189 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%190 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%191 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16>
%192 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x768xf16>) : !torch.vtensor<[1280,768],f16>
%193 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x768xf16>) : !torch.vtensor<[1280,768],f16>
%194 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16>
%195 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%196 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%197 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%198 = torch.vtensor.literal(dense_resource<__elided__> : tensor<10240x1280xf16>) : !torch.vtensor<[10240,1280],f16>
%199 = torch.vtensor.literal(dense_resource<__elided__> : tensor<10240xf16>) : !torch.vtensor<[10240],f16>
%200 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x5120xf16>) : !torch.vtensor<[1280,5120],f16>
%201 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%202 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x1x1xf16>) : !torch.vtensor<[1280,1280,1,1],f16>
%203 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%204 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%205 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%206 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x3x3xf16>) : !torch.vtensor<[1280,1280,3,3],f16>
%207 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%208 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16>
%209 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%210 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%211 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%212 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x3x3xf16>) : !torch.vtensor<[1280,1280,3,3],f16>
%213 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%214 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%215 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%216 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x1x1xf16>) : !torch.vtensor<[1280,1280,1,1],f16>
%217 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%218 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%219 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%220 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16>
%221 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16>
%222 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16>
%223 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16>
%224 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%225 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%226 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%227 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16>
%228 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x768xf16>) : !torch.vtensor<[1280,768],f16>
%229 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x768xf16>) : !torch.vtensor<[1280,768],f16>
%230 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16>
%231 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%232 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%233 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%234 = torch.vtensor.literal(dense_resource<__elided__> : tensor<10240x1280xf16>) : !torch.vtensor<[10240,1280],f16>
%235 = torch.vtensor.literal(dense_resource<__elided__> : tensor<10240xf16>) : !torch.vtensor<[10240],f16>
%236 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x5120xf16>) : !torch.vtensor<[1280,5120],f16>
%237 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%238 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x1x1xf16>) : !torch.vtensor<[1280,1280,1,1],f16>
%239 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%240 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x3x3xf16>) : !torch.vtensor<[1280,1280,3,3],f16>
%241 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%242 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%243 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%244 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x3x3xf16>) : !torch.vtensor<[1280,1280,3,3],f16>
%245 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%246 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16>
%247 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%248 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%249 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%250 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x3x3xf16>) : !torch.vtensor<[1280,1280,3,3],f16>
%251 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%252 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%253 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%254 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x3x3xf16>) : !torch.vtensor<[1280,1280,3,3],f16>
%255 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%256 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16>
%257 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%258 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%259 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%260 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x3x3xf16>) : !torch.vtensor<[1280,1280,3,3],f16>
%261 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%262 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%263 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%264 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x3x3xf16>) : !torch.vtensor<[1280,1280,3,3],f16>
%265 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%266 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16>
%267 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%268 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%269 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%270 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x3x3xf16>) : !torch.vtensor<[1280,1280,3,3],f16>
%271 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%272 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%273 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%274 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x1x1xf16>) : !torch.vtensor<[1280,1280,1,1],f16>
%275 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%276 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%277 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%278 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16>
%279 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16>
%280 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16>
%281 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16>
%282 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%283 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%284 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%285 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16>
%286 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x768xf16>) : !torch.vtensor<[1280,768],f16>
%287 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x768xf16>) : !torch.vtensor<[1280,768],f16>
%288 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16>
%289 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%290 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%291 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%292 = torch.vtensor.literal(dense_resource<__elided__> : tensor<10240x1280xf16>) : !torch.vtensor<[10240,1280],f16>
%293 = torch.vtensor.literal(dense_resource<__elided__> : tensor<10240xf16>) : !torch.vtensor<[10240],f16>
%294 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x5120xf16>) : !torch.vtensor<[1280,5120],f16>
%295 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%296 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x1x1xf16>) : !torch.vtensor<[1280,1280,1,1],f16>
%297 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%298 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%299 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%300 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x3x3xf16>) : !torch.vtensor<[1280,1280,3,3],f16>
%301 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%302 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16>
%303 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%304 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%305 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%306 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x3x3xf16>) : !torch.vtensor<[1280,1280,3,3],f16>
%307 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%308 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560xf16>) : !torch.vtensor<[2560],f16>
%309 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560xf16>) : !torch.vtensor<[2560],f16>
%310 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x2560x3x3xf16>) : !torch.vtensor<[1280,2560,3,3],f16>
%311 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%312 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16>
%313 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%314 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%315 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%316 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x3x3xf16>) : !torch.vtensor<[1280,1280,3,3],f16>
%317 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%318 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x2560x1x1xf16>) : !torch.vtensor<[1280,2560,1,1],f16>
%319 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%320 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560xf16>) : !torch.vtensor<[2560],f16>
%321 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560xf16>) : !torch.vtensor<[2560],f16>
%322 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x2560x3x3xf16>) : !torch.vtensor<[1280,2560,3,3],f16>
%323 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%324 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16>
%325 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%326 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%327 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%328 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x3x3xf16>) : !torch.vtensor<[1280,1280,3,3],f16>
%329 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%330 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x2560x1x1xf16>) : !torch.vtensor<[1280,2560,1,1],f16>
%331 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%332 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560xf16>) : !torch.vtensor<[2560],f16>
%333 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560xf16>) : !torch.vtensor<[2560],f16>
%334 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x2560x3x3xf16>) : !torch.vtensor<[1280,2560,3,3],f16>
%335 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%336 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16>
%337 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%338 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%339 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%340 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x3x3xf16>) : !torch.vtensor<[1280,1280,3,3],f16>
%341 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%342 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x2560x1x1xf16>) : !torch.vtensor<[1280,2560,1,1],f16>
%343 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%344 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x3x3xf16>) : !torch.vtensor<[1280,1280,3,3],f16>
%345 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%346 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560xf16>) : !torch.vtensor<[2560],f16>
%347 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560xf16>) : !torch.vtensor<[2560],f16>
%348 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x2560x3x3xf16>) : !torch.vtensor<[1280,2560,3,3],f16>
%349 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%350 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16>
%351 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%352 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%353 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%354 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x3x3xf16>) : !torch.vtensor<[1280,1280,3,3],f16>
%355 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%356 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x2560x1x1xf16>) : !torch.vtensor<[1280,2560,1,1],f16>
%357 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%358 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%359 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%360 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x1x1xf16>) : !torch.vtensor<[1280,1280,1,1],f16>
%361 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%362 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%363 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%364 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16>
%365 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16>
%366 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16>
%367 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16>
%368 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%369 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%370 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%371 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16>
%372 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x768xf16>) : !torch.vtensor<[1280,768],f16>
%373 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x768xf16>) : !torch.vtensor<[1280,768],f16>
%374 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16>
%375 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%376 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%377 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%378 = torch.vtensor.literal(dense_resource<__elided__> : tensor<10240x1280xf16>) : !torch.vtensor<[10240,1280],f16>
%379 = torch.vtensor.literal(dense_resource<__elided__> : tensor<10240xf16>) : !torch.vtensor<[10240],f16>
%380 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x5120xf16>) : !torch.vtensor<[1280,5120],f16>
%381 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%382 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x1x1xf16>) : !torch.vtensor<[1280,1280,1,1],f16>
%383 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%384 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560xf16>) : !torch.vtensor<[2560],f16>
%385 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560xf16>) : !torch.vtensor<[2560],f16>
%386 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x2560x3x3xf16>) : !torch.vtensor<[1280,2560,3,3],f16>
%387 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%388 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16>
%389 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%390 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%391 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%392 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x3x3xf16>) : !torch.vtensor<[1280,1280,3,3],f16>
%393 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%394 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x2560x1x1xf16>) : !torch.vtensor<[1280,2560,1,1],f16>
%395 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%396 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%397 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%398 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x1x1xf16>) : !torch.vtensor<[1280,1280,1,1],f16>
%399 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%400 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%401 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%402 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16>
%403 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16>
%404 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16>
%405 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16>
%406 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%407 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%408 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%409 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16>
%410 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x768xf16>) : !torch.vtensor<[1280,768],f16>
%411 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x768xf16>) : !torch.vtensor<[1280,768],f16>
%412 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16>
%413 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%414 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%415 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%416 = torch.vtensor.literal(dense_resource<__elided__> : tensor<10240x1280xf16>) : !torch.vtensor<[10240,1280],f16>
%417 = torch.vtensor.literal(dense_resource<__elided__> : tensor<10240xf16>) : !torch.vtensor<[10240],f16>
%418 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x5120xf16>) : !torch.vtensor<[1280,5120],f16>
%419 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%420 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x1x1xf16>) : !torch.vtensor<[1280,1280,1,1],f16>
%421 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%422 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1920xf16>) : !torch.vtensor<[1920],f16>
%423 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1920xf16>) : !torch.vtensor<[1920],f16>
%424 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1920x3x3xf16>) : !torch.vtensor<[1280,1920,3,3],f16>
%425 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%426 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16>
%427 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%428 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%429 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%430 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x3x3xf16>) : !torch.vtensor<[1280,1280,3,3],f16>
%431 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%432 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1920x1x1xf16>) : !torch.vtensor<[1280,1920,1,1],f16>
%433 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%434 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%435 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%436 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x1x1xf16>) : !torch.vtensor<[1280,1280,1,1],f16>
%437 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%438 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%439 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%440 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16>
%441 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16>
%442 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16>
%443 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16>
%444 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%445 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%446 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%447 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16>
%448 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x768xf16>) : !torch.vtensor<[1280,768],f16>
%449 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x768xf16>) : !torch.vtensor<[1280,768],f16>
%450 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16>
%451 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%452 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%453 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%454 = torch.vtensor.literal(dense_resource<__elided__> : tensor<10240x1280xf16>) : !torch.vtensor<[10240,1280],f16>
%455 = torch.vtensor.literal(dense_resource<__elided__> : tensor<10240xf16>) : !torch.vtensor<[10240],f16>
%456 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x5120xf16>) : !torch.vtensor<[1280,5120],f16>
%457 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%458 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x1x1xf16>) : !torch.vtensor<[1280,1280,1,1],f16>
%459 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%460 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x3x3xf16>) : !torch.vtensor<[1280,1280,3,3],f16>
%461 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%462 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1920xf16>) : !torch.vtensor<[1920],f16>
%463 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1920xf16>) : !torch.vtensor<[1920],f16>
%464 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x1920x3x3xf16>) : !torch.vtensor<[640,1920,3,3],f16>
%465 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%466 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x1280xf16>) : !torch.vtensor<[640,1280],f16>
%467 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%468 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%469 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%470 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640x3x3xf16>) : !torch.vtensor<[640,640,3,3],f16>
%471 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%472 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x1920x1x1xf16>) : !torch.vtensor<[640,1920,1,1],f16>
%473 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%474 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%475 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%476 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640x1x1xf16>) : !torch.vtensor<[640,640,1,1],f16>
%477 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%478 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%479 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%480 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xf16>) : !torch.vtensor<[640,640],f16>
%481 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xf16>) : !torch.vtensor<[640,640],f16>
%482 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xf16>) : !torch.vtensor<[640,640],f16>
%483 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xf16>) : !torch.vtensor<[640,640],f16>
%484 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%485 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%486 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%487 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xf16>) : !torch.vtensor<[640,640],f16>
%488 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x768xf16>) : !torch.vtensor<[640,768],f16>
%489 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x768xf16>) : !torch.vtensor<[640,768],f16>
%490 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xf16>) : !torch.vtensor<[640,640],f16>
%491 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%492 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%493 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%494 = torch.vtensor.literal(dense_resource<__elided__> : tensor<5120x640xf16>) : !torch.vtensor<[5120,640],f16>
%495 = torch.vtensor.literal(dense_resource<__elided__> : tensor<5120xf16>) : !torch.vtensor<[5120],f16>
%496 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x2560xf16>) : !torch.vtensor<[640,2560],f16>
%497 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%498 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640x1x1xf16>) : !torch.vtensor<[640,640,1,1],f16>
%499 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%500 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%501 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%502 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x1280x3x3xf16>) : !torch.vtensor<[640,1280,3,3],f16>
%503 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%504 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x1280xf16>) : !torch.vtensor<[640,1280],f16>
%505 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%506 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%507 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%508 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640x3x3xf16>) : !torch.vtensor<[640,640,3,3],f16>
%509 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%510 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x1280x1x1xf16>) : !torch.vtensor<[640,1280,1,1],f16>
%511 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%512 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%513 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%514 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640x1x1xf16>) : !torch.vtensor<[640,640,1,1],f16>
%515 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%516 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%517 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%518 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xf16>) : !torch.vtensor<[640,640],f16>
%519 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xf16>) : !torch.vtensor<[640,640],f16>
%520 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xf16>) : !torch.vtensor<[640,640],f16>
%521 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xf16>) : !torch.vtensor<[640,640],f16>
%522 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%523 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%524 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%525 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xf16>) : !torch.vtensor<[640,640],f16>
%526 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x768xf16>) : !torch.vtensor<[640,768],f16>
%527 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x768xf16>) : !torch.vtensor<[640,768],f16>
%528 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xf16>) : !torch.vtensor<[640,640],f16>
%529 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%530 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%531 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%532 = torch.vtensor.literal(dense_resource<__elided__> : tensor<5120x640xf16>) : !torch.vtensor<[5120,640],f16>
%533 = torch.vtensor.literal(dense_resource<__elided__> : tensor<5120xf16>) : !torch.vtensor<[5120],f16>
%534 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x2560xf16>) : !torch.vtensor<[640,2560],f16>
%535 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%536 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640x1x1xf16>) : !torch.vtensor<[640,640,1,1],f16>
%537 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%538 = torch.vtensor.literal(dense_resource<__elided__> : tensor<960xf16>) : !torch.vtensor<[960],f16>
%539 = torch.vtensor.literal(dense_resource<__elided__> : tensor<960xf16>) : !torch.vtensor<[960],f16>
%540 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x960x3x3xf16>) : !torch.vtensor<[640,960,3,3],f16>
%541 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%542 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x1280xf16>) : !torch.vtensor<[640,1280],f16>
%543 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%544 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%545 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%546 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640x3x3xf16>) : !torch.vtensor<[640,640,3,3],f16>
%547 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%548 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x960x1x1xf16>) : !torch.vtensor<[640,960,1,1],f16>
%549 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%550 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%551 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%552 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640x1x1xf16>) : !torch.vtensor<[640,640,1,1],f16>
%553 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%554 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%555 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%556 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xf16>) : !torch.vtensor<[640,640],f16>
%557 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xf16>) : !torch.vtensor<[640,640],f16>
%558 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xf16>) : !torch.vtensor<[640,640],f16>
%559 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xf16>) : !torch.vtensor<[640,640],f16>
%560 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%561 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%562 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%563 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xf16>) : !torch.vtensor<[640,640],f16>
%564 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x768xf16>) : !torch.vtensor<[640,768],f16>
%565 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x768xf16>) : !torch.vtensor<[640,768],f16>
%566 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xf16>) : !torch.vtensor<[640,640],f16>
%567 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%568 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%569 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%570 = torch.vtensor.literal(dense_resource<__elided__> : tensor<5120x640xf16>) : !torch.vtensor<[5120,640],f16>
%571 = torch.vtensor.literal(dense_resource<__elided__> : tensor<5120xf16>) : !torch.vtensor<[5120],f16>
%572 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x2560xf16>) : !torch.vtensor<[640,2560],f16>
%573 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%574 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640x1x1xf16>) : !torch.vtensor<[640,640,1,1],f16>
%575 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%576 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640x3x3xf16>) : !torch.vtensor<[640,640,3,3],f16>
%577 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%578 = torch.vtensor.literal(dense_resource<__elided__> : tensor<960xf16>) : !torch.vtensor<[960],f16>
%579 = torch.vtensor.literal(dense_resource<__elided__> : tensor<960xf16>) : !torch.vtensor<[960],f16>
%580 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x960x3x3xf16>) : !torch.vtensor<[320,960,3,3],f16>
%581 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%582 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x1280xf16>) : !torch.vtensor<[320,1280],f16>
%583 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%584 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%585 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%586 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320x3x3xf16>) : !torch.vtensor<[320,320,3,3],f16>
%587 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%588 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x960x1x1xf16>) : !torch.vtensor<[320,960,1,1],f16>
%589 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%590 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%591 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%592 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320x1x1xf16>) : !torch.vtensor<[320,320,1,1],f16>
%593 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%594 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%595 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%596 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xf16>) : !torch.vtensor<[320,320],f16>
%597 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xf16>) : !torch.vtensor<[320,320],f16>
%598 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xf16>) : !torch.vtensor<[320,320],f16>
%599 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xf16>) : !torch.vtensor<[320,320],f16>
%600 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%601 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%602 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%603 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xf16>) : !torch.vtensor<[320,320],f16>
%604 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x768xf16>) : !torch.vtensor<[320,768],f16>
%605 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x768xf16>) : !torch.vtensor<[320,768],f16>
%606 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xf16>) : !torch.vtensor<[320,320],f16>
%607 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%608 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%609 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%610 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560x320xf16>) : !torch.vtensor<[2560,320],f16>
%611 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560xf16>) : !torch.vtensor<[2560],f16>
%612 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x1280xf16>) : !torch.vtensor<[320,1280],f16>
%613 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%614 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320x1x1xf16>) : !torch.vtensor<[320,320,1,1],f16>
%615 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%616 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%617 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%618 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x640x3x3xf16>) : !torch.vtensor<[320,640,3,3],f16>
%619 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%620 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x1280xf16>) : !torch.vtensor<[320,1280],f16>
%621 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%622 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%623 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%624 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320x3x3xf16>) : !torch.vtensor<[320,320,3,3],f16>
%625 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%626 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x640x1x1xf16>) : !torch.vtensor<[320,640,1,1],f16>
%627 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%628 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%629 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%630 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320x1x1xf16>) : !torch.vtensor<[320,320,1,1],f16>
%631 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%632 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%633 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%634 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xf16>) : !torch.vtensor<[320,320],f16>
%635 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xf16>) : !torch.vtensor<[320,320],f16>
%636 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xf16>) : !torch.vtensor<[320,320],f16>
%637 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xf16>) : !torch.vtensor<[320,320],f16>
%638 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%639 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%640 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%641 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xf16>) : !torch.vtensor<[320,320],f16>
%642 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x768xf16>) : !torch.vtensor<[320,768],f16>
%643 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x768xf16>) : !torch.vtensor<[320,768],f16>
%644 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xf16>) : !torch.vtensor<[320,320],f16>
%645 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%646 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%647 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%648 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560x320xf16>) : !torch.vtensor<[2560,320],f16>
%649 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560xf16>) : !torch.vtensor<[2560],f16>
%650 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x1280xf16>) : !torch.vtensor<[320,1280],f16>
%651 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%652 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320x1x1xf16>) : !torch.vtensor<[320,320,1,1],f16>
%653 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%654 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%655 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%656 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x640x3x3xf16>) : !torch.vtensor<[320,640,3,3],f16>
%657 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%658 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x1280xf16>) : !torch.vtensor<[320,1280],f16>
%659 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%660 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%661 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%662 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320x3x3xf16>) : !torch.vtensor<[320,320,3,3],f16>
%663 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%664 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x640x1x1xf16>) : !torch.vtensor<[320,640,1,1],f16>
%665 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%666 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%667 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%668 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320x1x1xf16>) : !torch.vtensor<[320,320,1,1],f16>
%669 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%670 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%671 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%672 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xf16>) : !torch.vtensor<[320,320],f16>
%673 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xf16>) : !torch.vtensor<[320,320],f16>
%674 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xf16>) : !torch.vtensor<[320,320],f16>
%675 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xf16>) : !torch.vtensor<[320,320],f16>
%676 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%677 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%678 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%679 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xf16>) : !torch.vtensor<[320,320],f16>
%680 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x768xf16>) : !torch.vtensor<[320,768],f16>
%681 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x768xf16>) : !torch.vtensor<[320,768],f16>
%682 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xf16>) : !torch.vtensor<[320,320],f16>
%683 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%684 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%685 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%686 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560x320xf16>) : !torch.vtensor<[2560,320],f16>
%687 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560xf16>) : !torch.vtensor<[2560],f16>
%688 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x1280xf16>) : !torch.vtensor<[320,1280],f16>
%689 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%690 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320x1x1xf16>) : !torch.vtensor<[320,320,1,1],f16>
%691 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%692 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%693 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%694 = torch.vtensor.literal(dense_resource<__elided__> : tensor<4x320x3x3xf16>) : !torch.vtensor<[4,320,3,3],f16>
%695 = torch.vtensor.literal(dense<[-1.393320e-03, -1.588820e-03, -2.624990e-04, -2.531050e-03]> : tensor<4xf16>) : !torch.vtensor<[4],f16>
%int0 = torch.constant.int 0
%float5.000000e-01 = torch.constant.float 5.000000e-01
%false = torch.constant.bool false
%int6 = torch.constant.int 6
%none = torch.constant.none
%int9223372036854775807 = torch.constant.int 9223372036854775807
%int-1 = torch.constant.int -1
%int5 = torch.constant.int 5
%true = torch.constant.bool true
%int3 = torch.constant.int 3
%float1.000000e-05 = torch.constant.float 1.000000e-05
%int8192 = torch.constant.int 8192
%int-2 = torch.constant.int -2
%int154 = torch.constant.int 154
%int768 = torch.constant.int 768
%int77 = torch.constant.int 77
%str = torch.constant.str "none"
%int2048 = torch.constant.int 2048
%int512 = torch.constant.int 512
%int128 = torch.constant.int 128
%float2.000000e00 = torch.constant.float 2.000000e+00
%696 = torch.prim.ListConstruct %arg0, %arg0 : (!torch.vtensor<[1,4,64,64],f16>, !torch.vtensor<[1,4,64,64],f16>) -> !torch.list<vtensor>
%697 = torch.aten.cat %696, %int0 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[2,4,64,64],f16>
%698 = torch.aten.pow.Tensor_Scalar %arg3, %int2 : !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[],f32>
%699 = torch.aten.add.Tensor %698, %9, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32>
%700 = torch.aten.pow.Tensor_Scalar %699, %float5.000000e-01 : !torch.vtensor<[],f32>, !torch.float -> !torch.vtensor<[],f32>
%701 = torch.aten.div.Tensor %697, %700 : !torch.vtensor<[2,4,64,64],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,4,64,64],f16>
%702 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%703 = torch.aten.broadcast_to %arg1, %702 : !torch.vtensor<[1],f16>, !torch.list<int> -> !torch.vtensor<[2],f16>
%cuda3A0 = torch.constant.device "cuda:0"
%704 = torch.aten.arange.start_step %int0, %int160, %int1, %int6, %none, %cuda3A0, %false : !torch.int, !torch.int, !torch.int, !torch.int, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[160],f32>
%705 = torch.aten.mul.Tensor %704, %8 : !torch.vtensor<[160],f32>, !torch.vtensor<[],f64> -> !torch.vtensor<[160],f32>
%706 = torch.aten.div.Tensor %705, %7 : !torch.vtensor<[160],f32>, !torch.vtensor<[],si64> -> !torch.vtensor<[160],f32>
%707 = torch.aten.exp %706 : !torch.vtensor<[160],f32> -> !torch.vtensor<[160],f32>
%708 = torch.aten.slice.Tensor %703, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2],f16>
%709 = torch.aten.unsqueeze %708, %int1 : !torch.vtensor<[2],f16>, !torch.int -> !torch.vtensor<[2,1],f16>
%710 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%711 = torch.aten.to.dtype %710, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%712 = torch.prim.ListConstruct %int2, %int1 : (!torch.int, !torch.int) -> !torch.list<int>
%713 = torch.aten.broadcast_to %711, %712 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,1],f32>
%714 = torch.valsem.aten.copy %713, %709, %false : !torch.vtensor<[2,1],f32>, !torch.vtensor<[2,1],f16>, !torch.bool -> !torch.vtensor<[2,1],f32>
%715 = torch.aten.unsqueeze %707, %int0 : !torch.vtensor<[160],f32>, !torch.int -> !torch.vtensor<[1,160],f32>
%716 = torch.aten.slice.Tensor %715, %int1, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[1,160],f32>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,160],f32>
%717 = torch.aten.mul.Tensor %714, %716 : !torch.vtensor<[2,1],f32>, !torch.vtensor<[1,160],f32> -> !torch.vtensor<[2,160],f32>
%718 = torch.aten.mul.Tensor %717, %9 : !torch.vtensor<[2,160],f32>, !torch.vtensor<[],si64> -> !torch.vtensor<[2,160],f32>
%719 = torch.aten.sin %718 : !torch.vtensor<[2,160],f32> -> !torch.vtensor<[2,160],f32>
%720 = torch.aten.cos %718 : !torch.vtensor<[2,160],f32> -> !torch.vtensor<[2,160],f32>
%721 = torch.prim.ListConstruct %719, %720 : (!torch.vtensor<[2,160],f32>, !torch.vtensor<[2,160],f32>) -> !torch.list<vtensor>
%722 = torch.aten.cat %721, %int-1 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[2,320],f32>
%723 = torch.aten.slice.Tensor %722, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,320],f32>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,320],f32>
%724 = torch.aten.slice.Tensor %723, %int1, %int160, %int9223372036854775807, %int1 : !torch.vtensor<[2,320],f32>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,160],f32>
%725 = torch.aten.slice.Tensor %722, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,320],f32>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,320],f32>
%726 = torch.aten.slice.Tensor %725, %int1, %int0, %int160, %int1 : !torch.vtensor<[2,320],f32>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,160],f32>
%727 = torch.prim.ListConstruct %724, %726 : (!torch.vtensor<[2,160],f32>, !torch.vtensor<[2,160],f32>) -> !torch.list<vtensor>
%728 = torch.aten.cat %727, %int-1 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[2,320],f32>
%729 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%730 = torch.aten.to.dtype %729, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%731 = torch.prim.ListConstruct %int2, %int320 : (!torch.int, !torch.int) -> !torch.list<int>
%732 = torch.aten.broadcast_to %730, %731 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,320],f16>
%733 = torch.valsem.aten.copy %732, %728, %false : !torch.vtensor<[2,320],f16>, !torch.vtensor<[2,320],f32>, !torch.bool -> !torch.vtensor<[2,320],f16>
%734 = torch.aten.transpose.int %10, %int0, %int1 : !torch.vtensor<[1280,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,1280],f16>
%735 = torch.aten.mm %733, %734 : !torch.vtensor<[2,320],f16>, !torch.vtensor<[320,1280],f16> -> !torch.vtensor<[2,1280],f16>
%736 = torch.aten.mul.Scalar %11, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16>
%737 = torch.aten.add.Tensor %736, %735, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[2,1280],f16>, !torch.int -> !torch.vtensor<[2,1280],f16>
%738 = torch.aten.sigmoid %737 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%739 = torch.aten.mul.Tensor %738, %737 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%740 = torch.aten.transpose.int %12, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%741 = torch.aten.mm %739, %740 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[2,1280],f16>
%742 = torch.aten.mul.Scalar %13, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16>
%743 = torch.aten.add.Tensor %742, %741, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[2,1280],f16>, !torch.int -> !torch.vtensor<[2,1280],f16>
%744 = torch.prim.ListConstruct %int1, %int1 : (!torch.int, !torch.int) -> !torch.list<int>
%745 = torch.prim.ListConstruct %int0, %int0 : (!torch.int, !torch.int) -> !torch.list<int>
%746 = torch.aten.convolution %701, %14, %15, %744, %744, %744, %false, %745, %int1 : !torch.vtensor<[2,4,64,64],f16>, !torch.vtensor<[320,4,3,3],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,320,64,64],f16>
%747 = torch.prim.ListConstruct %int2, %int32, %int10, %int4096 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%748 = torch.aten.view %746, %747 : !torch.vtensor<[2,320,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,32,10,4096],f16>
%749 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%750 = torch.aten.to.dtype %749, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%751 = torch.prim.ListConstruct %int2, %int32, %int10, %int4096 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%752 = torch.aten.broadcast_to %750, %751 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,10,4096],f32>
%753 = torch.valsem.aten.copy %752, %748, %false : !torch.vtensor<[2,32,10,4096],f32>, !torch.vtensor<[2,32,10,4096],f16>, !torch.bool -> !torch.vtensor<[2,32,10,4096],f32>
%754 = torch.prim.ListConstruct %int2, %int3 : (!torch.int, !torch.int) -> !torch.list<int>
%755 = torch.aten.to.dtype %753, %int7, %false, %false, %none : !torch.vtensor<[2,32,10,4096],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,10,4096],f64>
%756 = torch.aten.sum.dim_IntList %755, %754, %true, %none : !torch.vtensor<[2,32,10,4096],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%757 = torch.aten.div.Scalar %756, %int40960 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%758 = torch.aten.sub.Tensor %755, %757, %float1.000000e00 : !torch.vtensor<[2,32,10,4096],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,10,4096],f64>
%759 = torch.aten.mul.Tensor %758, %758 : !torch.vtensor<[2,32,10,4096],f64>, !torch.vtensor<[2,32,10,4096],f64> -> !torch.vtensor<[2,32,10,4096],f64>
%760 = torch.aten.sum.dim_IntList %759, %754, %true, %none : !torch.vtensor<[2,32,10,4096],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%761 = torch.aten.div.Scalar %760, %int40960 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%762 = torch.aten.to.dtype %761, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%763 = torch.aten.sum.dim_IntList %753, %754, %true, %none : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%764 = torch.aten.div.Scalar %763, %int40960 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%765 = torch.aten.add.Tensor %762, %6, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%766 = torch.aten.rsqrt %765 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%767 = torch.aten.sub.Tensor %748, %764, %int1 : !torch.vtensor<[2,32,10,4096],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,10,4096],f32>
%768 = torch.aten.mul.Tensor %767, %766 : !torch.vtensor<[2,32,10,4096],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,10,4096],f32>
%769 = torch.prim.ListConstruct %int2, %int320, %int64, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%770 = torch.aten.view %768, %769 : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f32>
%771 = torch.aten.unsqueeze %16, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16>
%772 = torch.aten.unsqueeze %771, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16>
%773 = torch.aten.mul.Tensor %770, %772 : !torch.vtensor<[2,320,64,64],f32>, !torch.vtensor<[320,1,1],f16> -> !torch.vtensor<[2,320,64,64],f32>
%774 = torch.aten.unsqueeze %17, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16>
%775 = torch.aten.unsqueeze %774, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16>
%776 = torch.aten.add.Tensor %773, %775, %int1 : !torch.vtensor<[2,320,64,64],f32>, !torch.vtensor<[320,1,1],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f32>
%777 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%778 = torch.aten.to.dtype %777, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%779 = torch.prim.ListConstruct %int2, %int320, %int64, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%780 = torch.aten.broadcast_to %778, %779 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f16>
%781 = torch.valsem.aten.copy %780, %776, %false : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f32>, !torch.bool -> !torch.vtensor<[2,320,64,64],f16>
%782 = torch.aten.sigmoid %781 : !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16>
%783 = torch.aten.mul.Tensor %782, %781 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16>
%784 = torch.aten.convolution %783, %18, %19, %744, %744, %744, %false, %745, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[320,320,3,3],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,320,64,64],f16>
%785 = torch.aten.sigmoid %743 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%786 = torch.aten.mul.Tensor %785, %743 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%787 = torch.aten.transpose.int %20, %int0, %int1 : !torch.vtensor<[320,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,320],f16>
%788 = torch.aten.mm %786, %787 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,320],f16> -> !torch.vtensor<[2,320],f16>
%789 = torch.aten.mul.Scalar %21, %int1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320],f16>
%790 = torch.aten.add.Tensor %789, %788, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[2,320],f16>, !torch.int -> !torch.vtensor<[2,320],f16>
%791 = torch.aten.slice.Tensor %790, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,320],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,320],f16>
%792 = torch.aten.slice.Tensor %791, %int1, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,320],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,320],f16>
%793 = torch.aten.unsqueeze %792, %int2 : !torch.vtensor<[2,320],f16>, !torch.int -> !torch.vtensor<[2,320,1],f16>
%794 = torch.aten.unsqueeze %793, %int3 : !torch.vtensor<[2,320,1],f16>, !torch.int -> !torch.vtensor<[2,320,1,1],f16>
%795 = torch.aten.add.Tensor %784, %794, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,1,1],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f16>
%796 = torch.aten.view %795, %747 : !torch.vtensor<[2,320,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,32,10,4096],f16>
%797 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%798 = torch.aten.to.dtype %797, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%799 = torch.prim.ListConstruct %int2, %int32, %int10, %int4096 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%800 = torch.aten.broadcast_to %798, %799 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,10,4096],f32>
%801 = torch.valsem.aten.copy %800, %796, %false : !torch.vtensor<[2,32,10,4096],f32>, !torch.vtensor<[2,32,10,4096],f16>, !torch.bool -> !torch.vtensor<[2,32,10,4096],f32>
%802 = torch.aten.to.dtype %801, %int7, %false, %false, %none : !torch.vtensor<[2,32,10,4096],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,10,4096],f64>
%803 = torch.aten.sum.dim_IntList %802, %754, %true, %none : !torch.vtensor<[2,32,10,4096],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%804 = torch.aten.div.Scalar %803, %int40960 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%805 = torch.aten.sub.Tensor %802, %804, %float1.000000e00 : !torch.vtensor<[2,32,10,4096],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,10,4096],f64>
%806 = torch.aten.mul.Tensor %805, %805 : !torch.vtensor<[2,32,10,4096],f64>, !torch.vtensor<[2,32,10,4096],f64> -> !torch.vtensor<[2,32,10,4096],f64>
%807 = torch.aten.sum.dim_IntList %806, %754, %true, %none : !torch.vtensor<[2,32,10,4096],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%808 = torch.aten.div.Scalar %807, %int40960 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%809 = torch.aten.to.dtype %808, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%810 = torch.aten.sum.dim_IntList %801, %754, %true, %none : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%811 = torch.aten.div.Scalar %810, %int40960 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%812 = torch.aten.add.Tensor %809, %6, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%813 = torch.aten.rsqrt %812 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%814 = torch.aten.sub.Tensor %796, %811, %int1 : !torch.vtensor<[2,32,10,4096],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,10,4096],f32>
%815 = torch.aten.mul.Tensor %814, %813 : !torch.vtensor<[2,32,10,4096],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,10,4096],f32>
%816 = torch.aten.view %815, %769 : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f32>
%817 = torch.aten.unsqueeze %22, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16>
%818 = torch.aten.unsqueeze %817, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16>
%819 = torch.aten.mul.Tensor %816, %818 : !torch.vtensor<[2,320,64,64],f32>, !torch.vtensor<[320,1,1],f16> -> !torch.vtensor<[2,320,64,64],f32>
%820 = torch.aten.unsqueeze %23, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16>
%821 = torch.aten.unsqueeze %820, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16>
%822 = torch.aten.add.Tensor %819, %821, %int1 : !torch.vtensor<[2,320,64,64],f32>, !torch.vtensor<[320,1,1],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f32>
%823 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%824 = torch.aten.to.dtype %823, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%825 = torch.prim.ListConstruct %int2, %int320, %int64, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%826 = torch.aten.broadcast_to %824, %825 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f16>
%827 = torch.valsem.aten.copy %826, %822, %false : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f32>, !torch.bool -> !torch.vtensor<[2,320,64,64],f16>
%828 = torch.aten.sigmoid %827 : !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16>
%829 = torch.aten.mul.Tensor %828, %827 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16>
%830 = torch.aten.convolution %829, %24, %25, %744, %744, %744, %false, %745, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[320,320,3,3],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,320,64,64],f16>
%831 = torch.aten.add.Tensor %746, %830, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f16>
%832 = torch.aten.div.Tensor %831, %5 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[2,320,64,64],f16>
%833 = torch.aten.view %832, %747 : !torch.vtensor<[2,320,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,32,10,4096],f16>
%834 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%835 = torch.aten.to.dtype %834, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%836 = torch.prim.ListConstruct %int2, %int32, %int10, %int4096 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%837 = torch.aten.broadcast_to %835, %836 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,10,4096],f32>
%838 = torch.valsem.aten.copy %837, %833, %false : !torch.vtensor<[2,32,10,4096],f32>, !torch.vtensor<[2,32,10,4096],f16>, !torch.bool -> !torch.vtensor<[2,32,10,4096],f32>
%839 = torch.aten.to.dtype %838, %int7, %false, %false, %none : !torch.vtensor<[2,32,10,4096],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,10,4096],f64>
%840 = torch.aten.sum.dim_IntList %839, %754, %true, %none : !torch.vtensor<[2,32,10,4096],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%841 = torch.aten.div.Scalar %840, %int40960 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%842 = torch.aten.sub.Tensor %839, %841, %float1.000000e00 : !torch.vtensor<[2,32,10,4096],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,10,4096],f64>
%843 = torch.aten.mul.Tensor %842, %842 : !torch.vtensor<[2,32,10,4096],f64>, !torch.vtensor<[2,32,10,4096],f64> -> !torch.vtensor<[2,32,10,4096],f64>
%844 = torch.aten.sum.dim_IntList %843, %754, %true, %none : !torch.vtensor<[2,32,10,4096],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%845 = torch.aten.div.Scalar %844, %int40960 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%846 = torch.aten.to.dtype %845, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%847 = torch.aten.sum.dim_IntList %838, %754, %true, %none : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%848 = torch.aten.div.Scalar %847, %int40960 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%849 = torch.aten.add.Tensor %846, %4, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%850 = torch.aten.rsqrt %849 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%851 = torch.aten.sub.Tensor %833, %848, %int1 : !torch.vtensor<[2,32,10,4096],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,10,4096],f32>
%852 = torch.aten.mul.Tensor %851, %850 : !torch.vtensor<[2,32,10,4096],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,10,4096],f32>
%853 = torch.aten.view %852, %769 : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f32>
%854 = torch.aten.unsqueeze %26, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16>
%855 = torch.aten.unsqueeze %854, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16>
%856 = torch.aten.mul.Tensor %853, %855 : !torch.vtensor<[2,320,64,64],f32>, !torch.vtensor<[320,1,1],f16> -> !torch.vtensor<[2,320,64,64],f32>
%857 = torch.aten.unsqueeze %27, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16>
%858 = torch.aten.unsqueeze %857, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16>
%859 = torch.aten.add.Tensor %856, %858, %int1 : !torch.vtensor<[2,320,64,64],f32>, !torch.vtensor<[320,1,1],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f32>
%860 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%861 = torch.aten.to.dtype %860, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%862 = torch.prim.ListConstruct %int2, %int320, %int64, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%863 = torch.aten.broadcast_to %861, %862 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f16>
%864 = torch.valsem.aten.copy %863, %859, %false : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f32>, !torch.bool -> !torch.vtensor<[2,320,64,64],f16>
%865 = torch.aten.convolution %864, %28, %29, %744, %745, %744, %false, %745, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[320,320,1,1],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,320,64,64],f16>
%866 = torch.prim.ListConstruct %int0, %int2, %int3, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%867 = torch.aten.permute %865, %866 : !torch.vtensor<[2,320,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,64,64,320],f16>
%868 = torch.prim.ListConstruct %int2, %int4096, %int320 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%869 = torch.aten.view %867, %868 : !torch.vtensor<[2,64,64,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%870 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%871 = torch.aten.sum.dim_IntList %869, %870, %true, %none : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,4096,1],f16>
%872 = torch.aten.div.Scalar %871, %int320 : !torch.vtensor<[2,4096,1],f16>, !torch.int -> !torch.vtensor<[2,4096,1],f16>
%873 = torch.prim.ListConstruct %int2, %int4096, %int320 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%874 = torch.aten.broadcast_to %872, %873 : !torch.vtensor<[2,4096,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%875 = torch.aten.sub.Tensor %869, %874, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16>
%876 = torch.aten.mul.Tensor %875, %875 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16>
%877 = torch.aten.sum.dim_IntList %876, %870, %true, %none : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,4096,1],f16>
%878 = torch.aten.div.Scalar %877, %int320 : !torch.vtensor<[2,4096,1],f16>, !torch.int -> !torch.vtensor<[2,4096,1],f16>
%879 = torch.aten.add.Scalar %878, %float1.000000e-05, %int1 : !torch.vtensor<[2,4096,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,4096,1],f16>
%880 = torch.aten.rsqrt %879 : !torch.vtensor<[2,4096,1],f16> -> !torch.vtensor<[2,4096,1],f16>
%881 = torch.prim.ListConstruct %int2, %int4096, %int320 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%882 = torch.aten.broadcast_to %880, %881 : !torch.vtensor<[2,4096,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%883 = torch.aten.mul.Tensor %875, %882 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16>
%884 = torch.aten.mul.Tensor %883, %30 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[320],f16> -> !torch.vtensor<[2,4096,320],f16>
%885 = torch.aten.add.Tensor %884, %31, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16>
%886 = torch.aten.transpose.int %32, %int0, %int1 : !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,320],f16>
%887 = torch.prim.ListConstruct %int8192, %int320 : (!torch.int, !torch.int) -> !torch.list<int>
%888 = torch.aten.view %885, %887 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
%889 = torch.aten.mm %888, %886 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16>
%890 = torch.aten.view %889, %868 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%891 = torch.aten.transpose.int %33, %int0, %int1 : !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,320],f16>
%892 = torch.aten.view %885, %887 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
%893 = torch.aten.mm %892, %891 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16>
%894 = torch.aten.view %893, %868 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%895 = torch.aten.transpose.int %34, %int0, %int1 : !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,320],f16>
%896 = torch.aten.view %885, %887 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
%897 = torch.aten.mm %896, %895 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16>
%898 = torch.aten.view %897, %868 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%899 = torch.prim.ListConstruct %int2, %int4096, %int8, %int40 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%900 = torch.aten.view %890, %899 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16>
%901 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%902 = torch.aten.permute %900, %901 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16>
%903 = torch.aten.clone %902, %int0 : !torch.vtensor<[2,8,4096,40],f16>, !torch.int -> !torch.vtensor<[2,8,4096,40],f16>
%904 = torch.prim.ListConstruct %int16, %int4096, %int40 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%905 = torch.aten.view %903, %904 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%906 = torch.aten.view %894, %899 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16>
%907 = torch.aten.permute %906, %901 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16>
%908 = torch.aten.clone %907, %int0 : !torch.vtensor<[2,8,4096,40],f16>, !torch.int -> !torch.vtensor<[2,8,4096,40],f16>
%909 = torch.aten.view %908, %904 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%910 = torch.aten.view %898, %899 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16>
%911 = torch.aten.permute %910, %901 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16>
%912 = torch.aten.clone %911, %int0 : !torch.vtensor<[2,8,4096,40],f16>, !torch.int -> !torch.vtensor<[2,8,4096,40],f16>
%913 = torch.aten.view %912, %904 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%914 = torch.aten.transpose.int %909, %int-1, %int-2 : !torch.vtensor<[16,4096,40],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,40,4096],f16>
%915 = torch.aten.broadcast_to %905, %904 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%916 = torch.aten.view %915, %904 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%917 = torch.prim.ListConstruct %int16, %int40, %int4096 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%918 = torch.aten.broadcast_to %914, %917 : !torch.vtensor<[16,40,4096],f16>, !torch.list<int> -> !torch.vtensor<[16,40,4096],f16>
%919 = torch.aten.view %918, %917 : !torch.vtensor<[16,40,4096],f16>, !torch.list<int> -> !torch.vtensor<[16,40,4096],f16>
%920 = torch.aten.bmm %916, %919 : !torch.vtensor<[16,4096,40],f16>, !torch.vtensor<[16,40,4096],f16> -> !torch.vtensor<[16,4096,4096],f16>
%921 = torch.prim.ListConstruct %int16, %int4096, %int4096 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%922 = torch.aten.view %920, %921 : !torch.vtensor<[16,4096,4096],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,4096],f16>
%923 = torch.aten.mul.Tensor %922, %3 : !torch.vtensor<[16,4096,4096],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,4096,4096],f16>
%values, %indices = torch.aten.max.dim %923, %int-1, %true : !torch.vtensor<[16,4096,4096],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,4096,1],f16>, !torch.vtensor<[16,4096,1],si64>
%924 = torch.aten.sub.Tensor %923, %values, %float1.000000e00 : !torch.vtensor<[16,4096,4096],f16>, !torch.vtensor<[16,4096,1],f16>, !torch.float -> !torch.vtensor<[16,4096,4096],f16>
%925 = torch.aten.exp %924 : !torch.vtensor<[16,4096,4096],f16> -> !torch.vtensor<[16,4096,4096],f16>
%926 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%927 = torch.aten.sum.dim_IntList %925, %926, %true, %none : !torch.vtensor<[16,4096,4096],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[16,4096,1],f16>
%928 = torch.aten.div.Tensor %925, %927 : !torch.vtensor<[16,4096,4096],f16>, !torch.vtensor<[16,4096,1],f16> -> !torch.vtensor<[16,4096,4096],f16>
%929 = torch.aten.broadcast_to %928, %921 : !torch.vtensor<[16,4096,4096],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,4096],f16>
%930 = torch.aten.view %929, %921 : !torch.vtensor<[16,4096,4096],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,4096],f16>
%931 = torch.aten.broadcast_to %913, %904 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%932 = torch.aten.view %931, %904 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%933 = torch.aten.bmm %930, %932 : !torch.vtensor<[16,4096,4096],f16>, !torch.vtensor<[16,4096,40],f16> -> !torch.vtensor<[16,4096,40],f16>
%934 = torch.aten.view %933, %904 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%935 = torch.prim.ListConstruct %int2, %int8, %int4096, %int40 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%936 = torch.aten.view %934, %935 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16>
%937 = torch.aten.permute %936, %901 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16>
%938 = torch.aten.clone %937, %int0 : !torch.vtensor<[2,4096,8,40],f16>, !torch.int -> !torch.vtensor<[2,4096,8,40],f16>
%939 = torch.aten.view %938, %868 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%940 = torch.aten.transpose.int %35, %int0, %int1 : !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,320],f16>
%941 = torch.aten.view %939, %887 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
%942 = torch.aten.mm %941, %940 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16>
%943 = torch.aten.mul.Scalar %36, %int1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320],f16>
%944 = torch.aten.add.Tensor %943, %942, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[8192,320],f16>, !torch.int -> !torch.vtensor<[8192,320],f16>
%945 = torch.aten.view %944, %868 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%946 = torch.aten.add.Tensor %945, %869, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16>
%947 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%948 = torch.aten.sum.dim_IntList %946, %947, %true, %none : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,4096,1],f16>
%949 = torch.aten.div.Scalar %948, %int320 : !torch.vtensor<[2,4096,1],f16>, !torch.int -> !torch.vtensor<[2,4096,1],f16>
%950 = torch.prim.ListConstruct %int2, %int4096, %int320 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%951 = torch.aten.broadcast_to %949, %950 : !torch.vtensor<[2,4096,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%952 = torch.aten.sub.Tensor %946, %951, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16>
%953 = torch.aten.mul.Tensor %952, %952 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16>
%954 = torch.aten.sum.dim_IntList %953, %947, %true, %none : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,4096,1],f16>
%955 = torch.aten.div.Scalar %954, %int320 : !torch.vtensor<[2,4096,1],f16>, !torch.int -> !torch.vtensor<[2,4096,1],f16>
%956 = torch.aten.add.Scalar %955, %float1.000000e-05, %int1 : !torch.vtensor<[2,4096,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,4096,1],f16>
%957 = torch.aten.rsqrt %956 : !torch.vtensor<[2,4096,1],f16> -> !torch.vtensor<[2,4096,1],f16>
%958 = torch.prim.ListConstruct %int2, %int4096, %int320 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%959 = torch.aten.broadcast_to %957, %958 : !torch.vtensor<[2,4096,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%960 = torch.aten.mul.Tensor %952, %959 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16>
%961 = torch.aten.mul.Tensor %960, %37 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[320],f16> -> !torch.vtensor<[2,4096,320],f16>
%962 = torch.aten.add.Tensor %961, %38, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16>
%963 = torch.aten.transpose.int %39, %int0, %int1 : !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,320],f16>
%964 = torch.aten.view %962, %887 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
%965 = torch.aten.mm %964, %963 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16>
%966 = torch.aten.view %965, %868 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%967 = torch.aten.transpose.int %40, %int0, %int1 : !torch.vtensor<[320,768],f16>, !torch.int, !torch.int -> !torch.vtensor<[768,320],f16>
%968 = torch.prim.ListConstruct %int154, %int768 : (!torch.int, !torch.int) -> !torch.list<int>
%969 = torch.aten.view %arg2, %968 : !torch.vtensor<[2,77,768],f16>, !torch.list<int> -> !torch.vtensor<[154,768],f16>
%970 = torch.aten.mm %969, %967 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,320],f16> -> !torch.vtensor<[154,320],f16>
%971 = torch.prim.ListConstruct %int2, %int77, %int320 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%972 = torch.aten.view %970, %971 : !torch.vtensor<[154,320],f16>, !torch.list<int> -> !torch.vtensor<[2,77,320],f16>
%973 = torch.aten.transpose.int %41, %int0, %int1 : !torch.vtensor<[320,768],f16>, !torch.int, !torch.int -> !torch.vtensor<[768,320],f16>
%974 = torch.aten.view %arg2, %968 : !torch.vtensor<[2,77,768],f16>, !torch.list<int> -> !torch.vtensor<[154,768],f16>
%975 = torch.aten.mm %974, %973 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,320],f16> -> !torch.vtensor<[154,320],f16>
%976 = torch.aten.view %975, %971 : !torch.vtensor<[154,320],f16>, !torch.list<int> -> !torch.vtensor<[2,77,320],f16>
%977 = torch.aten.view %966, %899 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16>
%978 = torch.aten.permute %977, %901 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16>
%979 = torch.aten.clone %978, %int0 : !torch.vtensor<[2,8,4096,40],f16>, !torch.int -> !torch.vtensor<[2,8,4096,40],f16>
%980 = torch.aten.view %979, %904 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%981 = torch.prim.ListConstruct %int2, %int77, %int8, %int40 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%982 = torch.aten.view %972, %981 : !torch.vtensor<[2,77,320],f16>, !torch.list<int> -> !torch.vtensor<[2,77,8,40],f16>
%983 = torch.aten.permute %982, %901 : !torch.vtensor<[2,77,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,40],f16>
%984 = torch.aten.clone %983, %int0 : !torch.vtensor<[2,8,77,40],f16>, !torch.int -> !torch.vtensor<[2,8,77,40],f16>
%985 = torch.prim.ListConstruct %int16, %int77, %int40 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%986 = torch.aten.view %984, %985 : !torch.vtensor<[2,8,77,40],f16>, !torch.list<int> -> !torch.vtensor<[16,77,40],f16>
%987 = torch.aten.view %976, %981 : !torch.vtensor<[2,77,320],f16>, !torch.list<int> -> !torch.vtensor<[2,77,8,40],f16>
%988 = torch.aten.permute %987, %901 : !torch.vtensor<[2,77,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,40],f16>
%989 = torch.aten.clone %988, %int0 : !torch.vtensor<[2,8,77,40],f16>, !torch.int -> !torch.vtensor<[2,8,77,40],f16>
%990 = torch.aten.view %989, %985 : !torch.vtensor<[2,8,77,40],f16>, !torch.list<int> -> !torch.vtensor<[16,77,40],f16>
%991 = torch.aten.transpose.int %986, %int-1, %int-2 : !torch.vtensor<[16,77,40],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,40,77],f16>
%992 = torch.aten.broadcast_to %980, %904 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%993 = torch.aten.view %992, %904 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%994 = torch.prim.ListConstruct %int16, %int40, %int77 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%995 = torch.aten.broadcast_to %991, %994 : !torch.vtensor<[16,40,77],f16>, !torch.list<int> -> !torch.vtensor<[16,40,77],f16>
%996 = torch.aten.view %995, %994 : !torch.vtensor<[16,40,77],f16>, !torch.list<int> -> !torch.vtensor<[16,40,77],f16>
%997 = torch.aten.bmm %993, %996 : !torch.vtensor<[16,4096,40],f16>, !torch.vtensor<[16,40,77],f16> -> !torch.vtensor<[16,4096,77],f16>
%998 = torch.prim.ListConstruct %int16, %int4096, %int77 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%999 = torch.aten.view %997, %998 : !torch.vtensor<[16,4096,77],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,77],f16>
%1000 = torch.aten.mul.Tensor %999, %3 : !torch.vtensor<[16,4096,77],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,4096,77],f16>
%values_0, %indices_1 = torch.aten.max.dim %1000, %int-1, %true : !torch.vtensor<[16,4096,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,4096,1],f16>, !torch.vtensor<[16,4096,1],si64>
%1001 = torch.aten.sub.Tensor %1000, %values_0, %float1.000000e00 : !torch.vtensor<[16,4096,77],f16>, !torch.vtensor<[16,4096,1],f16>, !torch.float -> !torch.vtensor<[16,4096,77],f16>
%1002 = torch.aten.exp %1001 : !torch.vtensor<[16,4096,77],f16> -> !torch.vtensor<[16,4096,77],f16>
%1003 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%1004 = torch.aten.sum.dim_IntList %1002, %1003, %true, %none : !torch.vtensor<[16,4096,77],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[16,4096,1],f16>
%1005 = torch.aten.div.Tensor %1002, %1004 : !torch.vtensor<[16,4096,77],f16>, !torch.vtensor<[16,4096,1],f16> -> !torch.vtensor<[16,4096,77],f16>
%1006 = torch.aten.broadcast_to %1005, %998 : !torch.vtensor<[16,4096,77],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,77],f16>
%1007 = torch.aten.view %1006, %998 : !torch.vtensor<[16,4096,77],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,77],f16>
%1008 = torch.aten.broadcast_to %990, %985 : !torch.vtensor<[16,77,40],f16>, !torch.list<int> -> !torch.vtensor<[16,77,40],f16>
%1009 = torch.aten.view %1008, %985 : !torch.vtensor<[16,77,40],f16>, !torch.list<int> -> !torch.vtensor<[16,77,40],f16>
%1010 = torch.aten.bmm %1007, %1009 : !torch.vtensor<[16,4096,77],f16>, !torch.vtensor<[16,77,40],f16> -> !torch.vtensor<[16,4096,40],f16>
%1011 = torch.aten.view %1010, %904 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%1012 = torch.aten.view %1011, %935 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16>
%1013 = torch.aten.permute %1012, %901 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16>
%1014 = torch.aten.clone %1013, %int0 : !torch.vtensor<[2,4096,8,40],f16>, !torch.int -> !torch.vtensor<[2,4096,8,40],f16>
%1015 = torch.aten.view %1014, %868 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%1016 = torch.aten.transpose.int %42, %int0, %int1 : !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,320],f16>
%1017 = torch.aten.view %1015, %887 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
%1018 = torch.aten.mm %1017, %1016 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16>
%1019 = torch.aten.mul.Scalar %43, %int1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320],f16>
%1020 = torch.aten.add.Tensor %1019, %1018, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[8192,320],f16>, !torch.int -> !torch.vtensor<[8192,320],f16>
%1021 = torch.aten.view %1020, %868 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%1022 = torch.aten.add.Tensor %1021, %946, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16>
%1023 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%1024 = torch.aten.sum.dim_IntList %1022, %1023, %true, %none : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,4096,1],f16>
%1025 = torch.aten.div.Scalar %1024, %int320 : !torch.vtensor<[2,4096,1],f16>, !torch.int -> !torch.vtensor<[2,4096,1],f16>
%1026 = torch.prim.ListConstruct %int2, %int4096, %int320 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1027 = torch.aten.broadcast_to %1025, %1026 : !torch.vtensor<[2,4096,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%1028 = torch.aten.sub.Tensor %1022, %1027, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16>
%1029 = torch.aten.mul.Tensor %1028, %1028 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16>
%1030 = torch.aten.sum.dim_IntList %1029, %1023, %true, %none : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,4096,1],f16>
%1031 = torch.aten.div.Scalar %1030, %int320 : !torch.vtensor<[2,4096,1],f16>, !torch.int -> !torch.vtensor<[2,4096,1],f16>
%1032 = torch.aten.add.Scalar %1031, %float1.000000e-05, %int1 : !torch.vtensor<[2,4096,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,4096,1],f16>
%1033 = torch.aten.rsqrt %1032 : !torch.vtensor<[2,4096,1],f16> -> !torch.vtensor<[2,4096,1],f16>
%1034 = torch.prim.ListConstruct %int2, %int4096, %int320 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1035 = torch.aten.broadcast_to %1033, %1034 : !torch.vtensor<[2,4096,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%1036 = torch.aten.mul.Tensor %1028, %1035 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16>
%1037 = torch.aten.mul.Tensor %1036, %44 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[320],f16> -> !torch.vtensor<[2,4096,320],f16>
%1038 = torch.aten.add.Tensor %1037, %45, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16>
%1039 = torch.aten.transpose.int %46, %int0, %int1 : !torch.vtensor<[2560,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,2560],f16>
%1040 = torch.aten.view %1038, %887 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
%1041 = torch.aten.mm %1040, %1039 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,2560],f16> -> !torch.vtensor<[8192,2560],f16>
%1042 = torch.aten.mul.Scalar %47, %int1 : !torch.vtensor<[2560],f16>, !torch.int -> !torch.vtensor<[2560],f16>
%1043 = torch.aten.add.Tensor %1042, %1041, %int1 : !torch.vtensor<[2560],f16>, !torch.vtensor<[8192,2560],f16>, !torch.int -> !torch.vtensor<[8192,2560],f16>
%1044 = torch.prim.ListConstruct %int2, %int4096, %int2560 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1045 = torch.aten.view %1043, %1044 : !torch.vtensor<[8192,2560],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,2560],f16>
%1046 = torch.aten.slice.Tensor %1045, %int-1, %int0, %int1280, %int1 : !torch.vtensor<[2,4096,2560],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,4096,1280],f16>
%1047 = torch.aten.slice.Tensor %1045, %int-1, %int1280, %int2560, %int1 : !torch.vtensor<[2,4096,2560],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,4096,1280],f16>
%1048 = torch.aten.gelu %1047, %str : !torch.vtensor<[2,4096,1280],f16>, !torch.str -> !torch.vtensor<[2,4096,1280],f16>
%1049 = torch.aten.mul.Tensor %1046, %1048 : !torch.vtensor<[2,4096,1280],f16>, !torch.vtensor<[2,4096,1280],f16> -> !torch.vtensor<[2,4096,1280],f16>
%1050 = torch.aten.transpose.int %48, %int0, %int1 : !torch.vtensor<[320,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,320],f16>
%1051 = torch.prim.ListConstruct %int8192, %int1280 : (!torch.int, !torch.int) -> !torch.list<int>
%1052 = torch.aten.view %1049, %1051 : !torch.vtensor<[2,4096,1280],f16>, !torch.list<int> -> !torch.vtensor<[8192,1280],f16>
%1053 = torch.aten.mm %1052, %1050 : !torch.vtensor<[8192,1280],f16>, !torch.vtensor<[1280,320],f16> -> !torch.vtensor<[8192,320],f16>
%1054 = torch.aten.mul.Scalar %49, %int1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320],f16>
%1055 = torch.aten.add.Tensor %1054, %1053, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[8192,320],f16>, !torch.int -> !torch.vtensor<[8192,320],f16>
%1056 = torch.aten.view %1055, %868 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%1057 = torch.aten.add.Tensor %1056, %1022, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16>
%1058 = torch.prim.ListConstruct %int2, %int64, %int64, %int320 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1059 = torch.aten.view %1057, %1058 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[2,64,64,320],f16>
%1060 = torch.prim.ListConstruct %int0, %int3, %int1, %int2 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1061 = torch.aten.permute %1059, %1060 : !torch.vtensor<[2,64,64,320],f16>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f16>
%1062 = torch.aten.convolution %1061, %50, %51, %744, %745, %744, %false, %745, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[320,320,1,1],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,320,64,64],f16>
%1063 = torch.aten.add.Tensor %1062, %832, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f16>
%1064 = torch.aten.clone %1063, %int0 : !torch.vtensor<[2,320,64,64],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f16>
%1065 = torch.aten.view %1064, %747 : !torch.vtensor<[2,320,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,32,10,4096],f16>
%1066 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%1067 = torch.aten.to.dtype %1066, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%1068 = torch.prim.ListConstruct %int2, %int32, %int10, %int4096 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1069 = torch.aten.broadcast_to %1067, %1068 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,10,4096],f32>
%1070 = torch.valsem.aten.copy %1069, %1065, %false : !torch.vtensor<[2,32,10,4096],f32>, !torch.vtensor<[2,32,10,4096],f16>, !torch.bool -> !torch.vtensor<[2,32,10,4096],f32>
%1071 = torch.aten.to.dtype %1070, %int7, %false, %false, %none : !torch.vtensor<[2,32,10,4096],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,10,4096],f64>
%1072 = torch.aten.sum.dim_IntList %1071, %754, %true, %none : !torch.vtensor<[2,32,10,4096],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%1073 = torch.aten.div.Scalar %1072, %int40960 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%1074 = torch.aten.sub.Tensor %1071, %1073, %float1.000000e00 : !torch.vtensor<[2,32,10,4096],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,10,4096],f64>
%1075 = torch.aten.mul.Tensor %1074, %1074 : !torch.vtensor<[2,32,10,4096],f64>, !torch.vtensor<[2,32,10,4096],f64> -> !torch.vtensor<[2,32,10,4096],f64>
%1076 = torch.aten.sum.dim_IntList %1075, %754, %true, %none : !torch.vtensor<[2,32,10,4096],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%1077 = torch.aten.div.Scalar %1076, %int40960 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%1078 = torch.aten.to.dtype %1077, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%1079 = torch.aten.sum.dim_IntList %1070, %754, %true, %none : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%1080 = torch.aten.div.Scalar %1079, %int40960 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%1081 = torch.aten.add.Tensor %1078, %6, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%1082 = torch.aten.rsqrt %1081 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%1083 = torch.aten.sub.Tensor %1065, %1080, %int1 : !torch.vtensor<[2,32,10,4096],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,10,4096],f32>
%1084 = torch.aten.mul.Tensor %1083, %1082 : !torch.vtensor<[2,32,10,4096],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,10,4096],f32>
%1085 = torch.aten.view %1084, %769 : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f32>
%1086 = torch.aten.unsqueeze %52, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16>
%1087 = torch.aten.unsqueeze %1086, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16>
%1088 = torch.aten.mul.Tensor %1085, %1087 : !torch.vtensor<[2,320,64,64],f32>, !torch.vtensor<[320,1,1],f16> -> !torch.vtensor<[2,320,64,64],f32>
%1089 = torch.aten.unsqueeze %53, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16>
%1090 = torch.aten.unsqueeze %1089, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16>
%1091 = torch.aten.add.Tensor %1088, %1090, %int1 : !torch.vtensor<[2,320,64,64],f32>, !torch.vtensor<[320,1,1],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f32>
%1092 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%1093 = torch.aten.to.dtype %1092, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%1094 = torch.prim.ListConstruct %int2, %int320, %int64, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1095 = torch.aten.broadcast_to %1093, %1094 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f16>
%1096 = torch.valsem.aten.copy %1095, %1091, %false : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f32>, !torch.bool -> !torch.vtensor<[2,320,64,64],f16>
%1097 = torch.aten.sigmoid %1096 : !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16>
%1098 = torch.aten.mul.Tensor %1097, %1096 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16>
%1099 = torch.aten.convolution %1098, %54, %55, %744, %744, %744, %false, %745, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[320,320,3,3],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,320,64,64],f16>
%1100 = torch.aten.sigmoid %743 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%1101 = torch.aten.mul.Tensor %1100, %743 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%1102 = torch.aten.transpose.int %56, %int0, %int1 : !torch.vtensor<[320,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,320],f16>
%1103 = torch.aten.mm %1101, %1102 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,320],f16> -> !torch.vtensor<[2,320],f16>
%1104 = torch.aten.mul.Scalar %57, %int1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320],f16>
%1105 = torch.aten.add.Tensor %1104, %1103, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[2,320],f16>, !torch.int -> !torch.vtensor<[2,320],f16>
%1106 = torch.aten.slice.Tensor %1105, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,320],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,320],f16>
%1107 = torch.aten.slice.Tensor %1106, %int1, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,320],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,320],f16>
%1108 = torch.aten.unsqueeze %1107, %int2 : !torch.vtensor<[2,320],f16>, !torch.int -> !torch.vtensor<[2,320,1],f16>
%1109 = torch.aten.unsqueeze %1108, %int3 : !torch.vtensor<[2,320,1],f16>, !torch.int -> !torch.vtensor<[2,320,1,1],f16>
%1110 = torch.aten.add.Tensor %1099, %1109, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,1,1],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f16>
%1111 = torch.aten.view %1110, %747 : !torch.vtensor<[2,320,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,32,10,4096],f16>
%1112 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%1113 = torch.aten.to.dtype %1112, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%1114 = torch.prim.ListConstruct %int2, %int32, %int10, %int4096 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1115 = torch.aten.broadcast_to %1113, %1114 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,10,4096],f32>
%1116 = torch.valsem.aten.copy %1115, %1111, %false : !torch.vtensor<[2,32,10,4096],f32>, !torch.vtensor<[2,32,10,4096],f16>, !torch.bool -> !torch.vtensor<[2,32,10,4096],f32>
%1117 = torch.aten.to.dtype %1116, %int7, %false, %false, %none : !torch.vtensor<[2,32,10,4096],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,10,4096],f64>
%1118 = torch.aten.sum.dim_IntList %1117, %754, %true, %none : !torch.vtensor<[2,32,10,4096],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%1119 = torch.aten.div.Scalar %1118, %int40960 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%1120 = torch.aten.sub.Tensor %1117, %1119, %float1.000000e00 : !torch.vtensor<[2,32,10,4096],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,10,4096],f64>
%1121 = torch.aten.mul.Tensor %1120, %1120 : !torch.vtensor<[2,32,10,4096],f64>, !torch.vtensor<[2,32,10,4096],f64> -> !torch.vtensor<[2,32,10,4096],f64>
%1122 = torch.aten.sum.dim_IntList %1121, %754, %true, %none : !torch.vtensor<[2,32,10,4096],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%1123 = torch.aten.div.Scalar %1122, %int40960 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%1124 = torch.aten.to.dtype %1123, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%1125 = torch.aten.sum.dim_IntList %1116, %754, %true, %none : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%1126 = torch.aten.div.Scalar %1125, %int40960 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%1127 = torch.aten.add.Tensor %1124, %6, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%1128 = torch.aten.rsqrt %1127 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%1129 = torch.aten.sub.Tensor %1111, %1126, %int1 : !torch.vtensor<[2,32,10,4096],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,10,4096],f32>
%1130 = torch.aten.mul.Tensor %1129, %1128 : !torch.vtensor<[2,32,10,4096],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,10,4096],f32>
%1131 = torch.aten.view %1130, %769 : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f32>
%1132 = torch.aten.unsqueeze %58, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16>
%1133 = torch.aten.unsqueeze %1132, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16>
%1134 = torch.aten.mul.Tensor %1131, %1133 : !torch.vtensor<[2,320,64,64],f32>, !torch.vtensor<[320,1,1],f16> -> !torch.vtensor<[2,320,64,64],f32>
%1135 = torch.aten.unsqueeze %59, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16>
%1136 = torch.aten.unsqueeze %1135, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16>
%1137 = torch.aten.add.Tensor %1134, %1136, %int1 : !torch.vtensor<[2,320,64,64],f32>, !torch.vtensor<[320,1,1],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f32>
%1138 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%1139 = torch.aten.to.dtype %1138, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%1140 = torch.prim.ListConstruct %int2, %int320, %int64, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1141 = torch.aten.broadcast_to %1139, %1140 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f16>
%1142 = torch.valsem.aten.copy %1141, %1137, %false : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f32>, !torch.bool -> !torch.vtensor<[2,320,64,64],f16>
%1143 = torch.aten.sigmoid %1142 : !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16>
%1144 = torch.aten.mul.Tensor %1143, %1142 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16>
%1145 = torch.aten.convolution %1144, %60, %61, %744, %744, %744, %false, %745, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[320,320,3,3],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,320,64,64],f16>
%1146 = torch.aten.add.Tensor %1063, %1145, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f16>
%1147 = torch.aten.div.Tensor %1146, %5 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[2,320,64,64],f16>
%1148 = torch.aten.clone %1147, %int0 : !torch.vtensor<[2,320,64,64],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f16>
%1149 = torch.aten.view %1148, %747 : !torch.vtensor<[2,320,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,32,10,4096],f16>
%1150 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%1151 = torch.aten.to.dtype %1150, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%1152 = torch.prim.ListConstruct %int2, %int32, %int10, %int4096 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1153 = torch.aten.broadcast_to %1151, %1152 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,10,4096],f32>
%1154 = torch.valsem.aten.copy %1153, %1149, %false : !torch.vtensor<[2,32,10,4096],f32>, !torch.vtensor<[2,32,10,4096],f16>, !torch.bool -> !torch.vtensor<[2,32,10,4096],f32>
%1155 = torch.aten.to.dtype %1154, %int7, %false, %false, %none : !torch.vtensor<[2,32,10,4096],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,10,4096],f64>
%1156 = torch.aten.sum.dim_IntList %1155, %754, %true, %none : !torch.vtensor<[2,32,10,4096],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%1157 = torch.aten.div.Scalar %1156, %int40960 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%1158 = torch.aten.sub.Tensor %1155, %1157, %float1.000000e00 : !torch.vtensor<[2,32,10,4096],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,10,4096],f64>
%1159 = torch.aten.mul.Tensor %1158, %1158 : !torch.vtensor<[2,32,10,4096],f64>, !torch.vtensor<[2,32,10,4096],f64> -> !torch.vtensor<[2,32,10,4096],f64>
%1160 = torch.aten.sum.dim_IntList %1159, %754, %true, %none : !torch.vtensor<[2,32,10,4096],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%1161 = torch.aten.div.Scalar %1160, %int40960 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%1162 = torch.aten.to.dtype %1161, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%1163 = torch.aten.sum.dim_IntList %1154, %754, %true, %none : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%1164 = torch.aten.div.Scalar %1163, %int40960 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%1165 = torch.aten.add.Tensor %1162, %4, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%1166 = torch.aten.rsqrt %1165 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%1167 = torch.aten.sub.Tensor %1149, %1164, %int1 : !torch.vtensor<[2,32,10,4096],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,10,4096],f32>
%1168 = torch.aten.mul.Tensor %1167, %1166 : !torch.vtensor<[2,32,10,4096],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,10,4096],f32>
%1169 = torch.aten.view %1168, %769 : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f32>
%1170 = torch.aten.unsqueeze %62, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16>
%1171 = torch.aten.unsqueeze %1170, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16>
%1172 = torch.aten.mul.Tensor %1169, %1171 : !torch.vtensor<[2,320,64,64],f32>, !torch.vtensor<[320,1,1],f16> -> !torch.vtensor<[2,320,64,64],f32>
%1173 = torch.aten.unsqueeze %63, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16>
%1174 = torch.aten.unsqueeze %1173, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16>
%1175 = torch.aten.add.Tensor %1172, %1174, %int1 : !torch.vtensor<[2,320,64,64],f32>, !torch.vtensor<[320,1,1],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f32>
%1176 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%1177 = torch.aten.to.dtype %1176, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%1178 = torch.prim.ListConstruct %int2, %int320, %int64, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1179 = torch.aten.broadcast_to %1177, %1178 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f16>
%1180 = torch.valsem.aten.copy %1179, %1175, %false : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f32>, !torch.bool -> !torch.vtensor<[2,320,64,64],f16>
%1181 = torch.aten.convolution %1180, %64, %65, %744, %745, %744, %false, %745, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[320,320,1,1],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,320,64,64],f16>
%1182 = torch.aten.permute %1181, %866 : !torch.vtensor<[2,320,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,64,64,320],f16>
%1183 = torch.aten.view %1182, %868 : !torch.vtensor<[2,64,64,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%1184 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%1185 = torch.aten.sum.dim_IntList %1183, %1184, %true, %none : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,4096,1],f16>
%1186 = torch.aten.div.Scalar %1185, %int320 : !torch.vtensor<[2,4096,1],f16>, !torch.int -> !torch.vtensor<[2,4096,1],f16>
%1187 = torch.prim.ListConstruct %int2, %int4096, %int320 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1188 = torch.aten.broadcast_to %1186, %1187 : !torch.vtensor<[2,4096,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%1189 = torch.aten.sub.Tensor %1183, %1188, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16>
%1190 = torch.aten.mul.Tensor %1189, %1189 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16>
%1191 = torch.aten.sum.dim_IntList %1190, %1184, %true, %none : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,4096,1],f16>
%1192 = torch.aten.div.Scalar %1191, %int320 : !torch.vtensor<[2,4096,1],f16>, !torch.int -> !torch.vtensor<[2,4096,1],f16>
%1193 = torch.aten.add.Scalar %1192, %float1.000000e-05, %int1 : !torch.vtensor<[2,4096,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,4096,1],f16>
%1194 = torch.aten.rsqrt %1193 : !torch.vtensor<[2,4096,1],f16> -> !torch.vtensor<[2,4096,1],f16>
%1195 = torch.prim.ListConstruct %int2, %int4096, %int320 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1196 = torch.aten.broadcast_to %1194, %1195 : !torch.vtensor<[2,4096,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%1197 = torch.aten.mul.Tensor %1189, %1196 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16>
%1198 = torch.aten.mul.Tensor %1197, %66 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[320],f16> -> !torch.vtensor<[2,4096,320],f16>
%1199 = torch.aten.add.Tensor %1198, %67, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16>
%1200 = torch.aten.transpose.int %68, %int0, %int1 : !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,320],f16>
%1201 = torch.aten.view %1199, %887 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
%1202 = torch.aten.mm %1201, %1200 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16>
%1203 = torch.aten.view %1202, %868 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%1204 = torch.aten.transpose.int %69, %int0, %int1 : !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,320],f16>
%1205 = torch.aten.view %1199, %887 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
%1206 = torch.aten.mm %1205, %1204 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16>
%1207 = torch.aten.view %1206, %868 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%1208 = torch.aten.transpose.int %70, %int0, %int1 : !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,320],f16>
%1209 = torch.aten.view %1199, %887 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
%1210 = torch.aten.mm %1209, %1208 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16>
%1211 = torch.aten.view %1210, %868 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%1212 = torch.aten.view %1203, %899 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16>
%1213 = torch.aten.permute %1212, %901 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16>
%1214 = torch.aten.clone %1213, %int0 : !torch.vtensor<[2,8,4096,40],f16>, !torch.int -> !torch.vtensor<[2,8,4096,40],f16>
%1215 = torch.aten.view %1214, %904 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%1216 = torch.aten.view %1207, %899 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16>
%1217 = torch.aten.permute %1216, %901 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16>
%1218 = torch.aten.clone %1217, %int0 : !torch.vtensor<[2,8,4096,40],f16>, !torch.int -> !torch.vtensor<[2,8,4096,40],f16>
%1219 = torch.aten.view %1218, %904 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%1220 = torch.aten.view %1211, %899 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16>
%1221 = torch.aten.permute %1220, %901 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16>
%1222 = torch.aten.clone %1221, %int0 : !torch.vtensor<[2,8,4096,40],f16>, !torch.int -> !torch.vtensor<[2,8,4096,40],f16>
%1223 = torch.aten.view %1222, %904 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%1224 = torch.aten.transpose.int %1219, %int-1, %int-2 : !torch.vtensor<[16,4096,40],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,40,4096],f16>
%1225 = torch.aten.broadcast_to %1215, %904 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%1226 = torch.aten.view %1225, %904 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%1227 = torch.aten.broadcast_to %1224, %917 : !torch.vtensor<[16,40,4096],f16>, !torch.list<int> -> !torch.vtensor<[16,40,4096],f16>
%1228 = torch.aten.view %1227, %917 : !torch.vtensor<[16,40,4096],f16>, !torch.list<int> -> !torch.vtensor<[16,40,4096],f16>
%1229 = torch.aten.bmm %1226, %1228 : !torch.vtensor<[16,4096,40],f16>, !torch.vtensor<[16,40,4096],f16> -> !torch.vtensor<[16,4096,4096],f16>
%1230 = torch.aten.view %1229, %921 : !torch.vtensor<[16,4096,4096],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,4096],f16>
%1231 = torch.aten.mul.Tensor %1230, %3 : !torch.vtensor<[16,4096,4096],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,4096,4096],f16>
%values_2, %indices_3 = torch.aten.max.dim %1231, %int-1, %true : !torch.vtensor<[16,4096,4096],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,4096,1],f16>, !torch.vtensor<[16,4096,1],si64>
%1232 = torch.aten.sub.Tensor %1231, %values_2, %float1.000000e00 : !torch.vtensor<[16,4096,4096],f16>, !torch.vtensor<[16,4096,1],f16>, !torch.float -> !torch.vtensor<[16,4096,4096],f16>
%1233 = torch.aten.exp %1232 : !torch.vtensor<[16,4096,4096],f16> -> !torch.vtensor<[16,4096,4096],f16>
%1234 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%1235 = torch.aten.sum.dim_IntList %1233, %1234, %true, %none : !torch.vtensor<[16,4096,4096],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[16,4096,1],f16>
%1236 = torch.aten.div.Tensor %1233, %1235 : !torch.vtensor<[16,4096,4096],f16>, !torch.vtensor<[16,4096,1],f16> -> !torch.vtensor<[16,4096,4096],f16>
%1237 = torch.aten.broadcast_to %1236, %921 : !torch.vtensor<[16,4096,4096],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,4096],f16>
%1238 = torch.aten.view %1237, %921 : !torch.vtensor<[16,4096,4096],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,4096],f16>
%1239 = torch.aten.broadcast_to %1223, %904 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%1240 = torch.aten.view %1239, %904 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%1241 = torch.aten.bmm %1238, %1240 : !torch.vtensor<[16,4096,4096],f16>, !torch.vtensor<[16,4096,40],f16> -> !torch.vtensor<[16,4096,40],f16>
%1242 = torch.aten.view %1241, %904 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%1243 = torch.aten.view %1242, %935 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16>
%1244 = torch.aten.permute %1243, %901 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16>
%1245 = torch.aten.clone %1244, %int0 : !torch.vtensor<[2,4096,8,40],f16>, !torch.int -> !torch.vtensor<[2,4096,8,40],f16>
%1246 = torch.aten.view %1245, %868 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%1247 = torch.aten.transpose.int %71, %int0, %int1 : !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,320],f16>
%1248 = torch.aten.view %1246, %887 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
%1249 = torch.aten.mm %1248, %1247 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16>
%1250 = torch.aten.mul.Scalar %72, %int1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320],f16>
%1251 = torch.aten.add.Tensor %1250, %1249, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[8192,320],f16>, !torch.int -> !torch.vtensor<[8192,320],f16>
%1252 = torch.aten.view %1251, %868 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%1253 = torch.aten.add.Tensor %1252, %1183, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16>
%1254 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%1255 = torch.aten.sum.dim_IntList %1253, %1254, %true, %none : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,4096,1],f16>
%1256 = torch.aten.div.Scalar %1255, %int320 : !torch.vtensor<[2,4096,1],f16>, !torch.int -> !torch.vtensor<[2,4096,1],f16>
%1257 = torch.prim.ListConstruct %int2, %int4096, %int320 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1258 = torch.aten.broadcast_to %1256, %1257 : !torch.vtensor<[2,4096,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%1259 = torch.aten.sub.Tensor %1253, %1258, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16>
%1260 = torch.aten.mul.Tensor %1259, %1259 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16>
%1261 = torch.aten.sum.dim_IntList %1260, %1254, %true, %none : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,4096,1],f16>
%1262 = torch.aten.div.Scalar %1261, %int320 : !torch.vtensor<[2,4096,1],f16>, !torch.int -> !torch.vtensor<[2,4096,1],f16>
%1263 = torch.aten.add.Scalar %1262, %float1.000000e-05, %int1 : !torch.vtensor<[2,4096,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,4096,1],f16>
%1264 = torch.aten.rsqrt %1263 : !torch.vtensor<[2,4096,1],f16> -> !torch.vtensor<[2,4096,1],f16>
%1265 = torch.prim.ListConstruct %int2, %int4096, %int320 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1266 = torch.aten.broadcast_to %1264, %1265 : !torch.vtensor<[2,4096,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%1267 = torch.aten.mul.Tensor %1259, %1266 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16>
%1268 = torch.aten.mul.Tensor %1267, %73 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[320],f16> -> !torch.vtensor<[2,4096,320],f16>
%1269 = torch.aten.add.Tensor %1268, %74, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16>
%1270 = torch.aten.transpose.int %75, %int0, %int1 : !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,320],f16>
%1271 = torch.aten.view %1269, %887 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
%1272 = torch.aten.mm %1271, %1270 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16>
%1273 = torch.aten.view %1272, %868 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%1274 = torch.aten.transpose.int %76, %int0, %int1 : !torch.vtensor<[320,768],f16>, !torch.int, !torch.int -> !torch.vtensor<[768,320],f16>
%1275 = torch.aten.view %arg2, %968 : !torch.vtensor<[2,77,768],f16>, !torch.list<int> -> !torch.vtensor<[154,768],f16>
%1276 = torch.aten.mm %1275, %1274 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,320],f16> -> !torch.vtensor<[154,320],f16>
%1277 = torch.aten.view %1276, %971 : !torch.vtensor<[154,320],f16>, !torch.list<int> -> !torch.vtensor<[2,77,320],f16>
%1278 = torch.aten.transpose.int %77, %int0, %int1 : !torch.vtensor<[320,768],f16>, !torch.int, !torch.int -> !torch.vtensor<[768,320],f16>
%1279 = torch.aten.view %arg2, %968 : !torch.vtensor<[2,77,768],f16>, !torch.list<int> -> !torch.vtensor<[154,768],f16>
%1280 = torch.aten.mm %1279, %1278 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,320],f16> -> !torch.vtensor<[154,320],f16>
%1281 = torch.aten.view %1280, %971 : !torch.vtensor<[154,320],f16>, !torch.list<int> -> !torch.vtensor<[2,77,320],f16>
%1282 = torch.aten.view %1273, %899 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16>
%1283 = torch.aten.permute %1282, %901 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16>
%1284 = torch.aten.clone %1283, %int0 : !torch.vtensor<[2,8,4096,40],f16>, !torch.int -> !torch.vtensor<[2,8,4096,40],f16>
%1285 = torch.aten.view %1284, %904 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%1286 = torch.aten.view %1277, %981 : !torch.vtensor<[2,77,320],f16>, !torch.list<int> -> !torch.vtensor<[2,77,8,40],f16>
%1287 = torch.aten.permute %1286, %901 : !torch.vtensor<[2,77,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,40],f16>
%1288 = torch.aten.clone %1287, %int0 : !torch.vtensor<[2,8,77,40],f16>, !torch.int -> !torch.vtensor<[2,8,77,40],f16>
%1289 = torch.aten.view %1288, %985 : !torch.vtensor<[2,8,77,40],f16>, !torch.list<int> -> !torch.vtensor<[16,77,40],f16>
%1290 = torch.aten.view %1281, %981 : !torch.vtensor<[2,77,320],f16>, !torch.list<int> -> !torch.vtensor<[2,77,8,40],f16>
%1291 = torch.aten.permute %1290, %901 : !torch.vtensor<[2,77,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,40],f16>
%1292 = torch.aten.clone %1291, %int0 : !torch.vtensor<[2,8,77,40],f16>, !torch.int -> !torch.vtensor<[2,8,77,40],f16>
%1293 = torch.aten.view %1292, %985 : !torch.vtensor<[2,8,77,40],f16>, !torch.list<int> -> !torch.vtensor<[16,77,40],f16>
%1294 = torch.aten.transpose.int %1289, %int-1, %int-2 : !torch.vtensor<[16,77,40],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,40,77],f16>
%1295 = torch.aten.broadcast_to %1285, %904 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%1296 = torch.aten.view %1295, %904 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%1297 = torch.aten.broadcast_to %1294, %994 : !torch.vtensor<[16,40,77],f16>, !torch.list<int> -> !torch.vtensor<[16,40,77],f16>
%1298 = torch.aten.view %1297, %994 : !torch.vtensor<[16,40,77],f16>, !torch.list<int> -> !torch.vtensor<[16,40,77],f16>
%1299 = torch.aten.bmm %1296, %1298 : !torch.vtensor<[16,4096,40],f16>, !torch.vtensor<[16,40,77],f16> -> !torch.vtensor<[16,4096,77],f16>
%1300 = torch.aten.view %1299, %998 : !torch.vtensor<[16,4096,77],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,77],f16>
%1301 = torch.aten.mul.Tensor %1300, %3 : !torch.vtensor<[16,4096,77],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,4096,77],f16>
%values_4, %indices_5 = torch.aten.max.dim %1301, %int-1, %true : !torch.vtensor<[16,4096,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,4096,1],f16>, !torch.vtensor<[16,4096,1],si64>
%1302 = torch.aten.sub.Tensor %1301, %values_4, %float1.000000e00 : !torch.vtensor<[16,4096,77],f16>, !torch.vtensor<[16,4096,1],f16>, !torch.float -> !torch.vtensor<[16,4096,77],f16>
%1303 = torch.aten.exp %1302 : !torch.vtensor<[16,4096,77],f16> -> !torch.vtensor<[16,4096,77],f16>
%1304 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%1305 = torch.aten.sum.dim_IntList %1303, %1304, %true, %none : !torch.vtensor<[16,4096,77],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[16,4096,1],f16>
%1306 = torch.aten.div.Tensor %1303, %1305 : !torch.vtensor<[16,4096,77],f16>, !torch.vtensor<[16,4096,1],f16> -> !torch.vtensor<[16,4096,77],f16>
%1307 = torch.aten.broadcast_to %1306, %998 : !torch.vtensor<[16,4096,77],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,77],f16>
%1308 = torch.aten.view %1307, %998 : !torch.vtensor<[16,4096,77],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,77],f16>
%1309 = torch.aten.broadcast_to %1293, %985 : !torch.vtensor<[16,77,40],f16>, !torch.list<int> -> !torch.vtensor<[16,77,40],f16>
%1310 = torch.aten.view %1309, %985 : !torch.vtensor<[16,77,40],f16>, !torch.list<int> -> !torch.vtensor<[16,77,40],f16>
%1311 = torch.aten.bmm %1308, %1310 : !torch.vtensor<[16,4096,77],f16>, !torch.vtensor<[16,77,40],f16> -> !torch.vtensor<[16,4096,40],f16>
%1312 = torch.aten.view %1311, %904 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%1313 = torch.aten.view %1312, %935 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16>
%1314 = torch.aten.permute %1313, %901 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16>
%1315 = torch.aten.clone %1314, %int0 : !torch.vtensor<[2,4096,8,40],f16>, !torch.int -> !torch.vtensor<[2,4096,8,40],f16>
%1316 = torch.aten.view %1315, %868 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%1317 = torch.aten.transpose.int %78, %int0, %int1 : !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,320],f16>
%1318 = torch.aten.view %1316, %887 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
%1319 = torch.aten.mm %1318, %1317 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16>
%1320 = torch.aten.mul.Scalar %79, %int1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320],f16>
%1321 = torch.aten.add.Tensor %1320, %1319, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[8192,320],f16>, !torch.int -> !torch.vtensor<[8192,320],f16>
%1322 = torch.aten.view %1321, %868 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%1323 = torch.aten.add.Tensor %1322, %1253, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16>
%1324 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%1325 = torch.aten.sum.dim_IntList %1323, %1324, %true, %none : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,4096,1],f16>
%1326 = torch.aten.div.Scalar %1325, %int320 : !torch.vtensor<[2,4096,1],f16>, !torch.int -> !torch.vtensor<[2,4096,1],f16>
%1327 = torch.prim.ListConstruct %int2, %int4096, %int320 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1328 = torch.aten.broadcast_to %1326, %1327 : !torch.vtensor<[2,4096,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%1329 = torch.aten.sub.Tensor %1323, %1328, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16>
%1330 = torch.aten.mul.Tensor %1329, %1329 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16>
%1331 = torch.aten.sum.dim_IntList %1330, %1324, %true, %none : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,4096,1],f16>
%1332 = torch.aten.div.Scalar %1331, %int320 : !torch.vtensor<[2,4096,1],f16>, !torch.int -> !torch.vtensor<[2,4096,1],f16>
%1333 = torch.aten.add.Scalar %1332, %float1.000000e-05, %int1 : !torch.vtensor<[2,4096,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,4096,1],f16>
%1334 = torch.aten.rsqrt %1333 : !torch.vtensor<[2,4096,1],f16> -> !torch.vtensor<[2,4096,1],f16>
%1335 = torch.prim.ListConstruct %int2, %int4096, %int320 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1336 = torch.aten.broadcast_to %1334, %1335 : !torch.vtensor<[2,4096,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%1337 = torch.aten.mul.Tensor %1329, %1336 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16>
%1338 = torch.aten.mul.Tensor %1337, %80 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[320],f16> -> !torch.vtensor<[2,4096,320],f16>
%1339 = torch.aten.add.Tensor %1338, %81, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16>
%1340 = torch.aten.transpose.int %82, %int0, %int1 : !torch.vtensor<[2560,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,2560],f16>
%1341 = torch.aten.view %1339, %887 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
%1342 = torch.aten.mm %1341, %1340 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,2560],f16> -> !torch.vtensor<[8192,2560],f16>
%1343 = torch.aten.mul.Scalar %83, %int1 : !torch.vtensor<[2560],f16>, !torch.int -> !torch.vtensor<[2560],f16>
%1344 = torch.aten.add.Tensor %1343, %1342, %int1 : !torch.vtensor<[2560],f16>, !torch.vtensor<[8192,2560],f16>, !torch.int -> !torch.vtensor<[8192,2560],f16>
%1345 = torch.aten.view %1344, %1044 : !torch.vtensor<[8192,2560],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,2560],f16>
%1346 = torch.aten.slice.Tensor %1345, %int-1, %int0, %int1280, %int1 : !torch.vtensor<[2,4096,2560],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,4096,1280],f16>
%1347 = torch.aten.slice.Tensor %1345, %int-1, %int1280, %int2560, %int1 : !torch.vtensor<[2,4096,2560],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,4096,1280],f16>
%1348 = torch.aten.gelu %1347, %str : !torch.vtensor<[2,4096,1280],f16>, !torch.str -> !torch.vtensor<[2,4096,1280],f16>
%1349 = torch.aten.mul.Tensor %1346, %1348 : !torch.vtensor<[2,4096,1280],f16>, !torch.vtensor<[2,4096,1280],f16> -> !torch.vtensor<[2,4096,1280],f16>
%1350 = torch.aten.transpose.int %84, %int0, %int1 : !torch.vtensor<[320,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,320],f16>
%1351 = torch.aten.view %1349, %1051 : !torch.vtensor<[2,4096,1280],f16>, !torch.list<int> -> !torch.vtensor<[8192,1280],f16>
%1352 = torch.aten.mm %1351, %1350 : !torch.vtensor<[8192,1280],f16>, !torch.vtensor<[1280,320],f16> -> !torch.vtensor<[8192,320],f16>
%1353 = torch.aten.mul.Scalar %85, %int1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320],f16>
%1354 = torch.aten.add.Tensor %1353, %1352, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[8192,320],f16>, !torch.int -> !torch.vtensor<[8192,320],f16>
%1355 = torch.aten.view %1354, %868 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%1356 = torch.aten.add.Tensor %1355, %1323, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16>
%1357 = torch.aten.view %1356, %1058 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[2,64,64,320],f16>
%1358 = torch.aten.permute %1357, %1060 : !torch.vtensor<[2,64,64,320],f16>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f16>
%1359 = torch.aten.convolution %1358, %86, %87, %744, %745, %744, %false, %745, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[320,320,1,1],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,320,64,64],f16>
%1360 = torch.aten.add.Tensor %1359, %1147, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f16>
%1361 = torch.prim.ListConstruct %int2, %int2 : (!torch.int, !torch.int) -> !torch.list<int>
%1362 = torch.aten.convolution %1360, %88, %89, %1361, %744, %744, %false, %745, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[320,320,3,3],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,320,32,32],f16>
%1363 = torch.aten.clone %1362, %int0 : !torch.vtensor<[2,320,32,32],f16>, !torch.int -> !torch.vtensor<[2,320,32,32],f16>
%1364 = torch.prim.ListConstruct %int2, %int32, %int10, %int1024 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1365 = torch.aten.view %1363, %1364 : !torch.vtensor<[2,320,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,32,10,1024],f16>
%1366 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%1367 = torch.aten.to.dtype %1366, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%1368 = torch.prim.ListConstruct %int2, %int32, %int10, %int1024 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1369 = torch.aten.broadcast_to %1367, %1368 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,10,1024],f32>
%1370 = torch.valsem.aten.copy %1369, %1365, %false : !torch.vtensor<[2,32,10,1024],f32>, !torch.vtensor<[2,32,10,1024],f16>, !torch.bool -> !torch.vtensor<[2,32,10,1024],f32>
%1371 = torch.aten.to.dtype %1370, %int7, %false, %false, %none : !torch.vtensor<[2,32,10,1024],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,10,1024],f64>
%1372 = torch.aten.sum.dim_IntList %1371, %754, %true, %none : !torch.vtensor<[2,32,10,1024],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%1373 = torch.aten.div.Scalar %1372, %int10240 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%1374 = torch.aten.sub.Tensor %1371, %1373, %float1.000000e00 : !torch.vtensor<[2,32,10,1024],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,10,1024],f64>
%1375 = torch.aten.mul.Tensor %1374, %1374 : !torch.vtensor<[2,32,10,1024],f64>, !torch.vtensor<[2,32,10,1024],f64> -> !torch.vtensor<[2,32,10,1024],f64>
%1376 = torch.aten.sum.dim_IntList %1375, %754, %true, %none : !torch.vtensor<[2,32,10,1024],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%1377 = torch.aten.div.Scalar %1376, %int10240 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%1378 = torch.aten.to.dtype %1377, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%1379 = torch.aten.sum.dim_IntList %1370, %754, %true, %none : !torch.vtensor<[2,32,10,1024],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%1380 = torch.aten.div.Scalar %1379, %int10240 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%1381 = torch.aten.add.Tensor %1378, %6, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%1382 = torch.aten.rsqrt %1381 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%1383 = torch.aten.sub.Tensor %1365, %1380, %int1 : !torch.vtensor<[2,32,10,1024],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,10,1024],f32>
%1384 = torch.aten.mul.Tensor %1383, %1382 : !torch.vtensor<[2,32,10,1024],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,10,1024],f32>
%1385 = torch.prim.ListConstruct %int2, %int320, %int32, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1386 = torch.aten.view %1384, %1385 : !torch.vtensor<[2,32,10,1024],f32>, !torch.list<int> -> !torch.vtensor<[2,320,32,32],f32>
%1387 = torch.aten.unsqueeze %90, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16>
%1388 = torch.aten.unsqueeze %1387, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16>
%1389 = torch.aten.mul.Tensor %1386, %1388 : !torch.vtensor<[2,320,32,32],f32>, !torch.vtensor<[320,1,1],f16> -> !torch.vtensor<[2,320,32,32],f32>
%1390 = torch.aten.unsqueeze %91, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16>
%1391 = torch.aten.unsqueeze %1390, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16>
%1392 = torch.aten.add.Tensor %1389, %1391, %int1 : !torch.vtensor<[2,320,32,32],f32>, !torch.vtensor<[320,1,1],f16>, !torch.int -> !torch.vtensor<[2,320,32,32],f32>
%1393 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%1394 = torch.aten.to.dtype %1393, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%1395 = torch.prim.ListConstruct %int2, %int320, %int32, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1396 = torch.aten.broadcast_to %1394, %1395 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,320,32,32],f16>
%1397 = torch.valsem.aten.copy %1396, %1392, %false : !torch.vtensor<[2,320,32,32],f16>, !torch.vtensor<[2,320,32,32],f32>, !torch.bool -> !torch.vtensor<[2,320,32,32],f16>
%1398 = torch.aten.sigmoid %1397 : !torch.vtensor<[2,320,32,32],f16> -> !torch.vtensor<[2,320,32,32],f16>
%1399 = torch.aten.mul.Tensor %1398, %1397 : !torch.vtensor<[2,320,32,32],f16>, !torch.vtensor<[2,320,32,32],f16> -> !torch.vtensor<[2,320,32,32],f16>
%1400 = torch.aten.convolution %1399, %92, %93, %744, %744, %744, %false, %745, %int1 : !torch.vtensor<[2,320,32,32],f16>, !torch.vtensor<[640,320,3,3],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
%1401 = torch.aten.sigmoid %743 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%1402 = torch.aten.mul.Tensor %1401, %743 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%1403 = torch.aten.transpose.int %94, %int0, %int1 : !torch.vtensor<[640,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,640],f16>
%1404 = torch.aten.mm %1402, %1403 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,640],f16> -> !torch.vtensor<[2,640],f16>
%1405 = torch.aten.mul.Scalar %95, %int1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640],f16>
%1406 = torch.aten.add.Tensor %1405, %1404, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2,640],f16>, !torch.int -> !torch.vtensor<[2,640],f16>
%1407 = torch.aten.slice.Tensor %1406, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,640],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,640],f16>
%1408 = torch.aten.slice.Tensor %1407, %int1, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,640],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,640],f16>
%1409 = torch.aten.unsqueeze %1408, %int2 : !torch.vtensor<[2,640],f16>, !torch.int -> !torch.vtensor<[2,640,1],f16>
%1410 = torch.aten.unsqueeze %1409, %int3 : !torch.vtensor<[2,640,1],f16>, !torch.int -> !torch.vtensor<[2,640,1,1],f16>
%1411 = torch.aten.add.Tensor %1400, %1410, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,1,1],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
%1412 = torch.prim.ListConstruct %int2, %int32, %int20, %int1024 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1413 = torch.aten.view %1411, %1412 : !torch.vtensor<[2,640,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,32,20,1024],f16>
%1414 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%1415 = torch.aten.to.dtype %1414, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%1416 = torch.prim.ListConstruct %int2, %int32, %int20, %int1024 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1417 = torch.aten.broadcast_to %1415, %1416 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,20,1024],f32>
%1418 = torch.valsem.aten.copy %1417, %1413, %false : !torch.vtensor<[2,32,20,1024],f32>, !torch.vtensor<[2,32,20,1024],f16>, !torch.bool -> !torch.vtensor<[2,32,20,1024],f32>
%1419 = torch.aten.to.dtype %1418, %int7, %false, %false, %none : !torch.vtensor<[2,32,20,1024],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,20,1024],f64>
%1420 = torch.aten.sum.dim_IntList %1419, %754, %true, %none : !torch.vtensor<[2,32,20,1024],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%1421 = torch.aten.div.Scalar %1420, %int20480 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%1422 = torch.aten.sub.Tensor %1419, %1421, %float1.000000e00 : !torch.vtensor<[2,32,20,1024],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,20,1024],f64>
%1423 = torch.aten.mul.Tensor %1422, %1422 : !torch.vtensor<[2,32,20,1024],f64>, !torch.vtensor<[2,32,20,1024],f64> -> !torch.vtensor<[2,32,20,1024],f64>
%1424 = torch.aten.sum.dim_IntList %1423, %754, %true, %none : !torch.vtensor<[2,32,20,1024],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%1425 = torch.aten.div.Scalar %1424, %int20480 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%1426 = torch.aten.to.dtype %1425, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%1427 = torch.aten.sum.dim_IntList %1418, %754, %true, %none : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%1428 = torch.aten.div.Scalar %1427, %int20480 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%1429 = torch.aten.add.Tensor %1426, %6, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%1430 = torch.aten.rsqrt %1429 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%1431 = torch.aten.sub.Tensor %1413, %1428, %int1 : !torch.vtensor<[2,32,20,1024],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,20,1024],f32>
%1432 = torch.aten.mul.Tensor %1431, %1430 : !torch.vtensor<[2,32,20,1024],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,20,1024],f32>
%1433 = torch.prim.ListConstruct %int2, %int640, %int32, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1434 = torch.aten.view %1432, %1433 : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f32>
%1435 = torch.aten.unsqueeze %96, %int-1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640,1],f16>
%1436 = torch.aten.unsqueeze %1435, %int-1 : !torch.vtensor<[640,1],f16>, !torch.int -> !torch.vtensor<[640,1,1],f16>
%1437 = torch.aten.mul.Tensor %1434, %1436 : !torch.vtensor<[2,640,32,32],f32>, !torch.vtensor<[640,1,1],f16> -> !torch.vtensor<[2,640,32,32],f32>
%1438 = torch.aten.unsqueeze %97, %int-1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640,1],f16>
%1439 = torch.aten.unsqueeze %1438, %int-1 : !torch.vtensor<[640,1],f16>, !torch.int -> !torch.vtensor<[640,1,1],f16>
%1440 = torch.aten.add.Tensor %1437, %1439, %int1 : !torch.vtensor<[2,640,32,32],f32>, !torch.vtensor<[640,1,1],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f32>
%1441 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%1442 = torch.aten.to.dtype %1441, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%1443 = torch.prim.ListConstruct %int2, %int640, %int32, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1444 = torch.aten.broadcast_to %1442, %1443 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f16>
%1445 = torch.valsem.aten.copy %1444, %1440, %false : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f32>, !torch.bool -> !torch.vtensor<[2,640,32,32],f16>
%1446 = torch.aten.sigmoid %1445 : !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16>
%1447 = torch.aten.mul.Tensor %1446, %1445 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16>
%1448 = torch.aten.convolution %1447, %98, %99, %744, %744, %744, %false, %745, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[640,640,3,3],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
%1449 = torch.aten.convolution %1362, %100, %101, %744, %745, %744, %false, %745, %int1 : !torch.vtensor<[2,320,32,32],f16>, !torch.vtensor<[640,320,1,1],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
%1450 = torch.aten.add.Tensor %1449, %1448, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
%1451 = torch.aten.div.Tensor %1450, %5 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[2,640,32,32],f16>
%1452 = torch.aten.clone %1451, %int0 : !torch.vtensor<[2,640,32,32],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
%1453 = torch.aten.view %1452, %1412 : !torch.vtensor<[2,640,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,32,20,1024],f16>
%1454 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%1455 = torch.aten.to.dtype %1454, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%1456 = torch.prim.ListConstruct %int2, %int32, %int20, %int1024 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1457 = torch.aten.broadcast_to %1455, %1456 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,20,1024],f32>
%1458 = torch.valsem.aten.copy %1457, %1453, %false : !torch.vtensor<[2,32,20,1024],f32>, !torch.vtensor<[2,32,20,1024],f16>, !torch.bool -> !torch.vtensor<[2,32,20,1024],f32>
%1459 = torch.aten.to.dtype %1458, %int7, %false, %false, %none : !torch.vtensor<[2,32,20,1024],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,20,1024],f64>
%1460 = torch.aten.sum.dim_IntList %1459, %754, %true, %none : !torch.vtensor<[2,32,20,1024],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%1461 = torch.aten.div.Scalar %1460, %int20480 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%1462 = torch.aten.sub.Tensor %1459, %1461, %float1.000000e00 : !torch.vtensor<[2,32,20,1024],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,20,1024],f64>
%1463 = torch.aten.mul.Tensor %1462, %1462 : !torch.vtensor<[2,32,20,1024],f64>, !torch.vtensor<[2,32,20,1024],f64> -> !torch.vtensor<[2,32,20,1024],f64>
%1464 = torch.aten.sum.dim_IntList %1463, %754, %true, %none : !torch.vtensor<[2,32,20,1024],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%1465 = torch.aten.div.Scalar %1464, %int20480 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%1466 = torch.aten.to.dtype %1465, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%1467 = torch.aten.sum.dim_IntList %1458, %754, %true, %none : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%1468 = torch.aten.div.Scalar %1467, %int20480 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%1469 = torch.aten.add.Tensor %1466, %4, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%1470 = torch.aten.rsqrt %1469 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%1471 = torch.aten.sub.Tensor %1453, %1468, %int1 : !torch.vtensor<[2,32,20,1024],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,20,1024],f32>
%1472 = torch.aten.mul.Tensor %1471, %1470 : !torch.vtensor<[2,32,20,1024],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,20,1024],f32>
%1473 = torch.aten.view %1472, %1433 : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f32>
%1474 = torch.aten.unsqueeze %102, %int-1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640,1],f16>
%1475 = torch.aten.unsqueeze %1474, %int-1 : !torch.vtensor<[640,1],f16>, !torch.int -> !torch.vtensor<[640,1,1],f16>
%1476 = torch.aten.mul.Tensor %1473, %1475 : !torch.vtensor<[2,640,32,32],f32>, !torch.vtensor<[640,1,1],f16> -> !torch.vtensor<[2,640,32,32],f32>
%1477 = torch.aten.unsqueeze %103, %int-1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640,1],f16>
%1478 = torch.aten.unsqueeze %1477, %int-1 : !torch.vtensor<[640,1],f16>, !torch.int -> !torch.vtensor<[640,1,1],f16>
%1479 = torch.aten.add.Tensor %1476, %1478, %int1 : !torch.vtensor<[2,640,32,32],f32>, !torch.vtensor<[640,1,1],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f32>
%1480 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%1481 = torch.aten.to.dtype %1480, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%1482 = torch.prim.ListConstruct %int2, %int640, %int32, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1483 = torch.aten.broadcast_to %1481, %1482 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f16>
%1484 = torch.valsem.aten.copy %1483, %1479, %false : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f32>, !torch.bool -> !torch.vtensor<[2,640,32,32],f16>
%1485 = torch.aten.convolution %1484, %104, %105, %744, %745, %744, %false, %745, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[640,640,1,1],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
%1486 = torch.aten.permute %1485, %866 : !torch.vtensor<[2,640,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,32,32,640],f16>
%1487 = torch.prim.ListConstruct %int2, %int1024, %int640 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1488 = torch.aten.view %1486, %1487 : !torch.vtensor<[2,32,32,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%1489 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%1490 = torch.aten.sum.dim_IntList %1488, %1489, %true, %none : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,1024,1],f16>
%1491 = torch.aten.div.Scalar %1490, %int640 : !torch.vtensor<[2,1024,1],f16>, !torch.int -> !torch.vtensor<[2,1024,1],f16>
%1492 = torch.prim.ListConstruct %int2, %int1024, %int640 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1493 = torch.aten.broadcast_to %1491, %1492 : !torch.vtensor<[2,1024,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%1494 = torch.aten.sub.Tensor %1488, %1493, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16>
%1495 = torch.aten.mul.Tensor %1494, %1494 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16>
%1496 = torch.aten.sum.dim_IntList %1495, %1489, %true, %none : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,1024,1],f16>
%1497 = torch.aten.div.Scalar %1496, %int640 : !torch.vtensor<[2,1024,1],f16>, !torch.int -> !torch.vtensor<[2,1024,1],f16>
%1498 = torch.aten.add.Scalar %1497, %float1.000000e-05, %int1 : !torch.vtensor<[2,1024,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,1024,1],f16>
%1499 = torch.aten.rsqrt %1498 : !torch.vtensor<[2,1024,1],f16> -> !torch.vtensor<[2,1024,1],f16>
%1500 = torch.prim.ListConstruct %int2, %int1024, %int640 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1501 = torch.aten.broadcast_to %1499, %1500 : !torch.vtensor<[2,1024,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%1502 = torch.aten.mul.Tensor %1494, %1501 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16>
%1503 = torch.aten.mul.Tensor %1502, %106 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[640],f16> -> !torch.vtensor<[2,1024,640],f16>
%1504 = torch.aten.add.Tensor %1503, %107, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16>
%1505 = torch.aten.transpose.int %108, %int0, %int1 : !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,640],f16>
%1506 = torch.prim.ListConstruct %int2048, %int640 : (!torch.int, !torch.int) -> !torch.list<int>
%1507 = torch.aten.view %1504, %1506 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
%1508 = torch.aten.mm %1507, %1505 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16>
%1509 = torch.aten.view %1508, %1487 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%1510 = torch.aten.transpose.int %109, %int0, %int1 : !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,640],f16>
%1511 = torch.aten.view %1504, %1506 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
%1512 = torch.aten.mm %1511, %1510 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16>
%1513 = torch.aten.view %1512, %1487 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%1514 = torch.aten.transpose.int %110, %int0, %int1 : !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,640],f16>
%1515 = torch.aten.view %1504, %1506 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
%1516 = torch.aten.mm %1515, %1514 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16>
%1517 = torch.aten.view %1516, %1487 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%1518 = torch.prim.ListConstruct %int2, %int1024, %int8, %int80 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1519 = torch.aten.view %1509, %1518 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16>
%1520 = torch.aten.permute %1519, %901 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16>
%1521 = torch.aten.clone %1520, %int0 : !torch.vtensor<[2,8,1024,80],f16>, !torch.int -> !torch.vtensor<[2,8,1024,80],f16>
%1522 = torch.prim.ListConstruct %int16, %int1024, %int80 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1523 = torch.aten.view %1521, %1522 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%1524 = torch.aten.view %1513, %1518 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16>
%1525 = torch.aten.permute %1524, %901 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16>
%1526 = torch.aten.clone %1525, %int0 : !torch.vtensor<[2,8,1024,80],f16>, !torch.int -> !torch.vtensor<[2,8,1024,80],f16>
%1527 = torch.aten.view %1526, %1522 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%1528 = torch.aten.view %1517, %1518 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16>
%1529 = torch.aten.permute %1528, %901 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16>
%1530 = torch.aten.clone %1529, %int0 : !torch.vtensor<[2,8,1024,80],f16>, !torch.int -> !torch.vtensor<[2,8,1024,80],f16>
%1531 = torch.aten.view %1530, %1522 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%1532 = torch.aten.transpose.int %1527, %int-1, %int-2 : !torch.vtensor<[16,1024,80],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,80,1024],f16>
%1533 = torch.aten.broadcast_to %1523, %1522 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%1534 = torch.aten.view %1533, %1522 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%1535 = torch.prim.ListConstruct %int16, %int80, %int1024 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1536 = torch.aten.broadcast_to %1532, %1535 : !torch.vtensor<[16,80,1024],f16>, !torch.list<int> -> !torch.vtensor<[16,80,1024],f16>
%1537 = torch.aten.view %1536, %1535 : !torch.vtensor<[16,80,1024],f16>, !torch.list<int> -> !torch.vtensor<[16,80,1024],f16>
%1538 = torch.aten.bmm %1534, %1537 : !torch.vtensor<[16,1024,80],f16>, !torch.vtensor<[16,80,1024],f16> -> !torch.vtensor<[16,1024,1024],f16>
%1539 = torch.prim.ListConstruct %int16, %int1024, %int1024 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1540 = torch.aten.view %1538, %1539 : !torch.vtensor<[16,1024,1024],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,1024],f16>
%1541 = torch.aten.mul.Tensor %1540, %2 : !torch.vtensor<[16,1024,1024],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,1024,1024],f16>
%values_6, %indices_7 = torch.aten.max.dim %1541, %int-1, %true : !torch.vtensor<[16,1024,1024],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,1024,1],f16>, !torch.vtensor<[16,1024,1],si64>
%1542 = torch.aten.sub.Tensor %1541, %values_6, %float1.000000e00 : !torch.vtensor<[16,1024,1024],f16>, !torch.vtensor<[16,1024,1],f16>, !torch.float -> !torch.vtensor<[16,1024,1024],f16>
%1543 = torch.aten.exp %1542 : !torch.vtensor<[16,1024,1024],f16> -> !torch.vtensor<[16,1024,1024],f16>
%1544 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%1545 = torch.aten.sum.dim_IntList %1543, %1544, %true, %none : !torch.vtensor<[16,1024,1024],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[16,1024,1],f16>
%1546 = torch.aten.div.Tensor %1543, %1545 : !torch.vtensor<[16,1024,1024],f16>, !torch.vtensor<[16,1024,1],f16> -> !torch.vtensor<[16,1024,1024],f16>
%1547 = torch.aten.broadcast_to %1546, %1539 : !torch.vtensor<[16,1024,1024],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,1024],f16>
%1548 = torch.aten.view %1547, %1539 : !torch.vtensor<[16,1024,1024],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,1024],f16>
%1549 = torch.aten.broadcast_to %1531, %1522 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%1550 = torch.aten.view %1549, %1522 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%1551 = torch.aten.bmm %1548, %1550 : !torch.vtensor<[16,1024,1024],f16>, !torch.vtensor<[16,1024,80],f16> -> !torch.vtensor<[16,1024,80],f16>
%1552 = torch.aten.view %1551, %1522 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%1553 = torch.prim.ListConstruct %int2, %int8, %int1024, %int80 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1554 = torch.aten.view %1552, %1553 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16>
%1555 = torch.aten.permute %1554, %901 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16>
%1556 = torch.aten.clone %1555, %int0 : !torch.vtensor<[2,1024,8,80],f16>, !torch.int -> !torch.vtensor<[2,1024,8,80],f16>
%1557 = torch.aten.view %1556, %1487 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%1558 = torch.aten.transpose.int %111, %int0, %int1 : !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,640],f16>
%1559 = torch.aten.view %1557, %1506 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
%1560 = torch.aten.mm %1559, %1558 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16>
%1561 = torch.aten.mul.Scalar %112, %int1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640],f16>
%1562 = torch.aten.add.Tensor %1561, %1560, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2048,640],f16>, !torch.int -> !torch.vtensor<[2048,640],f16>
%1563 = torch.aten.view %1562, %1487 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%1564 = torch.aten.add.Tensor %1563, %1488, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16>
%1565 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%1566 = torch.aten.sum.dim_IntList %1564, %1565, %true, %none : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,1024,1],f16>
%1567 = torch.aten.div.Scalar %1566, %int640 : !torch.vtensor<[2,1024,1],f16>, !torch.int -> !torch.vtensor<[2,1024,1],f16>
%1568 = torch.prim.ListConstruct %int2, %int1024, %int640 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1569 = torch.aten.broadcast_to %1567, %1568 : !torch.vtensor<[2,1024,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%1570 = torch.aten.sub.Tensor %1564, %1569, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16>
%1571 = torch.aten.mul.Tensor %1570, %1570 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16>
%1572 = torch.aten.sum.dim_IntList %1571, %1565, %true, %none : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,1024,1],f16>
%1573 = torch.aten.div.Scalar %1572, %int640 : !torch.vtensor<[2,1024,1],f16>, !torch.int -> !torch.vtensor<[2,1024,1],f16>
%1574 = torch.aten.add.Scalar %1573, %float1.000000e-05, %int1 : !torch.vtensor<[2,1024,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,1024,1],f16>
%1575 = torch.aten.rsqrt %1574 : !torch.vtensor<[2,1024,1],f16> -> !torch.vtensor<[2,1024,1],f16>
%1576 = torch.prim.ListConstruct %int2, %int1024, %int640 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1577 = torch.aten.broadcast_to %1575, %1576 : !torch.vtensor<[2,1024,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%1578 = torch.aten.mul.Tensor %1570, %1577 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16>
%1579 = torch.aten.mul.Tensor %1578, %113 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[640],f16> -> !torch.vtensor<[2,1024,640],f16>
%1580 = torch.aten.add.Tensor %1579, %114, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16>
%1581 = torch.aten.transpose.int %115, %int0, %int1 : !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,640],f16>
%1582 = torch.aten.view %1580, %1506 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
%1583 = torch.aten.mm %1582, %1581 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16>
%1584 = torch.aten.view %1583, %1487 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%1585 = torch.aten.transpose.int %116, %int0, %int1 : !torch.vtensor<[640,768],f16>, !torch.int, !torch.int -> !torch.vtensor<[768,640],f16>
%1586 = torch.aten.view %arg2, %968 : !torch.vtensor<[2,77,768],f16>, !torch.list<int> -> !torch.vtensor<[154,768],f16>
%1587 = torch.aten.mm %1586, %1585 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,640],f16> -> !torch.vtensor<[154,640],f16>
%1588 = torch.prim.ListConstruct %int2, %int77, %int640 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1589 = torch.aten.view %1587, %1588 : !torch.vtensor<[154,640],f16>, !torch.list<int> -> !torch.vtensor<[2,77,640],f16>
%1590 = torch.aten.transpose.int %117, %int0, %int1 : !torch.vtensor<[640,768],f16>, !torch.int, !torch.int -> !torch.vtensor<[768,640],f16>
%1591 = torch.aten.view %arg2, %968 : !torch.vtensor<[2,77,768],f16>, !torch.list<int> -> !torch.vtensor<[154,768],f16>
%1592 = torch.aten.mm %1591, %1590 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,640],f16> -> !torch.vtensor<[154,640],f16>
%1593 = torch.aten.view %1592, %1588 : !torch.vtensor<[154,640],f16>, !torch.list<int> -> !torch.vtensor<[2,77,640],f16>
%1594 = torch.aten.view %1584, %1518 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16>
%1595 = torch.aten.permute %1594, %901 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16>
%1596 = torch.aten.clone %1595, %int0 : !torch.vtensor<[2,8,1024,80],f16>, !torch.int -> !torch.vtensor<[2,8,1024,80],f16>
%1597 = torch.aten.view %1596, %1522 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%1598 = torch.prim.ListConstruct %int2, %int77, %int8, %int80 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1599 = torch.aten.view %1589, %1598 : !torch.vtensor<[2,77,640],f16>, !torch.list<int> -> !torch.vtensor<[2,77,8,80],f16>
%1600 = torch.aten.permute %1599, %901 : !torch.vtensor<[2,77,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,80],f16>
%1601 = torch.aten.clone %1600, %int0 : !torch.vtensor<[2,8,77,80],f16>, !torch.int -> !torch.vtensor<[2,8,77,80],f16>
%1602 = torch.prim.ListConstruct %int16, %int77, %int80 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1603 = torch.aten.view %1601, %1602 : !torch.vtensor<[2,8,77,80],f16>, !torch.list<int> -> !torch.vtensor<[16,77,80],f16>
%1604 = torch.aten.view %1593, %1598 : !torch.vtensor<[2,77,640],f16>, !torch.list<int> -> !torch.vtensor<[2,77,8,80],f16>
%1605 = torch.aten.permute %1604, %901 : !torch.vtensor<[2,77,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,80],f16>
%1606 = torch.aten.clone %1605, %int0 : !torch.vtensor<[2,8,77,80],f16>, !torch.int -> !torch.vtensor<[2,8,77,80],f16>
%1607 = torch.aten.view %1606, %1602 : !torch.vtensor<[2,8,77,80],f16>, !torch.list<int> -> !torch.vtensor<[16,77,80],f16>
%1608 = torch.aten.transpose.int %1603, %int-1, %int-2 : !torch.vtensor<[16,77,80],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,80,77],f16>
%1609 = torch.aten.broadcast_to %1597, %1522 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%1610 = torch.aten.view %1609, %1522 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%1611 = torch.prim.ListConstruct %int16, %int80, %int77 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1612 = torch.aten.broadcast_to %1608, %1611 : !torch.vtensor<[16,80,77],f16>, !torch.list<int> -> !torch.vtensor<[16,80,77],f16>
%1613 = torch.aten.view %1612, %1611 : !torch.vtensor<[16,80,77],f16>, !torch.list<int> -> !torch.vtensor<[16,80,77],f16>
%1614 = torch.aten.bmm %1610, %1613 : !torch.vtensor<[16,1024,80],f16>, !torch.vtensor<[16,80,77],f16> -> !torch.vtensor<[16,1024,77],f16>
%1615 = torch.prim.ListConstruct %int16, %int1024, %int77 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1616 = torch.aten.view %1614, %1615 : !torch.vtensor<[16,1024,77],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,77],f16>
%1617 = torch.aten.mul.Tensor %1616, %2 : !torch.vtensor<[16,1024,77],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,1024,77],f16>
%values_8, %indices_9 = torch.aten.max.dim %1617, %int-1, %true : !torch.vtensor<[16,1024,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,1024,1],f16>, !torch.vtensor<[16,1024,1],si64>
%1618 = torch.aten.sub.Tensor %1617, %values_8, %float1.000000e00 : !torch.vtensor<[16,1024,77],f16>, !torch.vtensor<[16,1024,1],f16>, !torch.float -> !torch.vtensor<[16,1024,77],f16>
%1619 = torch.aten.exp %1618 : !torch.vtensor<[16,1024,77],f16> -> !torch.vtensor<[16,1024,77],f16>
%1620 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%1621 = torch.aten.sum.dim_IntList %1619, %1620, %true, %none : !torch.vtensor<[16,1024,77],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[16,1024,1],f16>
%1622 = torch.aten.div.Tensor %1619, %1621 : !torch.vtensor<[16,1024,77],f16>, !torch.vtensor<[16,1024,1],f16> -> !torch.vtensor<[16,1024,77],f16>
%1623 = torch.aten.broadcast_to %1622, %1615 : !torch.vtensor<[16,1024,77],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,77],f16>
%1624 = torch.aten.view %1623, %1615 : !torch.vtensor<[16,1024,77],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,77],f16>
%1625 = torch.aten.broadcast_to %1607, %1602 : !torch.vtensor<[16,77,80],f16>, !torch.list<int> -> !torch.vtensor<[16,77,80],f16>
%1626 = torch.aten.view %1625, %1602 : !torch.vtensor<[16,77,80],f16>, !torch.list<int> -> !torch.vtensor<[16,77,80],f16>
%1627 = torch.aten.bmm %1624, %1626 : !torch.vtensor<[16,1024,77],f16>, !torch.vtensor<[16,77,80],f16> -> !torch.vtensor<[16,1024,80],f16>
%1628 = torch.aten.view %1627, %1522 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%1629 = torch.aten.view %1628, %1553 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16>
%1630 = torch.aten.permute %1629, %901 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16>
%1631 = torch.aten.clone %1630, %int0 : !torch.vtensor<[2,1024,8,80],f16>, !torch.int -> !torch.vtensor<[2,1024,8,80],f16>
%1632 = torch.aten.view %1631, %1487 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%1633 = torch.aten.transpose.int %118, %int0, %int1 : !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,640],f16>
%1634 = torch.aten.view %1632, %1506 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
%1635 = torch.aten.mm %1634, %1633 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16>
%1636 = torch.aten.mul.Scalar %119, %int1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640],f16>
%1637 = torch.aten.add.Tensor %1636, %1635, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2048,640],f16>, !torch.int -> !torch.vtensor<[2048,640],f16>
%1638 = torch.aten.view %1637, %1487 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%1639 = torch.aten.add.Tensor %1638, %1564, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16>
%1640 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%1641 = torch.aten.sum.dim_IntList %1639, %1640, %true, %none : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,1024,1],f16>
%1642 = torch.aten.div.Scalar %1641, %int640 : !torch.vtensor<[2,1024,1],f16>, !torch.int -> !torch.vtensor<[2,1024,1],f16>
%1643 = torch.prim.ListConstruct %int2, %int1024, %int640 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1644 = torch.aten.broadcast_to %1642, %1643 : !torch.vtensor<[2,1024,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%1645 = torch.aten.sub.Tensor %1639, %1644, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16>
%1646 = torch.aten.mul.Tensor %1645, %1645 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16>
%1647 = torch.aten.sum.dim_IntList %1646, %1640, %true, %none : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,1024,1],f16>
%1648 = torch.aten.div.Scalar %1647, %int640 : !torch.vtensor<[2,1024,1],f16>, !torch.int -> !torch.vtensor<[2,1024,1],f16>
%1649 = torch.aten.add.Scalar %1648, %float1.000000e-05, %int1 : !torch.vtensor<[2,1024,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,1024,1],f16>
%1650 = torch.aten.rsqrt %1649 : !torch.vtensor<[2,1024,1],f16> -> !torch.vtensor<[2,1024,1],f16>
%1651 = torch.prim.ListConstruct %int2, %int1024, %int640 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1652 = torch.aten.broadcast_to %1650, %1651 : !torch.vtensor<[2,1024,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%1653 = torch.aten.mul.Tensor %1645, %1652 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16>
%1654 = torch.aten.mul.Tensor %1653, %120 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[640],f16> -> !torch.vtensor<[2,1024,640],f16>
%1655 = torch.aten.add.Tensor %1654, %121, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16>
%1656 = torch.aten.transpose.int %122, %int0, %int1 : !torch.vtensor<[5120,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,5120],f16>
%1657 = torch.aten.view %1655, %1506 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
%1658 = torch.aten.mm %1657, %1656 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,5120],f16> -> !torch.vtensor<[2048,5120],f16>
%1659 = torch.aten.mul.Scalar %123, %int1 : !torch.vtensor<[5120],f16>, !torch.int -> !torch.vtensor<[5120],f16>
%1660 = torch.aten.add.Tensor %1659, %1658, %int1 : !torch.vtensor<[5120],f16>, !torch.vtensor<[2048,5120],f16>, !torch.int -> !torch.vtensor<[2048,5120],f16>
%1661 = torch.prim.ListConstruct %int2, %int1024, %int5120 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1662 = torch.aten.view %1660, %1661 : !torch.vtensor<[2048,5120],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,5120],f16>
%1663 = torch.aten.slice.Tensor %1662, %int-1, %int0, %int2560, %int1 : !torch.vtensor<[2,1024,5120],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1024,2560],f16>
%1664 = torch.aten.slice.Tensor %1662, %int-1, %int2560, %int5120, %int1 : !torch.vtensor<[2,1024,5120],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1024,2560],f16>
%1665 = torch.aten.gelu %1664, %str : !torch.vtensor<[2,1024,2560],f16>, !torch.str -> !torch.vtensor<[2,1024,2560],f16>
%1666 = torch.aten.mul.Tensor %1663, %1665 : !torch.vtensor<[2,1024,2560],f16>, !torch.vtensor<[2,1024,2560],f16> -> !torch.vtensor<[2,1024,2560],f16>
%1667 = torch.aten.transpose.int %124, %int0, %int1 : !torch.vtensor<[640,2560],f16>, !torch.int, !torch.int -> !torch.vtensor<[2560,640],f16>
%1668 = torch.prim.ListConstruct %int2048, %int2560 : (!torch.int, !torch.int) -> !torch.list<int>
%1669 = torch.aten.view %1666, %1668 : !torch.vtensor<[2,1024,2560],f16>, !torch.list<int> -> !torch.vtensor<[2048,2560],f16>
%1670 = torch.aten.mm %1669, %1667 : !torch.vtensor<[2048,2560],f16>, !torch.vtensor<[2560,640],f16> -> !torch.vtensor<[2048,640],f16>
%1671 = torch.aten.mul.Scalar %125, %int1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640],f16>
%1672 = torch.aten.add.Tensor %1671, %1670, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2048,640],f16>, !torch.int -> !torch.vtensor<[2048,640],f16>
%1673 = torch.aten.view %1672, %1487 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%1674 = torch.aten.add.Tensor %1673, %1639, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16>
%1675 = torch.prim.ListConstruct %int2, %int32, %int32, %int640 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1676 = torch.aten.view %1674, %1675 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2,32,32,640],f16>
%1677 = torch.aten.permute %1676, %1060 : !torch.vtensor<[2,32,32,640],f16>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f16>
%1678 = torch.aten.convolution %1677, %126, %127, %744, %745, %744, %false, %745, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[640,640,1,1],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
%1679 = torch.aten.add.Tensor %1678, %1451, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
%1680 = torch.aten.clone %1679, %int0 : !torch.vtensor<[2,640,32,32],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
%1681 = torch.aten.view %1680, %1412 : !torch.vtensor<[2,640,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,32,20,1024],f16>
%1682 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%1683 = torch.aten.to.dtype %1682, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%1684 = torch.prim.ListConstruct %int2, %int32, %int20, %int1024 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1685 = torch.aten.broadcast_to %1683, %1684 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,20,1024],f32>
%1686 = torch.valsem.aten.copy %1685, %1681, %false : !torch.vtensor<[2,32,20,1024],f32>, !torch.vtensor<[2,32,20,1024],f16>, !torch.bool -> !torch.vtensor<[2,32,20,1024],f32>
%1687 = torch.aten.to.dtype %1686, %int7, %false, %false, %none : !torch.vtensor<[2,32,20,1024],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,20,1024],f64>
%1688 = torch.aten.sum.dim_IntList %1687, %754, %true, %none : !torch.vtensor<[2,32,20,1024],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%1689 = torch.aten.div.Scalar %1688, %int20480 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%1690 = torch.aten.sub.Tensor %1687, %1689, %float1.000000e00 : !torch.vtensor<[2,32,20,1024],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,20,1024],f64>
%1691 = torch.aten.mul.Tensor %1690, %1690 : !torch.vtensor<[2,32,20,1024],f64>, !torch.vtensor<[2,32,20,1024],f64> -> !torch.vtensor<[2,32,20,1024],f64>
%1692 = torch.aten.sum.dim_IntList %1691, %754, %true, %none : !torch.vtensor<[2,32,20,1024],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%1693 = torch.aten.div.Scalar %1692, %int20480 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%1694 = torch.aten.to.dtype %1693, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%1695 = torch.aten.sum.dim_IntList %1686, %754, %true, %none : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%1696 = torch.aten.div.Scalar %1695, %int20480 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%1697 = torch.aten.add.Tensor %1694, %6, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%1698 = torch.aten.rsqrt %1697 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%1699 = torch.aten.sub.Tensor %1681, %1696, %int1 : !torch.vtensor<[2,32,20,1024],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,20,1024],f32>
%1700 = torch.aten.mul.Tensor %1699, %1698 : !torch.vtensor<[2,32,20,1024],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,20,1024],f32>
%1701 = torch.aten.view %1700, %1433 : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f32>
%1702 = torch.aten.unsqueeze %128, %int-1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640,1],f16>
%1703 = torch.aten.unsqueeze %1702, %int-1 : !torch.vtensor<[640,1],f16>, !torch.int -> !torch.vtensor<[640,1,1],f16>
%1704 = torch.aten.mul.Tensor %1701, %1703 : !torch.vtensor<[2,640,32,32],f32>, !torch.vtensor<[640,1,1],f16> -> !torch.vtensor<[2,640,32,32],f32>
%1705 = torch.aten.unsqueeze %129, %int-1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640,1],f16>
%1706 = torch.aten.unsqueeze %1705, %int-1 : !torch.vtensor<[640,1],f16>, !torch.int -> !torch.vtensor<[640,1,1],f16>
%1707 = torch.aten.add.Tensor %1704, %1706, %int1 : !torch.vtensor<[2,640,32,32],f32>, !torch.vtensor<[640,1,1],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f32>
%1708 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%1709 = torch.aten.to.dtype %1708, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%1710 = torch.prim.ListConstruct %int2, %int640, %int32, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1711 = torch.aten.broadcast_to %1709, %1710 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f16>
%1712 = torch.valsem.aten.copy %1711, %1707, %false : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f32>, !torch.bool -> !torch.vtensor<[2,640,32,32],f16>
%1713 = torch.aten.sigmoid %1712 : !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16>
%1714 = torch.aten.mul.Tensor %1713, %1712 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16>
%1715 = torch.aten.convolution %1714, %130, %131, %744, %744, %744, %false, %745, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[640,640,3,3],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
%1716 = torch.aten.sigmoid %743 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%1717 = torch.aten.mul.Tensor %1716, %743 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%1718 = torch.aten.transpose.int %132, %int0, %int1 : !torch.vtensor<[640,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,640],f16>
%1719 = torch.aten.mm %1717, %1718 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,640],f16> -> !torch.vtensor<[2,640],f16>
%1720 = torch.aten.mul.Scalar %133, %int1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640],f16>
%1721 = torch.aten.add.Tensor %1720, %1719, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2,640],f16>, !torch.int -> !torch.vtensor<[2,640],f16>
%1722 = torch.aten.slice.Tensor %1721, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,640],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,640],f16>
%1723 = torch.aten.slice.Tensor %1722, %int1, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,640],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,640],f16>
%1724 = torch.aten.unsqueeze %1723, %int2 : !torch.vtensor<[2,640],f16>, !torch.int -> !torch.vtensor<[2,640,1],f16>
%1725 = torch.aten.unsqueeze %1724, %int3 : !torch.vtensor<[2,640,1],f16>, !torch.int -> !torch.vtensor<[2,640,1,1],f16>
%1726 = torch.aten.add.Tensor %1715, %1725, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,1,1],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
%1727 = torch.aten.view %1726, %1412 : !torch.vtensor<[2,640,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,32,20,1024],f16>
%1728 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%1729 = torch.aten.to.dtype %1728, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
// --- Normalization over [2,32,20,1024] (32 groups, 20*1024 = 20480 elements per
// group; matches the %int20480 divisors below). The f16 input is first widened:
// a zero f32 tensor is broadcast and the f16 data copied into it, then upcast to
// f64 so the mean/variance accumulation is numerically stable.
%1730 = torch.prim.ListConstruct %int2, %int32, %int20, %int1024 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1731 = torch.aten.broadcast_to %1729, %1730 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,20,1024],f32>
%1732 = torch.valsem.aten.copy %1731, %1727, %false : !torch.vtensor<[2,32,20,1024],f32>, !torch.vtensor<[2,32,20,1024],f16>, !torch.bool -> !torch.vtensor<[2,32,20,1024],f32>
%1733 = torch.aten.to.dtype %1732, %int7, %false, %false, %none : !torch.vtensor<[2,32,20,1024],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,20,1024],f64>
// mean = sum(x) / 20480, computed in f64 with keepdim over dims %754.
%1734 = torch.aten.sum.dim_IntList %1733, %754, %true, %none : !torch.vtensor<[2,32,20,1024],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%1735 = torch.aten.div.Scalar %1734, %int20480 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
// var = mean((x - mean)^2), also in f64, then downcast to f32.
%1736 = torch.aten.sub.Tensor %1733, %1735, %float1.000000e00 : !torch.vtensor<[2,32,20,1024],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,20,1024],f64>
%1737 = torch.aten.mul.Tensor %1736, %1736 : !torch.vtensor<[2,32,20,1024],f64>, !torch.vtensor<[2,32,20,1024],f64> -> !torch.vtensor<[2,32,20,1024],f64>
%1738 = torch.aten.sum.dim_IntList %1737, %754, %true, %none : !torch.vtensor<[2,32,20,1024],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%1739 = torch.aten.div.Scalar %1738, %int20480 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%1740 = torch.aten.to.dtype %1739, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
// f32 mean recomputed from the f32 copy (used for the normalization itself).
%1741 = torch.aten.sum.dim_IntList %1732, %754, %true, %none : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%1742 = torch.aten.div.Scalar %1741, %int20480 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
// inv_std = rsqrt(var + eps); %6 is the eps constant (defined above this chunk).
%1743 = torch.aten.add.Tensor %1740, %6, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%1744 = torch.aten.rsqrt %1743 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
// normalized = (x - mean) * inv_std, then reshape back to NCHW [2,640,32,32].
%1745 = torch.aten.sub.Tensor %1727, %1742, %int1 : !torch.vtensor<[2,32,20,1024],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,20,1024],f32>
%1746 = torch.aten.mul.Tensor %1745, %1744 : !torch.vtensor<[2,32,20,1024],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,20,1024],f32>
%1747 = torch.aten.view %1746, %1433 : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f32>
// Per-channel affine: weight %134 and bias %135 ([640]) unsqueezed to [640,1,1].
%1748 = torch.aten.unsqueeze %134, %int-1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640,1],f16>
%1749 = torch.aten.unsqueeze %1748, %int-1 : !torch.vtensor<[640,1],f16>, !torch.int -> !torch.vtensor<[640,1,1],f16>
%1750 = torch.aten.mul.Tensor %1747, %1749 : !torch.vtensor<[2,640,32,32],f32>, !torch.vtensor<[640,1,1],f16> -> !torch.vtensor<[2,640,32,32],f32>
%1751 = torch.aten.unsqueeze %135, %int-1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640,1],f16>
%1752 = torch.aten.unsqueeze %1751, %int-1 : !torch.vtensor<[640,1],f16>, !torch.int -> !torch.vtensor<[640,1,1],f16>
%1753 = torch.aten.add.Tensor %1750, %1752, %int1 : !torch.vtensor<[2,640,32,32],f32>, !torch.vtensor<[640,1,1],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f32>
// Downcast back to f16 (zero-broadcast + copy pattern again).
%1754 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%1755 = torch.aten.to.dtype %1754, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%1756 = torch.prim.ListConstruct %int2, %int640, %int32, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1757 = torch.aten.broadcast_to %1755, %1756 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f16>
%1758 = torch.valsem.aten.copy %1757, %1753, %false : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f32>, !torch.bool -> !torch.vtensor<[2,640,32,32],f16>
// x * sigmoid(x) activation (SiLU/swish form), then a 3x3 conv (%136/%137).
%1759 = torch.aten.sigmoid %1758 : !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16>
%1760 = torch.aten.mul.Tensor %1759, %1758 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16>
%1761 = torch.aten.convolution %1760, %136, %137, %744, %744, %744, %false, %745, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[640,640,3,3],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
// Residual add with %1679 (skip path from before this chunk), then divide by
// %5 — NOTE(review): %5 is a scalar constant defined above; presumably an
// output-scale factor. Confirm against the constants at the top of the func.
%1762 = torch.aten.add.Tensor %1679, %1761, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
%1763 = torch.aten.div.Tensor %1762, %5 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[2,640,32,32],f16>
// --- Second normalization, same recipe as above: reshape to [2,32,20,1024],
// upcast, f64 mean/var over %754 (/20480), rsqrt(var + eps %4), normalize.
%1764 = torch.aten.clone %1763, %int0 : !torch.vtensor<[2,640,32,32],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
%1765 = torch.aten.view %1764, %1412 : !torch.vtensor<[2,640,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,32,20,1024],f16>
%1766 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%1767 = torch.aten.to.dtype %1766, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%1768 = torch.prim.ListConstruct %int2, %int32, %int20, %int1024 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1769 = torch.aten.broadcast_to %1767, %1768 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,20,1024],f32>
%1770 = torch.valsem.aten.copy %1769, %1765, %false : !torch.vtensor<[2,32,20,1024],f32>, !torch.vtensor<[2,32,20,1024],f16>, !torch.bool -> !torch.vtensor<[2,32,20,1024],f32>
%1771 = torch.aten.to.dtype %1770, %int7, %false, %false, %none : !torch.vtensor<[2,32,20,1024],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,20,1024],f64>
%1772 = torch.aten.sum.dim_IntList %1771, %754, %true, %none : !torch.vtensor<[2,32,20,1024],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%1773 = torch.aten.div.Scalar %1772, %int20480 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%1774 = torch.aten.sub.Tensor %1771, %1773, %float1.000000e00 : !torch.vtensor<[2,32,20,1024],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,20,1024],f64>
%1775 = torch.aten.mul.Tensor %1774, %1774 : !torch.vtensor<[2,32,20,1024],f64>, !torch.vtensor<[2,32,20,1024],f64> -> !torch.vtensor<[2,32,20,1024],f64>
%1776 = torch.aten.sum.dim_IntList %1775, %754, %true, %none : !torch.vtensor<[2,32,20,1024],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%1777 = torch.aten.div.Scalar %1776, %int20480 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%1778 = torch.aten.to.dtype %1777, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%1779 = torch.aten.sum.dim_IntList %1770, %754, %true, %none : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%1780 = torch.aten.div.Scalar %1779, %int20480 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%1781 = torch.aten.add.Tensor %1778, %4, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%1782 = torch.aten.rsqrt %1781 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%1783 = torch.aten.sub.Tensor %1765, %1780, %int1 : !torch.vtensor<[2,32,20,1024],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,20,1024],f32>
%1784 = torch.aten.mul.Tensor %1783, %1782 : !torch.vtensor<[2,32,20,1024],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,20,1024],f32>
%1785 = torch.aten.view %1784, %1433 : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f32>
// Per-channel affine (weight %138, bias %139), downcast to f16, then a 1x1
// projection conv (%140/%141) ahead of the token-space (attention) section.
%1786 = torch.aten.unsqueeze %138, %int-1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640,1],f16>
%1787 = torch.aten.unsqueeze %1786, %int-1 : !torch.vtensor<[640,1],f16>, !torch.int -> !torch.vtensor<[640,1,1],f16>
%1788 = torch.aten.mul.Tensor %1785, %1787 : !torch.vtensor<[2,640,32,32],f32>, !torch.vtensor<[640,1,1],f16> -> !torch.vtensor<[2,640,32,32],f32>
%1789 = torch.aten.unsqueeze %139, %int-1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640,1],f16>
%1790 = torch.aten.unsqueeze %1789, %int-1 : !torch.vtensor<[640,1],f16>, !torch.int -> !torch.vtensor<[640,1,1],f16>
%1791 = torch.aten.add.Tensor %1788, %1790, %int1 : !torch.vtensor<[2,640,32,32],f32>, !torch.vtensor<[640,1,1],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f32>
%1792 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%1793 = torch.aten.to.dtype %1792, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%1794 = torch.prim.ListConstruct %int2, %int640, %int32, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1795 = torch.aten.broadcast_to %1793, %1794 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f16>
%1796 = torch.valsem.aten.copy %1795, %1791, %false : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f32>, !torch.bool -> !torch.vtensor<[2,640,32,32],f16>
%1797 = torch.aten.convolution %1796, %140, %141, %744, %745, %744, %false, %745, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[640,640,1,1],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
// NCHW -> NHWC permute and flatten spatial dims: [2,640,32,32] -> [2,1024,640]
// (1024 tokens of width 640).
%1798 = torch.aten.permute %1797, %866 : !torch.vtensor<[2,640,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,32,32,640],f16>
%1799 = torch.aten.view %1798, %1487 : !torch.vtensor<[2,32,32,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
// Layer norm over the last (640) dim — mean/var via sum over dim 2, /640,
// eps 1e-5 — followed by scale %142 and shift %143.
%1800 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%1801 = torch.aten.sum.dim_IntList %1799, %1800, %true, %none : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,1024,1],f16>
%1802 = torch.aten.div.Scalar %1801, %int640 : !torch.vtensor<[2,1024,1],f16>, !torch.int -> !torch.vtensor<[2,1024,1],f16>
%1803 = torch.prim.ListConstruct %int2, %int1024, %int640 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1804 = torch.aten.broadcast_to %1802, %1803 : !torch.vtensor<[2,1024,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%1805 = torch.aten.sub.Tensor %1799, %1804, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16>
%1806 = torch.aten.mul.Tensor %1805, %1805 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16>
%1807 = torch.aten.sum.dim_IntList %1806, %1800, %true, %none : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,1024,1],f16>
%1808 = torch.aten.div.Scalar %1807, %int640 : !torch.vtensor<[2,1024,1],f16>, !torch.int -> !torch.vtensor<[2,1024,1],f16>
%1809 = torch.aten.add.Scalar %1808, %float1.000000e-05, %int1 : !torch.vtensor<[2,1024,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,1024,1],f16>
%1810 = torch.aten.rsqrt %1809 : !torch.vtensor<[2,1024,1],f16> -> !torch.vtensor<[2,1024,1],f16>
%1811 = torch.prim.ListConstruct %int2, %int1024, %int640 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1812 = torch.aten.broadcast_to %1810, %1811 : !torch.vtensor<[2,1024,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%1813 = torch.aten.mul.Tensor %1805, %1812 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16>
%1814 = torch.aten.mul.Tensor %1813, %142 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[640],f16> -> !torch.vtensor<[2,1024,640],f16>
%1815 = torch.aten.add.Tensor %1814, %143, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16>
// --- Attention over the 1024 image tokens (Q, K, V all from %1815, i.e.
// self-attention). Projections: Q=%144, K=%145, V=%146, each 640x640, no bias.
%1816 = torch.aten.transpose.int %144, %int0, %int1 : !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,640],f16>
%1817 = torch.aten.view %1815, %1506 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
%1818 = torch.aten.mm %1817, %1816 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16>
%1819 = torch.aten.view %1818, %1487 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%1820 = torch.aten.transpose.int %145, %int0, %int1 : !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,640],f16>
%1821 = torch.aten.view %1815, %1506 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
%1822 = torch.aten.mm %1821, %1820 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16>
%1823 = torch.aten.view %1822, %1487 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%1824 = torch.aten.transpose.int %146, %int0, %int1 : !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,640],f16>
%1825 = torch.aten.view %1815, %1506 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
%1826 = torch.aten.mm %1825, %1824 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16>
%1827 = torch.aten.view %1826, %1487 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
// Split 640 -> 8 heads x 80, fold heads into batch: [2,1024,640] ->
// [2,8,1024,80] -> [16,1024,80] for Q, K, V.
%1828 = torch.aten.view %1819, %1518 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16>
%1829 = torch.aten.permute %1828, %901 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16>
%1830 = torch.aten.clone %1829, %int0 : !torch.vtensor<[2,8,1024,80],f16>, !torch.int -> !torch.vtensor<[2,8,1024,80],f16>
%1831 = torch.aten.view %1830, %1522 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%1832 = torch.aten.view %1823, %1518 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16>
%1833 = torch.aten.permute %1832, %901 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16>
%1834 = torch.aten.clone %1833, %int0 : !torch.vtensor<[2,8,1024,80],f16>, !torch.int -> !torch.vtensor<[2,8,1024,80],f16>
%1835 = torch.aten.view %1834, %1522 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%1836 = torch.aten.view %1827, %1518 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16>
%1837 = torch.aten.permute %1836, %901 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16>
%1838 = torch.aten.clone %1837, %int0 : !torch.vtensor<[2,8,1024,80],f16>, !torch.int -> !torch.vtensor<[2,8,1024,80],f16>
%1839 = torch.aten.view %1838, %1522 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
// scores = (Q @ K^T) * %2; %2 = 0.1118... = 1/sqrt(80), the per-head scale.
%1840 = torch.aten.transpose.int %1835, %int-1, %int-2 : !torch.vtensor<[16,1024,80],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,80,1024],f16>
%1841 = torch.aten.broadcast_to %1831, %1522 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%1842 = torch.aten.view %1841, %1522 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%1843 = torch.aten.broadcast_to %1840, %1535 : !torch.vtensor<[16,80,1024],f16>, !torch.list<int> -> !torch.vtensor<[16,80,1024],f16>
%1844 = torch.aten.view %1843, %1535 : !torch.vtensor<[16,80,1024],f16>, !torch.list<int> -> !torch.vtensor<[16,80,1024],f16>
%1845 = torch.aten.bmm %1842, %1844 : !torch.vtensor<[16,1024,80],f16>, !torch.vtensor<[16,80,1024],f16> -> !torch.vtensor<[16,1024,1024],f16>
%1846 = torch.aten.view %1845, %1539 : !torch.vtensor<[16,1024,1024],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,1024],f16>
%1847 = torch.aten.mul.Tensor %1846, %2 : !torch.vtensor<[16,1024,1024],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,1024,1024],f16>
// Numerically stable softmax over the last dim: subtract the row max,
// exponentiate, divide by the row sum.
%values_10, %indices_11 = torch.aten.max.dim %1847, %int-1, %true : !torch.vtensor<[16,1024,1024],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,1024,1],f16>, !torch.vtensor<[16,1024,1],si64>
%1848 = torch.aten.sub.Tensor %1847, %values_10, %float1.000000e00 : !torch.vtensor<[16,1024,1024],f16>, !torch.vtensor<[16,1024,1],f16>, !torch.float -> !torch.vtensor<[16,1024,1024],f16>
%1849 = torch.aten.exp %1848 : !torch.vtensor<[16,1024,1024],f16> -> !torch.vtensor<[16,1024,1024],f16>
%1850 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%1851 = torch.aten.sum.dim_IntList %1849, %1850, %true, %none : !torch.vtensor<[16,1024,1024],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[16,1024,1],f16>
%1852 = torch.aten.div.Tensor %1849, %1851 : !torch.vtensor<[16,1024,1024],f16>, !torch.vtensor<[16,1024,1],f16> -> !torch.vtensor<[16,1024,1024],f16>
// attn @ V, then un-fold heads: [16,1024,80] -> [2,8,1024,80] -> [2,1024,640].
%1853 = torch.aten.broadcast_to %1852, %1539 : !torch.vtensor<[16,1024,1024],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,1024],f16>
%1854 = torch.aten.view %1853, %1539 : !torch.vtensor<[16,1024,1024],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,1024],f16>
%1855 = torch.aten.broadcast_to %1839, %1522 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%1856 = torch.aten.view %1855, %1522 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%1857 = torch.aten.bmm %1854, %1856 : !torch.vtensor<[16,1024,1024],f16>, !torch.vtensor<[16,1024,80],f16> -> !torch.vtensor<[16,1024,80],f16>
%1858 = torch.aten.view %1857, %1522 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%1859 = torch.aten.view %1858, %1553 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16>
%1860 = torch.aten.permute %1859, %901 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16>
%1861 = torch.aten.clone %1860, %int0 : !torch.vtensor<[2,1024,8,80],f16>, !torch.int -> !torch.vtensor<[2,1024,8,80],f16>
%1862 = torch.aten.view %1861, %1487 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
// Output projection (weight %147, bias %148; bias added via mul.Scalar-by-1
// then add — the lowering's linear form) plus residual with the block input.
%1863 = torch.aten.transpose.int %147, %int0, %int1 : !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,640],f16>
%1864 = torch.aten.view %1862, %1506 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
%1865 = torch.aten.mm %1864, %1863 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16>
%1866 = torch.aten.mul.Scalar %148, %int1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640],f16>
%1867 = torch.aten.add.Tensor %1866, %1865, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2048,640],f16>, !torch.int -> !torch.vtensor<[2048,640],f16>
%1868 = torch.aten.view %1867, %1487 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%1869 = torch.aten.add.Tensor %1868, %1799, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16>
// Layer norm #2 (same decomposed pattern), scale %149, shift %150.
%1870 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%1871 = torch.aten.sum.dim_IntList %1869, %1870, %true, %none : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,1024,1],f16>
%1872 = torch.aten.div.Scalar %1871, %int640 : !torch.vtensor<[2,1024,1],f16>, !torch.int -> !torch.vtensor<[2,1024,1],f16>
%1873 = torch.prim.ListConstruct %int2, %int1024, %int640 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1874 = torch.aten.broadcast_to %1872, %1873 : !torch.vtensor<[2,1024,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%1875 = torch.aten.sub.Tensor %1869, %1874, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16>
%1876 = torch.aten.mul.Tensor %1875, %1875 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16>
%1877 = torch.aten.sum.dim_IntList %1876, %1870, %true, %none : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,1024,1],f16>
%1878 = torch.aten.div.Scalar %1877, %int640 : !torch.vtensor<[2,1024,1],f16>, !torch.int -> !torch.vtensor<[2,1024,1],f16>
%1879 = torch.aten.add.Scalar %1878, %float1.000000e-05, %int1 : !torch.vtensor<[2,1024,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,1024,1],f16>
%1880 = torch.aten.rsqrt %1879 : !torch.vtensor<[2,1024,1],f16> -> !torch.vtensor<[2,1024,1],f16>
%1881 = torch.prim.ListConstruct %int2, %int1024, %int640 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1882 = torch.aten.broadcast_to %1880, %1881 : !torch.vtensor<[2,1024,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%1883 = torch.aten.mul.Tensor %1875, %1882 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16>
%1884 = torch.aten.mul.Tensor %1883, %149 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[640],f16> -> !torch.vtensor<[2,1024,640],f16>
%1885 = torch.aten.add.Tensor %1884, %150, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16>
// --- Cross-attention: Q from the image tokens (weight %151, 640x640); K and V
// from %arg2 [2,77,768] (encoder hidden states, 77 tokens) via 768->640
// projections %152 and %153.
%1886 = torch.aten.transpose.int %151, %int0, %int1 : !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,640],f16>
%1887 = torch.aten.view %1885, %1506 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
%1888 = torch.aten.mm %1887, %1886 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16>
%1889 = torch.aten.view %1888, %1487 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%1890 = torch.aten.transpose.int %152, %int0, %int1 : !torch.vtensor<[640,768],f16>, !torch.int, !torch.int -> !torch.vtensor<[768,640],f16>
%1891 = torch.aten.view %arg2, %968 : !torch.vtensor<[2,77,768],f16>, !torch.list<int> -> !torch.vtensor<[154,768],f16>
%1892 = torch.aten.mm %1891, %1890 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,640],f16> -> !torch.vtensor<[154,640],f16>
%1893 = torch.aten.view %1892, %1588 : !torch.vtensor<[154,640],f16>, !torch.list<int> -> !torch.vtensor<[2,77,640],f16>
%1894 = torch.aten.transpose.int %153, %int0, %int1 : !torch.vtensor<[640,768],f16>, !torch.int, !torch.int -> !torch.vtensor<[768,640],f16>
%1895 = torch.aten.view %arg2, %968 : !torch.vtensor<[2,77,768],f16>, !torch.list<int> -> !torch.vtensor<[154,768],f16>
%1896 = torch.aten.mm %1895, %1894 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,640],f16> -> !torch.vtensor<[154,640],f16>
%1897 = torch.aten.view %1896, %1588 : !torch.vtensor<[154,640],f16>, !torch.list<int> -> !torch.vtensor<[2,77,640],f16>
// Head split: Q -> [16,1024,80]; K, V -> [16,77,80].
%1898 = torch.aten.view %1889, %1518 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16>
%1899 = torch.aten.permute %1898, %901 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16>
%1900 = torch.aten.clone %1899, %int0 : !torch.vtensor<[2,8,1024,80],f16>, !torch.int -> !torch.vtensor<[2,8,1024,80],f16>
%1901 = torch.aten.view %1900, %1522 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%1902 = torch.aten.view %1893, %1598 : !torch.vtensor<[2,77,640],f16>, !torch.list<int> -> !torch.vtensor<[2,77,8,80],f16>
%1903 = torch.aten.permute %1902, %901 : !torch.vtensor<[2,77,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,80],f16>
%1904 = torch.aten.clone %1903, %int0 : !torch.vtensor<[2,8,77,80],f16>, !torch.int -> !torch.vtensor<[2,8,77,80],f16>
%1905 = torch.aten.view %1904, %1602 : !torch.vtensor<[2,8,77,80],f16>, !torch.list<int> -> !torch.vtensor<[16,77,80],f16>
%1906 = torch.aten.view %1897, %1598 : !torch.vtensor<[2,77,640],f16>, !torch.list<int> -> !torch.vtensor<[2,77,8,80],f16>
%1907 = torch.aten.permute %1906, %901 : !torch.vtensor<[2,77,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,80],f16>
%1908 = torch.aten.clone %1907, %int0 : !torch.vtensor<[2,8,77,80],f16>, !torch.int -> !torch.vtensor<[2,8,77,80],f16>
%1909 = torch.aten.view %1908, %1602 : !torch.vtensor<[2,8,77,80],f16>, !torch.list<int> -> !torch.vtensor<[16,77,80],f16>
// scores = (Q @ K^T) * 1/sqrt(80), then the same stable-softmax pattern.
%1910 = torch.aten.transpose.int %1905, %int-1, %int-2 : !torch.vtensor<[16,77,80],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,80,77],f16>
%1911 = torch.aten.broadcast_to %1901, %1522 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%1912 = torch.aten.view %1911, %1522 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%1913 = torch.aten.broadcast_to %1910, %1611 : !torch.vtensor<[16,80,77],f16>, !torch.list<int> -> !torch.vtensor<[16,80,77],f16>
%1914 = torch.aten.view %1913, %1611 : !torch.vtensor<[16,80,77],f16>, !torch.list<int> -> !torch.vtensor<[16,80,77],f16>
%1915 = torch.aten.bmm %1912, %1914 : !torch.vtensor<[16,1024,80],f16>, !torch.vtensor<[16,80,77],f16> -> !torch.vtensor<[16,1024,77],f16>
%1916 = torch.aten.view %1915, %1615 : !torch.vtensor<[16,1024,77],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,77],f16>
%1917 = torch.aten.mul.Tensor %1916, %2 : !torch.vtensor<[16,1024,77],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,1024,77],f16>
%values_12, %indices_13 = torch.aten.max.dim %1917, %int-1, %true : !torch.vtensor<[16,1024,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,1024,1],f16>, !torch.vtensor<[16,1024,1],si64>
%1918 = torch.aten.sub.Tensor %1917, %values_12, %float1.000000e00 : !torch.vtensor<[16,1024,77],f16>, !torch.vtensor<[16,1024,1],f16>, !torch.float -> !torch.vtensor<[16,1024,77],f16>
%1919 = torch.aten.exp %1918 : !torch.vtensor<[16,1024,77],f16> -> !torch.vtensor<[16,1024,77],f16>
%1920 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%1921 = torch.aten.sum.dim_IntList %1919, %1920, %true, %none : !torch.vtensor<[16,1024,77],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[16,1024,1],f16>
%1922 = torch.aten.div.Tensor %1919, %1921 : !torch.vtensor<[16,1024,77],f16>, !torch.vtensor<[16,1024,1],f16> -> !torch.vtensor<[16,1024,77],f16>
// attn @ V, merge heads back to [2,1024,640].
%1923 = torch.aten.broadcast_to %1922, %1615 : !torch.vtensor<[16,1024,77],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,77],f16>
%1924 = torch.aten.view %1923, %1615 : !torch.vtensor<[16,1024,77],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,77],f16>
%1925 = torch.aten.broadcast_to %1909, %1602 : !torch.vtensor<[16,77,80],f16>, !torch.list<int> -> !torch.vtensor<[16,77,80],f16>
%1926 = torch.aten.view %1925, %1602 : !torch.vtensor<[16,77,80],f16>, !torch.list<int> -> !torch.vtensor<[16,77,80],f16>
%1927 = torch.aten.bmm %1924, %1926 : !torch.vtensor<[16,1024,77],f16>, !torch.vtensor<[16,77,80],f16> -> !torch.vtensor<[16,1024,80],f16>
%1928 = torch.aten.view %1927, %1522 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%1929 = torch.aten.view %1928, %1553 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16>
%1930 = torch.aten.permute %1929, %901 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16>
%1931 = torch.aten.clone %1930, %int0 : !torch.vtensor<[2,1024,8,80],f16>, !torch.int -> !torch.vtensor<[2,1024,8,80],f16>
%1932 = torch.aten.view %1931, %1487 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
// Cross-attention output projection (%154/%155) + residual.
%1933 = torch.aten.transpose.int %154, %int0, %int1 : !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,640],f16>
%1934 = torch.aten.view %1932, %1506 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
%1935 = torch.aten.mm %1934, %1933 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16>
%1936 = torch.aten.mul.Scalar %155, %int1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640],f16>
%1937 = torch.aten.add.Tensor %1936, %1935, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2048,640],f16>, !torch.int -> !torch.vtensor<[2048,640],f16>
%1938 = torch.aten.view %1937, %1487 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%1939 = torch.aten.add.Tensor %1938, %1869, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16>
// Layer norm #3, scale %156, shift %157.
%1940 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%1941 = torch.aten.sum.dim_IntList %1939, %1940, %true, %none : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,1024,1],f16>
%1942 = torch.aten.div.Scalar %1941, %int640 : !torch.vtensor<[2,1024,1],f16>, !torch.int -> !torch.vtensor<[2,1024,1],f16>
%1943 = torch.prim.ListConstruct %int2, %int1024, %int640 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1944 = torch.aten.broadcast_to %1942, %1943 : !torch.vtensor<[2,1024,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%1945 = torch.aten.sub.Tensor %1939, %1944, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16>
%1946 = torch.aten.mul.Tensor %1945, %1945 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16>
%1947 = torch.aten.sum.dim_IntList %1946, %1940, %true, %none : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,1024,1],f16>
%1948 = torch.aten.div.Scalar %1947, %int640 : !torch.vtensor<[2,1024,1],f16>, !torch.int -> !torch.vtensor<[2,1024,1],f16>
%1949 = torch.aten.add.Scalar %1948, %float1.000000e-05, %int1 : !torch.vtensor<[2,1024,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,1024,1],f16>
%1950 = torch.aten.rsqrt %1949 : !torch.vtensor<[2,1024,1],f16> -> !torch.vtensor<[2,1024,1],f16>
%1951 = torch.prim.ListConstruct %int2, %int1024, %int640 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1952 = torch.aten.broadcast_to %1950, %1951 : !torch.vtensor<[2,1024,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%1953 = torch.aten.mul.Tensor %1945, %1952 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16>
%1954 = torch.aten.mul.Tensor %1953, %156 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[640],f16> -> !torch.vtensor<[2,1024,640],f16>
%1955 = torch.aten.add.Tensor %1954, %157, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16>
// --- Gated feed-forward: project 640 -> 5120 (%158/%159), split into two 2560
// halves, gate = first_half * gelu(second_half) (GEGLU-style gating).
%1956 = torch.aten.transpose.int %158, %int0, %int1 : !torch.vtensor<[5120,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,5120],f16>
%1957 = torch.aten.view %1955, %1506 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
%1958 = torch.aten.mm %1957, %1956 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,5120],f16> -> !torch.vtensor<[2048,5120],f16>
%1959 = torch.aten.mul.Scalar %159, %int1 : !torch.vtensor<[5120],f16>, !torch.int -> !torch.vtensor<[5120],f16>
%1960 = torch.aten.add.Tensor %1959, %1958, %int1 : !torch.vtensor<[5120],f16>, !torch.vtensor<[2048,5120],f16>, !torch.int -> !torch.vtensor<[2048,5120],f16>
%1961 = torch.aten.view %1960, %1661 : !torch.vtensor<[2048,5120],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,5120],f16>
%1962 = torch.aten.slice.Tensor %1961, %int-1, %int0, %int2560, %int1 : !torch.vtensor<[2,1024,5120],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1024,2560],f16>
%1963 = torch.aten.slice.Tensor %1961, %int-1, %int2560, %int5120, %int1 : !torch.vtensor<[2,1024,5120],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1024,2560],f16>
%1964 = torch.aten.gelu %1963, %str : !torch.vtensor<[2,1024,2560],f16>, !torch.str -> !torch.vtensor<[2,1024,2560],f16>
%1965 = torch.aten.mul.Tensor %1962, %1964 : !torch.vtensor<[2,1024,2560],f16>, !torch.vtensor<[2,1024,2560],f16> -> !torch.vtensor<[2,1024,2560],f16>
// Project 2560 -> 640 (%160/%161) and add the pre-FF residual.
%1966 = torch.aten.transpose.int %160, %int0, %int1 : !torch.vtensor<[640,2560],f16>, !torch.int, !torch.int -> !torch.vtensor<[2560,640],f16>
%1967 = torch.aten.view %1965, %1668 : !torch.vtensor<[2,1024,2560],f16>, !torch.list<int> -> !torch.vtensor<[2048,2560],f16>
%1968 = torch.aten.mm %1967, %1966 : !torch.vtensor<[2048,2560],f16>, !torch.vtensor<[2560,640],f16> -> !torch.vtensor<[2048,640],f16>
%1969 = torch.aten.mul.Scalar %161, %int1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640],f16>
%1970 = torch.aten.add.Tensor %1969, %1968, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2048,640],f16>, !torch.int -> !torch.vtensor<[2048,640],f16>
%1971 = torch.aten.view %1970, %1487 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%1972 = torch.aten.add.Tensor %1971, %1939, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16>
// Tokens back to NCHW [2,640,32,32], 1x1 projection conv (%162/%163), and
// residual with %1763 (the tensor saved before the token-space section).
%1973 = torch.aten.view %1972, %1675 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2,32,32,640],f16>
%1974 = torch.aten.permute %1973, %1060 : !torch.vtensor<[2,32,32,640],f16>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f16>
%1975 = torch.aten.convolution %1974, %162, %163, %744, %745, %744, %false, %745, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[640,640,1,1],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
%1976 = torch.aten.add.Tensor %1975, %1763, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
// Spatial downsampling: 3x3 conv with strides %1361 reduces 32x32 -> 16x16.
%1977 = torch.aten.convolution %1976, %164, %165, %1361, %744, %744, %false, %745, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[640,640,3,3],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,640,16,16],f16>
// Start of the next normalization at the reduced resolution: reshape to
// [2,32,20,256] (20*256 = 5120 elements per group) and upcast to f32.
// The mean/variance ops continue past the end of this chunk.
%1978 = torch.aten.clone %1977, %int0 : !torch.vtensor<[2,640,16,16],f16>, !torch.int -> !torch.vtensor<[2,640,16,16],f16>
%1979 = torch.prim.ListConstruct %int2, %int32, %int20, %int256 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1980 = torch.aten.view %1978, %1979 : !torch.vtensor<[2,640,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,32,20,256],f16>
%1981 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%1982 = torch.aten.to.dtype %1981, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%1983 = torch.prim.ListConstruct %int2, %int32, %int20, %int256 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1984 = torch.aten.broadcast_to %1982, %1983 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,20,256],f32>
%1985 = torch.valsem.aten.copy %1984, %1980, %false : !torch.vtensor<[2,32,20,256],f32>, !torch.vtensor<[2,32,20,256],f16>, !torch.bool -> !torch.vtensor<[2,32,20,256],f32>
%1986 = torch.aten.to.dtype %1985, %int7, %false, %false, %none : !torch.vtensor<[2,32,20,256],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,20,256],f64>
%1987 = torch.aten.sum.dim_IntList %1986, %754, %true, %none : !torch.vtensor<[2,32,20,256],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%1988 = torch.aten.div.Scalar %1987, %int5120 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%1989 = torch.aten.sub.Tensor %1986, %1988, %float1.000000e00 : !torch.vtensor<[2,32,20,256],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,20,256],f64>
%1990 = torch.aten.mul.Tensor %1989, %1989 : !torch.vtensor<[2,32,20,256],f64>, !torch.vtensor<[2,32,20,256],f64> -> !torch.vtensor<[2,32,20,256],f64>
%1991 = torch.aten.sum.dim_IntList %1990, %754, %true, %none : !torch.vtensor<[2,32,20,256],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%1992 = torch.aten.div.Scalar %1991, %int5120 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%1993 = torch.aten.to.dtype %1992, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%1994 = torch.aten.sum.dim_IntList %1985, %754, %true, %none : !torch.vtensor<[2,32,20,256],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%1995 = torch.aten.div.Scalar %1994, %int5120 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%1996 = torch.aten.add.Tensor %1993, %6, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%1997 = torch.aten.rsqrt %1996 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%1998 = torch.aten.sub.Tensor %1980, %1995, %int1 : !torch.vtensor<[2,32,20,256],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,20,256],f32>
%1999 = torch.aten.mul.Tensor %1998, %1997 : !torch.vtensor<[2,32,20,256],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,20,256],f32>
%2000 = torch.prim.ListConstruct %int2, %int640, %int16, %int16 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2001 = torch.aten.view %1999, %2000 : !torch.vtensor<[2,32,20,256],f32>, !torch.list<int> -> !torch.vtensor<[2,640,16,16],f32>
%2002 = torch.aten.unsqueeze %166, %int-1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640,1],f16>
%2003 = torch.aten.unsqueeze %2002, %int-1 : !torch.vtensor<[640,1],f16>, !torch.int -> !torch.vtensor<[640,1,1],f16>
%2004 = torch.aten.mul.Tensor %2001, %2003 : !torch.vtensor<[2,640,16,16],f32>, !torch.vtensor<[640,1,1],f16> -> !torch.vtensor<[2,640,16,16],f32>
%2005 = torch.aten.unsqueeze %167, %int-1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640,1],f16>
%2006 = torch.aten.unsqueeze %2005, %int-1 : !torch.vtensor<[640,1],f16>, !torch.int -> !torch.vtensor<[640,1,1],f16>
%2007 = torch.aten.add.Tensor %2004, %2006, %int1 : !torch.vtensor<[2,640,16,16],f32>, !torch.vtensor<[640,1,1],f16>, !torch.int -> !torch.vtensor<[2,640,16,16],f32>
%2008 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%2009 = torch.aten.to.dtype %2008, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%2010 = torch.prim.ListConstruct %int2, %int640, %int16, %int16 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2011 = torch.aten.broadcast_to %2009, %2010 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,640,16,16],f16>
%2012 = torch.valsem.aten.copy %2011, %2007, %false : !torch.vtensor<[2,640,16,16],f16>, !torch.vtensor<[2,640,16,16],f32>, !torch.bool -> !torch.vtensor<[2,640,16,16],f16>
%2013 = torch.aten.sigmoid %2012 : !torch.vtensor<[2,640,16,16],f16> -> !torch.vtensor<[2,640,16,16],f16>
%2014 = torch.aten.mul.Tensor %2013, %2012 : !torch.vtensor<[2,640,16,16],f16>, !torch.vtensor<[2,640,16,16],f16> -> !torch.vtensor<[2,640,16,16],f16>
%2015 = torch.aten.convolution %2014, %168, %169, %744, %744, %744, %false, %745, %int1 : !torch.vtensor<[2,640,16,16],f16>, !torch.vtensor<[1280,640,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
%2016 = torch.aten.sigmoid %743 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%2017 = torch.aten.mul.Tensor %2016, %743 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%2018 = torch.aten.transpose.int %170, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%2019 = torch.aten.mm %2017, %2018 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[2,1280],f16>
%2020 = torch.aten.mul.Scalar %171, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16>
%2021 = torch.aten.add.Tensor %2020, %2019, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[2,1280],f16>, !torch.int -> !torch.vtensor<[2,1280],f16>
%2022 = torch.aten.slice.Tensor %2021, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,1280],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
%2023 = torch.aten.slice.Tensor %2022, %int1, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,1280],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
%2024 = torch.aten.unsqueeze %2023, %int2 : !torch.vtensor<[2,1280],f16>, !torch.int -> !torch.vtensor<[2,1280,1],f16>
%2025 = torch.aten.unsqueeze %2024, %int3 : !torch.vtensor<[2,1280,1],f16>, !torch.int -> !torch.vtensor<[2,1280,1,1],f16>
%2026 = torch.aten.add.Tensor %2015, %2025, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
%2027 = torch.prim.ListConstruct %int2, %int32, %int40, %int256 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2028 = torch.aten.view %2026, %2027 : !torch.vtensor<[2,1280,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,32,40,256],f16>
%2029 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%2030 = torch.aten.to.dtype %2029, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%2031 = torch.prim.ListConstruct %int2, %int32, %int40, %int256 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2032 = torch.aten.broadcast_to %2030, %2031 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,40,256],f32>
%2033 = torch.valsem.aten.copy %2032, %2028, %false : !torch.vtensor<[2,32,40,256],f32>, !torch.vtensor<[2,32,40,256],f16>, !torch.bool -> !torch.vtensor<[2,32,40,256],f32>
%2034 = torch.aten.to.dtype %2033, %int7, %false, %false, %none : !torch.vtensor<[2,32,40,256],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,40,256],f64>
%2035 = torch.aten.sum.dim_IntList %2034, %754, %true, %none : !torch.vtensor<[2,32,40,256],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%2036 = torch.aten.div.Scalar %2035, %int10240 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%2037 = torch.aten.sub.Tensor %2034, %2036, %float1.000000e00 : !torch.vtensor<[2,32,40,256],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,40,256],f64>
%2038 = torch.aten.mul.Tensor %2037, %2037 : !torch.vtensor<[2,32,40,256],f64>, !torch.vtensor<[2,32,40,256],f64> -> !torch.vtensor<[2,32,40,256],f64>
%2039 = torch.aten.sum.dim_IntList %2038, %754, %true, %none : !torch.vtensor<[2,32,40,256],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%2040 = torch.aten.div.Scalar %2039, %int10240 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%2041 = torch.aten.to.dtype %2040, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%2042 = torch.aten.sum.dim_IntList %2033, %754, %true, %none : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%2043 = torch.aten.div.Scalar %2042, %int10240 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%2044 = torch.aten.add.Tensor %2041, %6, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%2045 = torch.aten.rsqrt %2044 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%2046 = torch.aten.sub.Tensor %2028, %2043, %int1 : !torch.vtensor<[2,32,40,256],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,256],f32>
%2047 = torch.aten.mul.Tensor %2046, %2045 : !torch.vtensor<[2,32,40,256],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,256],f32>
%2048 = torch.prim.ListConstruct %int2, %int1280, %int16, %int16 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2049 = torch.aten.view %2047, %2048 : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f32>
%2050 = torch.aten.unsqueeze %172, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%2051 = torch.aten.unsqueeze %2050, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%2052 = torch.aten.mul.Tensor %2049, %2051 : !torch.vtensor<[2,1280,16,16],f32>, !torch.vtensor<[1280,1,1],f16> -> !torch.vtensor<[2,1280,16,16],f32>
%2053 = torch.aten.unsqueeze %173, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%2054 = torch.aten.unsqueeze %2053, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%2055 = torch.aten.add.Tensor %2052, %2054, %int1 : !torch.vtensor<[2,1280,16,16],f32>, !torch.vtensor<[1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f32>
%2056 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%2057 = torch.aten.to.dtype %2056, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%2058 = torch.prim.ListConstruct %int2, %int1280, %int16, %int16 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2059 = torch.aten.broadcast_to %2057, %2058 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f16>
%2060 = torch.valsem.aten.copy %2059, %2055, %false : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f32>, !torch.bool -> !torch.vtensor<[2,1280,16,16],f16>
%2061 = torch.aten.sigmoid %2060 : !torch.vtensor<[2,1280,16,16],f16> -> !torch.vtensor<[2,1280,16,16],f16>
%2062 = torch.aten.mul.Tensor %2061, %2060 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f16> -> !torch.vtensor<[2,1280,16,16],f16>
%2063 = torch.aten.convolution %2062, %174, %175, %744, %744, %744, %false, %745, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
%2064 = torch.aten.convolution %1977, %176, %177, %744, %745, %744, %false, %745, %int1 : !torch.vtensor<[2,640,16,16],f16>, !torch.vtensor<[1280,640,1,1],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
%2065 = torch.aten.add.Tensor %2064, %2063, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
%2066 = torch.aten.div.Tensor %2065, %5 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[2,1280,16,16],f16>
%2067 = torch.aten.clone %2066, %int0 : !torch.vtensor<[2,1280,16,16],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
%2068 = torch.aten.view %2067, %2027 : !torch.vtensor<[2,1280,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,32,40,256],f16>
%2069 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%2070 = torch.aten.to.dtype %2069, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%2071 = torch.prim.ListConstruct %int2, %int32, %int40, %int256 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2072 = torch.aten.broadcast_to %2070, %2071 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,40,256],f32>
%2073 = torch.valsem.aten.copy %2072, %2068, %false : !torch.vtensor<[2,32,40,256],f32>, !torch.vtensor<[2,32,40,256],f16>, !torch.bool -> !torch.vtensor<[2,32,40,256],f32>
%2074 = torch.aten.to.dtype %2073, %int7, %false, %false, %none : !torch.vtensor<[2,32,40,256],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,40,256],f64>
%2075 = torch.aten.sum.dim_IntList %2074, %754, %true, %none : !torch.vtensor<[2,32,40,256],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%2076 = torch.aten.div.Scalar %2075, %int10240 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%2077 = torch.aten.sub.Tensor %2074, %2076, %float1.000000e00 : !torch.vtensor<[2,32,40,256],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,40,256],f64>
%2078 = torch.aten.mul.Tensor %2077, %2077 : !torch.vtensor<[2,32,40,256],f64>, !torch.vtensor<[2,32,40,256],f64> -> !torch.vtensor<[2,32,40,256],f64>
%2079 = torch.aten.sum.dim_IntList %2078, %754, %true, %none : !torch.vtensor<[2,32,40,256],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%2080 = torch.aten.div.Scalar %2079, %int10240 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%2081 = torch.aten.to.dtype %2080, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%2082 = torch.aten.sum.dim_IntList %2073, %754, %true, %none : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%2083 = torch.aten.div.Scalar %2082, %int10240 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%2084 = torch.aten.add.Tensor %2081, %4, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%2085 = torch.aten.rsqrt %2084 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%2086 = torch.aten.sub.Tensor %2068, %2083, %int1 : !torch.vtensor<[2,32,40,256],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,256],f32>
%2087 = torch.aten.mul.Tensor %2086, %2085 : !torch.vtensor<[2,32,40,256],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,256],f32>
%2088 = torch.aten.view %2087, %2048 : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f32>
%2089 = torch.aten.unsqueeze %178, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%2090 = torch.aten.unsqueeze %2089, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%2091 = torch.aten.mul.Tensor %2088, %2090 : !torch.vtensor<[2,1280,16,16],f32>, !torch.vtensor<[1280,1,1],f16> -> !torch.vtensor<[2,1280,16,16],f32>
%2092 = torch.aten.unsqueeze %179, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%2093 = torch.aten.unsqueeze %2092, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%2094 = torch.aten.add.Tensor %2091, %2093, %int1 : !torch.vtensor<[2,1280,16,16],f32>, !torch.vtensor<[1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f32>
%2095 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%2096 = torch.aten.to.dtype %2095, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%2097 = torch.prim.ListConstruct %int2, %int1280, %int16, %int16 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2098 = torch.aten.broadcast_to %2096, %2097 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f16>
%2099 = torch.valsem.aten.copy %2098, %2094, %false : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f32>, !torch.bool -> !torch.vtensor<[2,1280,16,16],f16>
%2100 = torch.aten.convolution %2099, %180, %181, %744, %745, %744, %false, %745, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[1280,1280,1,1],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
%2101 = torch.aten.permute %2100, %866 : !torch.vtensor<[2,1280,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,16,16,1280],f16>
%2102 = torch.prim.ListConstruct %int2, %int256, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2103 = torch.aten.view %2101, %2102 : !torch.vtensor<[2,16,16,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%2104 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%2105 = torch.aten.sum.dim_IntList %2103, %2104, %true, %none : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,256,1],f16>
%2106 = torch.aten.div.Scalar %2105, %int1280 : !torch.vtensor<[2,256,1],f16>, !torch.int -> !torch.vtensor<[2,256,1],f16>
%2107 = torch.prim.ListConstruct %int2, %int256, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2108 = torch.aten.broadcast_to %2106, %2107 : !torch.vtensor<[2,256,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%2109 = torch.aten.sub.Tensor %2103, %2108, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16>
%2110 = torch.aten.mul.Tensor %2109, %2109 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16>
%2111 = torch.aten.sum.dim_IntList %2110, %2104, %true, %none : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,256,1],f16>
%2112 = torch.aten.div.Scalar %2111, %int1280 : !torch.vtensor<[2,256,1],f16>, !torch.int -> !torch.vtensor<[2,256,1],f16>
%2113 = torch.aten.add.Scalar %2112, %float1.000000e-05, %int1 : !torch.vtensor<[2,256,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,256,1],f16>
%2114 = torch.aten.rsqrt %2113 : !torch.vtensor<[2,256,1],f16> -> !torch.vtensor<[2,256,1],f16>
%2115 = torch.prim.ListConstruct %int2, %int256, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2116 = torch.aten.broadcast_to %2114, %2115 : !torch.vtensor<[2,256,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%2117 = torch.aten.mul.Tensor %2109, %2116 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16>
%2118 = torch.aten.mul.Tensor %2117, %182 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[1280],f16> -> !torch.vtensor<[2,256,1280],f16>
%2119 = torch.aten.add.Tensor %2118, %183, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16>
%2120 = torch.aten.transpose.int %184, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%2121 = torch.prim.ListConstruct %int512, %int1280 : (!torch.int, !torch.int) -> !torch.list<int>
%2122 = torch.aten.view %2119, %2121 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
%2123 = torch.aten.mm %2122, %2120 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16>
%2124 = torch.aten.view %2123, %2102 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%2125 = torch.aten.transpose.int %185, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%2126 = torch.aten.view %2119, %2121 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
%2127 = torch.aten.mm %2126, %2125 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16>
%2128 = torch.aten.view %2127, %2102 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%2129 = torch.aten.transpose.int %186, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%2130 = torch.aten.view %2119, %2121 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
%2131 = torch.aten.mm %2130, %2129 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16>
%2132 = torch.aten.view %2131, %2102 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%2133 = torch.prim.ListConstruct %int2, %int256, %int8, %int160 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2134 = torch.aten.view %2124, %2133 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16>
%2135 = torch.aten.permute %2134, %901 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16>
%2136 = torch.aten.clone %2135, %int0 : !torch.vtensor<[2,8,256,160],f16>, !torch.int -> !torch.vtensor<[2,8,256,160],f16>
%2137 = torch.prim.ListConstruct %int16, %int256, %int160 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2138 = torch.aten.view %2136, %2137 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%2139 = torch.aten.view %2128, %2133 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16>
%2140 = torch.aten.permute %2139, %901 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16>
%2141 = torch.aten.clone %2140, %int0 : !torch.vtensor<[2,8,256,160],f16>, !torch.int -> !torch.vtensor<[2,8,256,160],f16>
%2142 = torch.aten.view %2141, %2137 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%2143 = torch.aten.view %2132, %2133 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16>
%2144 = torch.aten.permute %2143, %901 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16>
%2145 = torch.aten.clone %2144, %int0 : !torch.vtensor<[2,8,256,160],f16>, !torch.int -> !torch.vtensor<[2,8,256,160],f16>
%2146 = torch.aten.view %2145, %2137 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%2147 = torch.aten.transpose.int %2142, %int-1, %int-2 : !torch.vtensor<[16,256,160],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,160,256],f16>
%2148 = torch.aten.broadcast_to %2138, %2137 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%2149 = torch.aten.view %2148, %2137 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%2150 = torch.prim.ListConstruct %int16, %int160, %int256 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2151 = torch.aten.broadcast_to %2147, %2150 : !torch.vtensor<[16,160,256],f16>, !torch.list<int> -> !torch.vtensor<[16,160,256],f16>
%2152 = torch.aten.view %2151, %2150 : !torch.vtensor<[16,160,256],f16>, !torch.list<int> -> !torch.vtensor<[16,160,256],f16>
%2153 = torch.aten.bmm %2149, %2152 : !torch.vtensor<[16,256,160],f16>, !torch.vtensor<[16,160,256],f16> -> !torch.vtensor<[16,256,256],f16>
%2154 = torch.prim.ListConstruct %int16, %int256, %int256 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2155 = torch.aten.view %2153, %2154 : !torch.vtensor<[16,256,256],f16>, !torch.list<int> -> !torch.vtensor<[16,256,256],f16>
%2156 = torch.aten.mul.Tensor %2155, %1 : !torch.vtensor<[16,256,256],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,256,256],f16>
%values_14, %indices_15 = torch.aten.max.dim %2156, %int-1, %true : !torch.vtensor<[16,256,256],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,256,1],f16>, !torch.vtensor<[16,256,1],si64>
%2157 = torch.aten.sub.Tensor %2156, %values_14, %float1.000000e00 : !torch.vtensor<[16,256,256],f16>, !torch.vtensor<[16,256,1],f16>, !torch.float -> !torch.vtensor<[16,256,256],f16>
%2158 = torch.aten.exp %2157 : !torch.vtensor<[16,256,256],f16> -> !torch.vtensor<[16,256,256],f16>
%2159 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%2160 = torch.aten.sum.dim_IntList %2158, %2159, %true, %none : !torch.vtensor<[16,256,256],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[16,256,1],f16>
%2161 = torch.aten.div.Tensor %2158, %2160 : !torch.vtensor<[16,256,256],f16>, !torch.vtensor<[16,256,1],f16> -> !torch.vtensor<[16,256,256],f16>
%2162 = torch.aten.broadcast_to %2161, %2154 : !torch.vtensor<[16,256,256],f16>, !torch.list<int> -> !torch.vtensor<[16,256,256],f16>
%2163 = torch.aten.view %2162, %2154 : !torch.vtensor<[16,256,256],f16>, !torch.list<int> -> !torch.vtensor<[16,256,256],f16>
%2164 = torch.aten.broadcast_to %2146, %2137 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%2165 = torch.aten.view %2164, %2137 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%2166 = torch.aten.bmm %2163, %2165 : !torch.vtensor<[16,256,256],f16>, !torch.vtensor<[16,256,160],f16> -> !torch.vtensor<[16,256,160],f16>
%2167 = torch.aten.view %2166, %2137 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%2168 = torch.prim.ListConstruct %int2, %int8, %int256, %int160 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2169 = torch.aten.view %2167, %2168 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16>
%2170 = torch.aten.permute %2169, %901 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16>
%2171 = torch.aten.clone %2170, %int0 : !torch.vtensor<[2,256,8,160],f16>, !torch.int -> !torch.vtensor<[2,256,8,160],f16>
%2172 = torch.aten.view %2171, %2102 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%2173 = torch.aten.transpose.int %187, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%2174 = torch.aten.view %2172, %2121 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
%2175 = torch.aten.mm %2174, %2173 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16>
%2176 = torch.aten.mul.Scalar %188, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16>
%2177 = torch.aten.add.Tensor %2176, %2175, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[512,1280],f16>, !torch.int -> !torch.vtensor<[512,1280],f16>
%2178 = torch.aten.view %2177, %2102 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%2179 = torch.aten.add.Tensor %2178, %2103, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16>
%2180 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%2181 = torch.aten.sum.dim_IntList %2179, %2180, %true, %none : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,256,1],f16>
%2182 = torch.aten.div.Scalar %2181, %int1280 : !torch.vtensor<[2,256,1],f16>, !torch.int -> !torch.vtensor<[2,256,1],f16>
%2183 = torch.prim.ListConstruct %int2, %int256, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2184 = torch.aten.broadcast_to %2182, %2183 : !torch.vtensor<[2,256,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%2185 = torch.aten.sub.Tensor %2179, %2184, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16>
%2186 = torch.aten.mul.Tensor %2185, %2185 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16>
%2187 = torch.aten.sum.dim_IntList %2186, %2180, %true, %none : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,256,1],f16>
%2188 = torch.aten.div.Scalar %2187, %int1280 : !torch.vtensor<[2,256,1],f16>, !torch.int -> !torch.vtensor<[2,256,1],f16>
%2189 = torch.aten.add.Scalar %2188, %float1.000000e-05, %int1 : !torch.vtensor<[2,256,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,256,1],f16>
%2190 = torch.aten.rsqrt %2189 : !torch.vtensor<[2,256,1],f16> -> !torch.vtensor<[2,256,1],f16>
%2191 = torch.prim.ListConstruct %int2, %int256, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2192 = torch.aten.broadcast_to %2190, %2191 : !torch.vtensor<[2,256,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%2193 = torch.aten.mul.Tensor %2185, %2192 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16>
%2194 = torch.aten.mul.Tensor %2193, %189 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[1280],f16> -> !torch.vtensor<[2,256,1280],f16>
%2195 = torch.aten.add.Tensor %2194, %190, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16>
%2196 = torch.aten.transpose.int %191, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%2197 = torch.aten.view %2195, %2121 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
%2198 = torch.aten.mm %2197, %2196 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16>
%2199 = torch.aten.view %2198, %2102 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%2200 = torch.aten.transpose.int %192, %int0, %int1 : !torch.vtensor<[1280,768],f16>, !torch.int, !torch.int -> !torch.vtensor<[768,1280],f16>
%2201 = torch.aten.view %arg2, %968 : !torch.vtensor<[2,77,768],f16>, !torch.list<int> -> !torch.vtensor<[154,768],f16>
%2202 = torch.aten.mm %2201, %2200 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,1280],f16> -> !torch.vtensor<[154,1280],f16>
%2203 = torch.prim.ListConstruct %int2, %int77, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2204 = torch.aten.view %2202, %2203 : !torch.vtensor<[154,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,77,1280],f16>
%2205 = torch.aten.transpose.int %193, %int0, %int1 : !torch.vtensor<[1280,768],f16>, !torch.int, !torch.int -> !torch.vtensor<[768,1280],f16>
%2206 = torch.aten.view %arg2, %968 : !torch.vtensor<[2,77,768],f16>, !torch.list<int> -> !torch.vtensor<[154,768],f16>
%2207 = torch.aten.mm %2206, %2205 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,1280],f16> -> !torch.vtensor<[154,1280],f16>
%2208 = torch.aten.view %2207, %2203 : !torch.vtensor<[154,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,77,1280],f16>
%2209 = torch.aten.view %2199, %2133 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16>
%2210 = torch.aten.permute %2209, %901 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16>
%2211 = torch.aten.clone %2210, %int0 : !torch.vtensor<[2,8,256,160],f16>, !torch.int -> !torch.vtensor<[2,8,256,160],f16>
%2212 = torch.aten.view %2211, %2137 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%2213 = torch.prim.ListConstruct %int2, %int77, %int8, %int160 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2214 = torch.aten.view %2204, %2213 : !torch.vtensor<[2,77,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,77,8,160],f16>
%2215 = torch.aten.permute %2214, %901 : !torch.vtensor<[2,77,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,160],f16>
%2216 = torch.aten.clone %2215, %int0 : !torch.vtensor<[2,8,77,160],f16>, !torch.int -> !torch.vtensor<[2,8,77,160],f16>
%2217 = torch.prim.ListConstruct %int16, %int77, %int160 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2218 = torch.aten.view %2216, %2217 : !torch.vtensor<[2,8,77,160],f16>, !torch.list<int> -> !torch.vtensor<[16,77,160],f16>
%2219 = torch.aten.view %2208, %2213 : !torch.vtensor<[2,77,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,77,8,160],f16>
%2220 = torch.aten.permute %2219, %901 : !torch.vtensor<[2,77,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,160],f16>
%2221 = torch.aten.clone %2220, %int0 : !torch.vtensor<[2,8,77,160],f16>, !torch.int -> !torch.vtensor<[2,8,77,160],f16>
%2222 = torch.aten.view %2221, %2217 : !torch.vtensor<[2,8,77,160],f16>, !torch.list<int> -> !torch.vtensor<[16,77,160],f16>
%2223 = torch.aten.transpose.int %2218, %int-1, %int-2 : !torch.vtensor<[16,77,160],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,160,77],f16>
%2224 = torch.aten.broadcast_to %2212, %2137 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%2225 = torch.aten.view %2224, %2137 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%2226 = torch.prim.ListConstruct %int16, %int160, %int77 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2227 = torch.aten.broadcast_to %2223, %2226 : !torch.vtensor<[16,160,77],f16>, !torch.list<int> -> !torch.vtensor<[16,160,77],f16>
%2228 = torch.aten.view %2227, %2226 : !torch.vtensor<[16,160,77],f16>, !torch.list<int> -> !torch.vtensor<[16,160,77],f16>
%2229 = torch.aten.bmm %2225, %2228 : !torch.vtensor<[16,256,160],f16>, !torch.vtensor<[16,160,77],f16> -> !torch.vtensor<[16,256,77],f16>
%2230 = torch.prim.ListConstruct %int16, %int256, %int77 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2231 = torch.aten.view %2229, %2230 : !torch.vtensor<[16,256,77],f16>, !torch.list<int> -> !torch.vtensor<[16,256,77],f16>
%2232 = torch.aten.mul.Tensor %2231, %1 : !torch.vtensor<[16,256,77],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,256,77],f16>
%values_16, %indices_17 = torch.aten.max.dim %2232, %int-1, %true : !torch.vtensor<[16,256,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,256,1],f16>, !torch.vtensor<[16,256,1],si64>
%2233 = torch.aten.sub.Tensor %2232, %values_16, %float1.000000e00 : !torch.vtensor<[16,256,77],f16>, !torch.vtensor<[16,256,1],f16>, !torch.float -> !torch.vtensor<[16,256,77],f16>
%2234 = torch.aten.exp %2233 : !torch.vtensor<[16,256,77],f16> -> !torch.vtensor<[16,256,77],f16>
%2235 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%2236 = torch.aten.sum.dim_IntList %2234, %2235, %true, %none : !torch.vtensor<[16,256,77],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[16,256,1],f16>
%2237 = torch.aten.div.Tensor %2234, %2236 : !torch.vtensor<[16,256,77],f16>, !torch.vtensor<[16,256,1],f16> -> !torch.vtensor<[16,256,77],f16>
%2238 = torch.aten.broadcast_to %2237, %2230 : !torch.vtensor<[16,256,77],f16>, !torch.list<int> -> !torch.vtensor<[16,256,77],f16>
%2239 = torch.aten.view %2238, %2230 : !torch.vtensor<[16,256,77],f16>, !torch.list<int> -> !torch.vtensor<[16,256,77],f16>
%2240 = torch.aten.broadcast_to %2222, %2217 : !torch.vtensor<[16,77,160],f16>, !torch.list<int> -> !torch.vtensor<[16,77,160],f16>
%2241 = torch.aten.view %2240, %2217 : !torch.vtensor<[16,77,160],f16>, !torch.list<int> -> !torch.vtensor<[16,77,160],f16>
%2242 = torch.aten.bmm %2239, %2241 : !torch.vtensor<[16,256,77],f16>, !torch.vtensor<[16,77,160],f16> -> !torch.vtensor<[16,256,160],f16>
%2243 = torch.aten.view %2242, %2137 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%2244 = torch.aten.view %2243, %2168 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16>
%2245 = torch.aten.permute %2244, %901 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16>
%2246 = torch.aten.clone %2245, %int0 : !torch.vtensor<[2,256,8,160],f16>, !torch.int -> !torch.vtensor<[2,256,8,160],f16>
%2247 = torch.aten.view %2246, %2102 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%2248 = torch.aten.transpose.int %194, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%2249 = torch.aten.view %2247, %2121 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
%2250 = torch.aten.mm %2249, %2248 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16>
%2251 = torch.aten.mul.Scalar %195, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16>
%2252 = torch.aten.add.Tensor %2251, %2250, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[512,1280],f16>, !torch.int -> !torch.vtensor<[512,1280],f16>
%2253 = torch.aten.view %2252, %2102 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%2254 = torch.aten.add.Tensor %2253, %2179, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16>
%2255 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%2256 = torch.aten.sum.dim_IntList %2254, %2255, %true, %none : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,256,1],f16>
%2257 = torch.aten.div.Scalar %2256, %int1280 : !torch.vtensor<[2,256,1],f16>, !torch.int -> !torch.vtensor<[2,256,1],f16>
%2258 = torch.prim.ListConstruct %int2, %int256, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2259 = torch.aten.broadcast_to %2257, %2258 : !torch.vtensor<[2,256,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%2260 = torch.aten.sub.Tensor %2254, %2259, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16>
%2261 = torch.aten.mul.Tensor %2260, %2260 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16>
%2262 = torch.aten.sum.dim_IntList %2261, %2255, %true, %none : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,256,1],f16>
%2263 = torch.aten.div.Scalar %2262, %int1280 : !torch.vtensor<[2,256,1],f16>, !torch.int -> !torch.vtensor<[2,256,1],f16>
%2264 = torch.aten.add.Scalar %2263, %float1.000000e-05, %int1 : !torch.vtensor<[2,256,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,256,1],f16>
%2265 = torch.aten.rsqrt %2264 : !torch.vtensor<[2,256,1],f16> -> !torch.vtensor<[2,256,1],f16>
%2266 = torch.prim.ListConstruct %int2, %int256, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2267 = torch.aten.broadcast_to %2265, %2266 : !torch.vtensor<[2,256,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%2268 = torch.aten.mul.Tensor %2260, %2267 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16>
%2269 = torch.aten.mul.Tensor %2268, %196 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[1280],f16> -> !torch.vtensor<[2,256,1280],f16>
%2270 = torch.aten.add.Tensor %2269, %197, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16>
%2271 = torch.aten.transpose.int %198, %int0, %int1 : !torch.vtensor<[10240,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,10240],f16>
%2272 = torch.aten.view %2270, %2121 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
%2273 = torch.aten.mm %2272, %2271 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,10240],f16> -> !torch.vtensor<[512,10240],f16>
%2274 = torch.aten.mul.Scalar %199, %int1 : !torch.vtensor<[10240],f16>, !torch.int -> !torch.vtensor<[10240],f16>
%2275 = torch.aten.add.Tensor %2274, %2273, %int1 : !torch.vtensor<[10240],f16>, !torch.vtensor<[512,10240],f16>, !torch.int -> !torch.vtensor<[512,10240],f16>
%2276 = torch.prim.ListConstruct %int2, %int256, %int10240 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2277 = torch.aten.view %2275, %2276 : !torch.vtensor<[512,10240],f16>, !torch.list<int> -> !torch.vtensor<[2,256,10240],f16>
%2278 = torch.aten.slice.Tensor %2277, %int-1, %int0, %int5120, %int1 : !torch.vtensor<[2,256,10240],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,256,5120],f16>
%2279 = torch.aten.slice.Tensor %2277, %int-1, %int5120, %int10240, %int1 : !torch.vtensor<[2,256,10240],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,256,5120],f16>
%2280 = torch.aten.gelu %2279, %str : !torch.vtensor<[2,256,5120],f16>, !torch.str -> !torch.vtensor<[2,256,5120],f16>
%2281 = torch.aten.mul.Tensor %2278, %2280 : !torch.vtensor<[2,256,5120],f16>, !torch.vtensor<[2,256,5120],f16> -> !torch.vtensor<[2,256,5120],f16>
%2282 = torch.aten.transpose.int %200, %int0, %int1 : !torch.vtensor<[1280,5120],f16>, !torch.int, !torch.int -> !torch.vtensor<[5120,1280],f16>
%2283 = torch.prim.ListConstruct %int512, %int5120 : (!torch.int, !torch.int) -> !torch.list<int>
%2284 = torch.aten.view %2281, %2283 : !torch.vtensor<[2,256,5120],f16>, !torch.list<int> -> !torch.vtensor<[512,5120],f16>
%2285 = torch.aten.mm %2284, %2282 : !torch.vtensor<[512,5120],f16>, !torch.vtensor<[5120,1280],f16> -> !torch.vtensor<[512,1280],f16>
%2286 = torch.aten.mul.Scalar %201, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16>
%2287 = torch.aten.add.Tensor %2286, %2285, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[512,1280],f16>, !torch.int -> !torch.vtensor<[512,1280],f16>
%2288 = torch.aten.view %2287, %2102 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%2289 = torch.aten.add.Tensor %2288, %2254, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16>
%2290 = torch.prim.ListConstruct %int2, %int16, %int16, %int1280 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2291 = torch.aten.view %2289, %2290 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,16,16,1280],f16>
%2292 = torch.aten.permute %2291, %1060 : !torch.vtensor<[2,16,16,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f16>
%2293 = torch.aten.convolution %2292, %202, %203, %744, %745, %744, %false, %745, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[1280,1280,1,1],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
%2294 = torch.aten.add.Tensor %2293, %2066, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
%2295 = torch.aten.clone %2294, %int0 : !torch.vtensor<[2,1280,16,16],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
%2296 = torch.aten.view %2295, %2027 : !torch.vtensor<[2,1280,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,32,40,256],f16>
%2297 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%2298 = torch.aten.to.dtype %2297, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%2299 = torch.prim.ListConstruct %int2, %int32, %int40, %int256 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2300 = torch.aten.broadcast_to %2298, %2299 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,40,256],f32>
%2301 = torch.valsem.aten.copy %2300, %2296, %false : !torch.vtensor<[2,32,40,256],f32>, !torch.vtensor<[2,32,40,256],f16>, !torch.bool -> !torch.vtensor<[2,32,40,256],f32>
%2302 = torch.aten.to.dtype %2301, %int7, %false, %false, %none : !torch.vtensor<[2,32,40,256],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,40,256],f64>
%2303 = torch.aten.sum.dim_IntList %2302, %754, %true, %none : !torch.vtensor<[2,32,40,256],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%2304 = torch.aten.div.Scalar %2303, %int10240 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%2305 = torch.aten.sub.Tensor %2302, %2304, %float1.000000e00 : !torch.vtensor<[2,32,40,256],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,40,256],f64>
%2306 = torch.aten.mul.Tensor %2305, %2305 : !torch.vtensor<[2,32,40,256],f64>, !torch.vtensor<[2,32,40,256],f64> -> !torch.vtensor<[2,32,40,256],f64>
%2307 = torch.aten.sum.dim_IntList %2306, %754, %true, %none : !torch.vtensor<[2,32,40,256],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%2308 = torch.aten.div.Scalar %2307, %int10240 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%2309 = torch.aten.to.dtype %2308, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%2310 = torch.aten.sum.dim_IntList %2301, %754, %true, %none : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%2311 = torch.aten.div.Scalar %2310, %int10240 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%2312 = torch.aten.add.Tensor %2309, %6, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%2313 = torch.aten.rsqrt %2312 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%2314 = torch.aten.sub.Tensor %2296, %2311, %int1 : !torch.vtensor<[2,32,40,256],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,256],f32>
%2315 = torch.aten.mul.Tensor %2314, %2313 : !torch.vtensor<[2,32,40,256],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,256],f32>
%2316 = torch.aten.view %2315, %2048 : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f32>
%2317 = torch.aten.unsqueeze %204, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%2318 = torch.aten.unsqueeze %2317, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%2319 = torch.aten.mul.Tensor %2316, %2318 : !torch.vtensor<[2,1280,16,16],f32>, !torch.vtensor<[1280,1,1],f16> -> !torch.vtensor<[2,1280,16,16],f32>
%2320 = torch.aten.unsqueeze %205, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%2321 = torch.aten.unsqueeze %2320, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%2322 = torch.aten.add.Tensor %2319, %2321, %int1 : !torch.vtensor<[2,1280,16,16],f32>, !torch.vtensor<[1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f32>
%2323 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%2324 = torch.aten.to.dtype %2323, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%2325 = torch.prim.ListConstruct %int2, %int1280, %int16, %int16 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2326 = torch.aten.broadcast_to %2324, %2325 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f16>
%2327 = torch.valsem.aten.copy %2326, %2322, %false : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f32>, !torch.bool -> !torch.vtensor<[2,1280,16,16],f16>
%2328 = torch.aten.sigmoid %2327 : !torch.vtensor<[2,1280,16,16],f16> -> !torch.vtensor<[2,1280,16,16],f16>
%2329 = torch.aten.mul.Tensor %2328, %2327 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f16> -> !torch.vtensor<[2,1280,16,16],f16>
%2330 = torch.aten.convolution %2329, %206, %207, %744, %744, %744, %false, %745, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
%2331 = torch.aten.sigmoid %743 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%2332 = torch.aten.mul.Tensor %2331, %743 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%2333 = torch.aten.transpose.int %208, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%2334 = torch.aten.mm %2332, %2333 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[2,1280],f16>
%2335 = torch.aten.mul.Scalar %209, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16>
%2336 = torch.aten.add.Tensor %2335, %2334, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[2,1280],f16>, !torch.int -> !torch.vtensor<[2,1280],f16>
%2337 = torch.aten.slice.Tensor %2336, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,1280],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
%2338 = torch.aten.slice.Tensor %2337, %int1, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,1280],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
%2339 = torch.aten.unsqueeze %2338, %int2 : !torch.vtensor<[2,1280],f16>, !torch.int -> !torch.vtensor<[2,1280,1],f16>
%2340 = torch.aten.unsqueeze %2339, %int3 : !torch.vtensor<[2,1280,1],f16>, !torch.int -> !torch.vtensor<[2,1280,1,1],f16>
%2341 = torch.aten.add.Tensor %2330, %2340, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
%2342 = torch.aten.view %2341, %2027 : !torch.vtensor<[2,1280,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,32,40,256],f16>
%2343 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%2344 = torch.aten.to.dtype %2343, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%2345 = torch.prim.ListConstruct %int2, %int32, %int40, %int256 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2346 = torch.aten.broadcast_to %2344, %2345 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,40,256],f32>
%2347 = torch.valsem.aten.copy %2346, %2342, %false : !torch.vtensor<[2,32,40,256],f32>, !torch.vtensor<[2,32,40,256],f16>, !torch.bool -> !torch.vtensor<[2,32,40,256],f32>
%2348 = torch.aten.to.dtype %2347, %int7, %false, %false, %none : !torch.vtensor<[2,32,40,256],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,40,256],f64>
%2349 = torch.aten.sum.dim_IntList %2348, %754, %true, %none : !torch.vtensor<[2,32,40,256],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%2350 = torch.aten.div.Scalar %2349, %int10240 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%2351 = torch.aten.sub.Tensor %2348, %2350, %float1.000000e00 : !torch.vtensor<[2,32,40,256],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,40,256],f64>
%2352 = torch.aten.mul.Tensor %2351, %2351 : !torch.vtensor<[2,32,40,256],f64>, !torch.vtensor<[2,32,40,256],f64> -> !torch.vtensor<[2,32,40,256],f64>
%2353 = torch.aten.sum.dim_IntList %2352, %754, %true, %none : !torch.vtensor<[2,32,40,256],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%2354 = torch.aten.div.Scalar %2353, %int10240 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%2355 = torch.aten.to.dtype %2354, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%2356 = torch.aten.sum.dim_IntList %2347, %754, %true, %none : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%2357 = torch.aten.div.Scalar %2356, %int10240 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%2358 = torch.aten.add.Tensor %2355, %6, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%2359 = torch.aten.rsqrt %2358 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%2360 = torch.aten.sub.Tensor %2342, %2357, %int1 : !torch.vtensor<[2,32,40,256],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,256],f32>
%2361 = torch.aten.mul.Tensor %2360, %2359 : !torch.vtensor<[2,32,40,256],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,256],f32>
%2362 = torch.aten.view %2361, %2048 : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f32>
%2363 = torch.aten.unsqueeze %210, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%2364 = torch.aten.unsqueeze %2363, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%2365 = torch.aten.mul.Tensor %2362, %2364 : !torch.vtensor<[2,1280,16,16],f32>, !torch.vtensor<[1280,1,1],f16> -> !torch.vtensor<[2,1280,16,16],f32>
%2366 = torch.aten.unsqueeze %211, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%2367 = torch.aten.unsqueeze %2366, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%2368 = torch.aten.add.Tensor %2365, %2367, %int1 : !torch.vtensor<[2,1280,16,16],f32>, !torch.vtensor<[1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f32>
%2369 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%2370 = torch.aten.to.dtype %2369, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%2371 = torch.prim.ListConstruct %int2, %int1280, %int16, %int16 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2372 = torch.aten.broadcast_to %2370, %2371 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f16>
%2373 = torch.valsem.aten.copy %2372, %2368, %false : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f32>, !torch.bool -> !torch.vtensor<[2,1280,16,16],f16>
%2374 = torch.aten.sigmoid %2373 : !torch.vtensor<[2,1280,16,16],f16> -> !torch.vtensor<[2,1280,16,16],f16>
%2375 = torch.aten.mul.Tensor %2374, %2373 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f16> -> !torch.vtensor<[2,1280,16,16],f16>
%2376 = torch.aten.convolution %2375, %212, %213, %744, %744, %744, %false, %745, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
%2377 = torch.aten.add.Tensor %2294, %2376, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
%2378 = torch.aten.div.Tensor %2377, %5 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[2,1280,16,16],f16>
%2379 = torch.aten.clone %2378, %int0 : !torch.vtensor<[2,1280,16,16],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
%2380 = torch.aten.view %2379, %2027 : !torch.vtensor<[2,1280,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,32,40,256],f16>
%2381 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%2382 = torch.aten.to.dtype %2381, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%2383 = torch.prim.ListConstruct %int2, %int32, %int40, %int256 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2384 = torch.aten.broadcast_to %2382, %2383 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,40,256],f32>
%2385 = torch.valsem.aten.copy %2384, %2380, %false : !torch.vtensor<[2,32,40,256],f32>, !torch.vtensor<[2,32,40,256],f16>, !torch.bool -> !torch.vtensor<[2,32,40,256],f32>
%2386 = torch.aten.to.dtype %2385, %int7, %false, %false, %none : !torch.vtensor<[2,32,40,256],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,40,256],f64>
%2387 = torch.aten.sum.dim_IntList %2386, %754, %true, %none : !torch.vtensor<[2,32,40,256],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%2388 = torch.aten.div.Scalar %2387, %int10240 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%2389 = torch.aten.sub.Tensor %2386, %2388, %float1.000000e00 : !torch.vtensor<[2,32,40,256],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,40,256],f64>
%2390 = torch.aten.mul.Tensor %2389, %2389 : !torch.vtensor<[2,32,40,256],f64>, !torch.vtensor<[2,32,40,256],f64> -> !torch.vtensor<[2,32,40,256],f64>
%2391 = torch.aten.sum.dim_IntList %2390, %754, %true, %none : !torch.vtensor<[2,32,40,256],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%2392 = torch.aten.div.Scalar %2391, %int10240 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%2393 = torch.aten.to.dtype %2392, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%2394 = torch.aten.sum.dim_IntList %2385, %754, %true, %none : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%2395 = torch.aten.div.Scalar %2394, %int10240 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%2396 = torch.aten.add.Tensor %2393, %4, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%2397 = torch.aten.rsqrt %2396 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%2398 = torch.aten.sub.Tensor %2380, %2395, %int1 : !torch.vtensor<[2,32,40,256],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,256],f32>
%2399 = torch.aten.mul.Tensor %2398, %2397 : !torch.vtensor<[2,32,40,256],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,256],f32>
%2400 = torch.aten.view %2399, %2048 : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f32>
%2401 = torch.aten.unsqueeze %214, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%2402 = torch.aten.unsqueeze %2401, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%2403 = torch.aten.mul.Tensor %2400, %2402 : !torch.vtensor<[2,1280,16,16],f32>, !torch.vtensor<[1280,1,1],f16> -> !torch.vtensor<[2,1280,16,16],f32>
%2404 = torch.aten.unsqueeze %215, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%2405 = torch.aten.unsqueeze %2404, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%2406 = torch.aten.add.Tensor %2403, %2405, %int1 : !torch.vtensor<[2,1280,16,16],f32>, !torch.vtensor<[1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f32>
%2407 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%2408 = torch.aten.to.dtype %2407, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%2409 = torch.prim.ListConstruct %int2, %int1280, %int16, %int16 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2410 = torch.aten.broadcast_to %2408, %2409 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f16>
%2411 = torch.valsem.aten.copy %2410, %2406, %false : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f32>, !torch.bool -> !torch.vtensor<[2,1280,16,16],f16>
%2412 = torch.aten.convolution %2411, %216, %217, %744, %745, %744, %false, %745, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[1280,1280,1,1],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
%2413 = torch.aten.permute %2412, %866 : !torch.vtensor<[2,1280,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,16,16,1280],f16>
%2414 = torch.aten.view %2413, %2102 : !torch.vtensor<[2,16,16,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%2415 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%2416 = torch.aten.sum.dim_IntList %2414, %2415, %true, %none : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,256,1],f16>
%2417 = torch.aten.div.Scalar %2416, %int1280 : !torch.vtensor<[2,256,1],f16>, !torch.int -> !torch.vtensor<[2,256,1],f16>
%2418 = torch.prim.ListConstruct %int2, %int256, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2419 = torch.aten.broadcast_to %2417, %2418 : !torch.vtensor<[2,256,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%2420 = torch.aten.sub.Tensor %2414, %2419, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16>
%2421 = torch.aten.mul.Tensor %2420, %2420 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16>
%2422 = torch.aten.sum.dim_IntList %2421, %2415, %true, %none : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,256,1],f16>
%2423 = torch.aten.div.Scalar %2422, %int1280 : !torch.vtensor<[2,256,1],f16>, !torch.int -> !torch.vtensor<[2,256,1],f16>
%2424 = torch.aten.add.Scalar %2423, %float1.000000e-05, %int1 : !torch.vtensor<[2,256,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,256,1],f16>
%2425 = torch.aten.rsqrt %2424 : !torch.vtensor<[2,256,1],f16> -> !torch.vtensor<[2,256,1],f16>
%2426 = torch.prim.ListConstruct %int2, %int256, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2427 = torch.aten.broadcast_to %2425, %2426 : !torch.vtensor<[2,256,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%2428 = torch.aten.mul.Tensor %2420, %2427 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16>
%2429 = torch.aten.mul.Tensor %2428, %218 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[1280],f16> -> !torch.vtensor<[2,256,1280],f16>
%2430 = torch.aten.add.Tensor %2429, %219, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16>
%2431 = torch.aten.transpose.int %220, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%2432 = torch.aten.view %2430, %2121 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
%2433 = torch.aten.mm %2432, %2431 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16>
%2434 = torch.aten.view %2433, %2102 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%2435 = torch.aten.transpose.int %221, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%2436 = torch.aten.view %2430, %2121 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
%2437 = torch.aten.mm %2436, %2435 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16>
%2438 = torch.aten.view %2437, %2102 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%2439 = torch.aten.transpose.int %222, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%2440 = torch.aten.view %2430, %2121 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
%2441 = torch.aten.mm %2440, %2439 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16>
%2442 = torch.aten.view %2441, %2102 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%2443 = torch.aten.view %2434, %2133 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16>
%2444 = torch.aten.permute %2443, %901 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16>
%2445 = torch.aten.clone %2444, %int0 : !torch.vtensor<[2,8,256,160],f16>, !torch.int -> !torch.vtensor<[2,8,256,160],f16>
%2446 = torch.aten.view %2445, %2137 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%2447 = torch.aten.view %2438, %2133 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16>
%2448 = torch.aten.permute %2447, %901 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16>
%2449 = torch.aten.clone %2448, %int0 : !torch.vtensor<[2,8,256,160],f16>, !torch.int -> !torch.vtensor<[2,8,256,160],f16>
%2450 = torch.aten.view %2449, %2137 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%2451 = torch.aten.view %2442, %2133 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16>
%2452 = torch.aten.permute %2451, %901 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16>
%2453 = torch.aten.clone %2452, %int0 : !torch.vtensor<[2,8,256,160],f16>, !torch.int -> !torch.vtensor<[2,8,256,160],f16>
%2454 = torch.aten.view %2453, %2137 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%2455 = torch.aten.transpose.int %2450, %int-1, %int-2 : !torch.vtensor<[16,256,160],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,160,256],f16>
%2456 = torch.aten.broadcast_to %2446, %2137 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%2457 = torch.aten.view %2456, %2137 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%2458 = torch.aten.broadcast_to %2455, %2150 : !torch.vtensor<[16,160,256],f16>, !torch.list<int> -> !torch.vtensor<[16,160,256],f16>
%2459 = torch.aten.view %2458, %2150 : !torch.vtensor<[16,160,256],f16>, !torch.list<int> -> !torch.vtensor<[16,160,256],f16>
%2460 = torch.aten.bmm %2457, %2459 : !torch.vtensor<[16,256,160],f16>, !torch.vtensor<[16,160,256],f16> -> !torch.vtensor<[16,256,256],f16>
%2461 = torch.aten.view %2460, %2154 : !torch.vtensor<[16,256,256],f16>, !torch.list<int> -> !torch.vtensor<[16,256,256],f16>
%2462 = torch.aten.mul.Tensor %2461, %1 : !torch.vtensor<[16,256,256],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,256,256],f16>
%values_18, %indices_19 = torch.aten.max.dim %2462, %int-1, %true : !torch.vtensor<[16,256,256],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,256,1],f16>, !torch.vtensor<[16,256,1],si64>
%2463 = torch.aten.sub.Tensor %2462, %values_18, %float1.000000e00 : !torch.vtensor<[16,256,256],f16>, !torch.vtensor<[16,256,1],f16>, !torch.float -> !torch.vtensor<[16,256,256],f16>
%2464 = torch.aten.exp %2463 : !torch.vtensor<[16,256,256],f16> -> !torch.vtensor<[16,256,256],f16>
%2465 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%2466 = torch.aten.sum.dim_IntList %2464, %2465, %true, %none : !torch.vtensor<[16,256,256],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[16,256,1],f16>
%2467 = torch.aten.div.Tensor %2464, %2466 : !torch.vtensor<[16,256,256],f16>, !torch.vtensor<[16,256,1],f16> -> !torch.vtensor<[16,256,256],f16>
%2468 = torch.aten.broadcast_to %2467, %2154 : !torch.vtensor<[16,256,256],f16>, !torch.list<int> -> !torch.vtensor<[16,256,256],f16>
%2469 = torch.aten.view %2468, %2154 : !torch.vtensor<[16,256,256],f16>, !torch.list<int> -> !torch.vtensor<[16,256,256],f16>
%2470 = torch.aten.broadcast_to %2454, %2137 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%2471 = torch.aten.view %2470, %2137 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%2472 = torch.aten.bmm %2469, %2471 : !torch.vtensor<[16,256,256],f16>, !torch.vtensor<[16,256,160],f16> -> !torch.vtensor<[16,256,160],f16>
%2473 = torch.aten.view %2472, %2137 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%2474 = torch.aten.view %2473, %2168 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16>
%2475 = torch.aten.permute %2474, %901 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16>
%2476 = torch.aten.clone %2475, %int0 : !torch.vtensor<[2,256,8,160],f16>, !torch.int -> !torch.vtensor<[2,256,8,160],f16>
%2477 = torch.aten.view %2476, %2102 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%2478 = torch.aten.transpose.int %223, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%2479 = torch.aten.view %2477, %2121 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
%2480 = torch.aten.mm %2479, %2478 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16>
%2481 = torch.aten.mul.Scalar %224, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16>
%2482 = torch.aten.add.Tensor %2481, %2480, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[512,1280],f16>, !torch.int -> !torch.vtensor<[512,1280],f16>
%2483 = torch.aten.view %2482, %2102 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%2484 = torch.aten.add.Tensor %2483, %2414, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16>
%2485 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%2486 = torch.aten.sum.dim_IntList %2484, %2485, %true, %none : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,256,1],f16>
%2487 = torch.aten.div.Scalar %2486, %int1280 : !torch.vtensor<[2,256,1],f16>, !torch.int -> !torch.vtensor<[2,256,1],f16>
%2488 = torch.prim.ListConstruct %int2, %int256, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2489 = torch.aten.broadcast_to %2487, %2488 : !torch.vtensor<[2,256,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%2490 = torch.aten.sub.Tensor %2484, %2489, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16>
%2491 = torch.aten.mul.Tensor %2490, %2490 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16>
%2492 = torch.aten.sum.dim_IntList %2491, %2485, %true, %none : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,256,1],f16>
%2493 = torch.aten.div.Scalar %2492, %int1280 : !torch.vtensor<[2,256,1],f16>, !torch.int -> !torch.vtensor<[2,256,1],f16>
%2494 = torch.aten.add.Scalar %2493, %float1.000000e-05, %int1 : !torch.vtensor<[2,256,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,256,1],f16>
%2495 = torch.aten.rsqrt %2494 : !torch.vtensor<[2,256,1],f16> -> !torch.vtensor<[2,256,1],f16>
%2496 = torch.prim.ListConstruct %int2, %int256, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2497 = torch.aten.broadcast_to %2495, %2496 : !torch.vtensor<[2,256,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%2498 = torch.aten.mul.Tensor %2490, %2497 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16>
%2499 = torch.aten.mul.Tensor %2498, %225 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[1280],f16> -> !torch.vtensor<[2,256,1280],f16>
%2500 = torch.aten.add.Tensor %2499, %226, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16>
%2501 = torch.aten.transpose.int %227, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%2502 = torch.aten.view %2500, %2121 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
%2503 = torch.aten.mm %2502, %2501 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16>
%2504 = torch.aten.view %2503, %2102 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%2505 = torch.aten.transpose.int %228, %int0, %int1 : !torch.vtensor<[1280,768],f16>, !torch.int, !torch.int -> !torch.vtensor<[768,1280],f16>
%2506 = torch.aten.view %arg2, %968 : !torch.vtensor<[2,77,768],f16>, !torch.list<int> -> !torch.vtensor<[154,768],f16>
%2507 = torch.aten.mm %2506, %2505 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,1280],f16> -> !torch.vtensor<[154,1280],f16>
%2508 = torch.aten.view %2507, %2203 : !torch.vtensor<[154,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,77,1280],f16>
%2509 = torch.aten.transpose.int %229, %int0, %int1 : !torch.vtensor<[1280,768],f16>, !torch.int, !torch.int -> !torch.vtensor<[768,1280],f16>
%2510 = torch.aten.view %arg2, %968 : !torch.vtensor<[2,77,768],f16>, !torch.list<int> -> !torch.vtensor<[154,768],f16>
%2511 = torch.aten.mm %2510, %2509 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,1280],f16> -> !torch.vtensor<[154,1280],f16>
%2512 = torch.aten.view %2511, %2203 : !torch.vtensor<[154,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,77,1280],f16>
%2513 = torch.aten.view %2504, %2133 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16>
%2514 = torch.aten.permute %2513, %901 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16>
%2515 = torch.aten.clone %2514, %int0 : !torch.vtensor<[2,8,256,160],f16>, !torch.int -> !torch.vtensor<[2,8,256,160],f16>
%2516 = torch.aten.view %2515, %2137 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%2517 = torch.aten.view %2508, %2213 : !torch.vtensor<[2,77,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,77,8,160],f16>
%2518 = torch.aten.permute %2517, %901 : !torch.vtensor<[2,77,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,160],f16>
%2519 = torch.aten.clone %2518, %int0 : !torch.vtensor<[2,8,77,160],f16>, !torch.int -> !torch.vtensor<[2,8,77,160],f16>
%2520 = torch.aten.view %2519, %2217 : !torch.vtensor<[2,8,77,160],f16>, !torch.list<int> -> !torch.vtensor<[16,77,160],f16>
%2521 = torch.aten.view %2512, %2213 : !torch.vtensor<[2,77,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,77,8,160],f16>
%2522 = torch.aten.permute %2521, %901 : !torch.vtensor<[2,77,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,160],f16>
%2523 = torch.aten.clone %2522, %int0 : !torch.vtensor<[2,8,77,160],f16>, !torch.int -> !torch.vtensor<[2,8,77,160],f16>
%2524 = torch.aten.view %2523, %2217 : !torch.vtensor<[2,8,77,160],f16>, !torch.list<int> -> !torch.vtensor<[16,77,160],f16>
%2525 = torch.aten.transpose.int %2520, %int-1, %int-2 : !torch.vtensor<[16,77,160],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,160,77],f16>
%2526 = torch.aten.broadcast_to %2516, %2137 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%2527 = torch.aten.view %2526, %2137 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%2528 = torch.aten.broadcast_to %2525, %2226 : !torch.vtensor<[16,160,77],f16>, !torch.list<int> -> !torch.vtensor<[16,160,77],f16>
%2529 = torch.aten.view %2528, %2226 : !torch.vtensor<[16,160,77],f16>, !torch.list<int> -> !torch.vtensor<[16,160,77],f16>
%2530 = torch.aten.bmm %2527, %2529 : !torch.vtensor<[16,256,160],f16>, !torch.vtensor<[16,160,77],f16> -> !torch.vtensor<[16,256,77],f16>
%2531 = torch.aten.view %2530, %2230 : !torch.vtensor<[16,256,77],f16>, !torch.list<int> -> !torch.vtensor<[16,256,77],f16>
%2532 = torch.aten.mul.Tensor %2531, %1 : !torch.vtensor<[16,256,77],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,256,77],f16>
%values_20, %indices_21 = torch.aten.max.dim %2532, %int-1, %true : !torch.vtensor<[16,256,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,256,1],f16>, !torch.vtensor<[16,256,1],si64>
%2533 = torch.aten.sub.Tensor %2532, %values_20, %float1.000000e00 : !torch.vtensor<[16,256,77],f16>, !torch.vtensor<[16,256,1],f16>, !torch.float -> !torch.vtensor<[16,256,77],f16>
%2534 = torch.aten.exp %2533 : !torch.vtensor<[16,256,77],f16> -> !torch.vtensor<[16,256,77],f16>
%2535 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%2536 = torch.aten.sum.dim_IntList %2534, %2535, %true, %none : !torch.vtensor<[16,256,77],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[16,256,1],f16>
%2537 = torch.aten.div.Tensor %2534, %2536 : !torch.vtensor<[16,256,77],f16>, !torch.vtensor<[16,256,1],f16> -> !torch.vtensor<[16,256,77],f16>
%2538 = torch.aten.broadcast_to %2537, %2230 : !torch.vtensor<[16,256,77],f16>, !torch.list<int> -> !torch.vtensor<[16,256,77],f16>
%2539 = torch.aten.view %2538, %2230 : !torch.vtensor<[16,256,77],f16>, !torch.list<int> -> !torch.vtensor<[16,256,77],f16>
%2540 = torch.aten.broadcast_to %2524, %2217 : !torch.vtensor<[16,77,160],f16>, !torch.list<int> -> !torch.vtensor<[16,77,160],f16>
%2541 = torch.aten.view %2540, %2217 : !torch.vtensor<[16,77,160],f16>, !torch.list<int> -> !torch.vtensor<[16,77,160],f16>
%2542 = torch.aten.bmm %2539, %2541 : !torch.vtensor<[16,256,77],f16>, !torch.vtensor<[16,77,160],f16> -> !torch.vtensor<[16,256,160],f16>
%2543 = torch.aten.view %2542, %2137 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%2544 = torch.aten.view %2543, %2168 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16>
%2545 = torch.aten.permute %2544, %901 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16>
%2546 = torch.aten.clone %2545, %int0 : !torch.vtensor<[2,256,8,160],f16>, !torch.int -> !torch.vtensor<[2,256,8,160],f16>
%2547 = torch.aten.view %2546, %2102 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%2548 = torch.aten.transpose.int %230, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%2549 = torch.aten.view %2547, %2121 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
%2550 = torch.aten.mm %2549, %2548 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16>
%2551 = torch.aten.mul.Scalar %231, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16>
%2552 = torch.aten.add.Tensor %2551, %2550, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[512,1280],f16>, !torch.int -> !torch.vtensor<[512,1280],f16>
%2553 = torch.aten.view %2552, %2102 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%2554 = torch.aten.add.Tensor %2553, %2484, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16>
%2555 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%2556 = torch.aten.sum.dim_IntList %2554, %2555, %true, %none : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,256,1],f16>
%2557 = torch.aten.div.Scalar %2556, %int1280 : !torch.vtensor<[2,256,1],f16>, !torch.int -> !torch.vtensor<[2,256,1],f16>
%2558 = torch.prim.ListConstruct %int2, %int256, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2559 = torch.aten.broadcast_to %2557, %2558 : !torch.vtensor<[2,256,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%2560 = torch.aten.sub.Tensor %2554, %2559, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16>
%2561 = torch.aten.mul.Tensor %2560, %2560 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16>
%2562 = torch.aten.sum.dim_IntList %2561, %2555, %true, %none : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,256,1],f16>
%2563 = torch.aten.div.Scalar %2562, %int1280 : !torch.vtensor<[2,256,1],f16>, !torch.int -> !torch.vtensor<[2,256,1],f16>
%2564 = torch.aten.add.Scalar %2563, %float1.000000e-05, %int1 : !torch.vtensor<[2,256,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,256,1],f16>
%2565 = torch.aten.rsqrt %2564 : !torch.vtensor<[2,256,1],f16> -> !torch.vtensor<[2,256,1],f16>
%2566 = torch.prim.ListConstruct %int2, %int256, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2567 = torch.aten.broadcast_to %2565, %2566 : !torch.vtensor<[2,256,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%2568 = torch.aten.mul.Tensor %2560, %2567 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16>
%2569 = torch.aten.mul.Tensor %2568, %232 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[1280],f16> -> !torch.vtensor<[2,256,1280],f16>
%2570 = torch.aten.add.Tensor %2569, %233, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16>
%2571 = torch.aten.transpose.int %234, %int0, %int1 : !torch.vtensor<[10240,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,10240],f16>
%2572 = torch.aten.view %2570, %2121 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
%2573 = torch.aten.mm %2572, %2571 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,10240],f16> -> !torch.vtensor<[512,10240],f16>
%2574 = torch.aten.mul.Scalar %235, %int1 : !torch.vtensor<[10240],f16>, !torch.int -> !torch.vtensor<[10240],f16>
%2575 = torch.aten.add.Tensor %2574, %2573, %int1 : !torch.vtensor<[10240],f16>, !torch.vtensor<[512,10240],f16>, !torch.int -> !torch.vtensor<[512,10240],f16>
%2576 = torch.aten.view %2575, %2276 : !torch.vtensor<[512,10240],f16>, !torch.list<int> -> !torch.vtensor<[2,256,10240],f16>
%2577 = torch.aten.slice.Tensor %2576, %int-1, %int0, %int5120, %int1 : !torch.vtensor<[2,256,10240],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,256,5120],f16>
%2578 = torch.aten.slice.Tensor %2576, %int-1, %int5120, %int10240, %int1 : !torch.vtensor<[2,256,10240],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,256,5120],f16>
%2579 = torch.aten.gelu %2578, %str : !torch.vtensor<[2,256,5120],f16>, !torch.str -> !torch.vtensor<[2,256,5120],f16>
%2580 = torch.aten.mul.Tensor %2577, %2579 : !torch.vtensor<[2,256,5120],f16>, !torch.vtensor<[2,256,5120],f16> -> !torch.vtensor<[2,256,5120],f16>
%2581 = torch.aten.transpose.int %236, %int0, %int1 : !torch.vtensor<[1280,5120],f16>, !torch.int, !torch.int -> !torch.vtensor<[5120,1280],f16>
%2582 = torch.aten.view %2580, %2283 : !torch.vtensor<[2,256,5120],f16>, !torch.list<int> -> !torch.vtensor<[512,5120],f16>
%2583 = torch.aten.mm %2582, %2581 : !torch.vtensor<[512,5120],f16>, !torch.vtensor<[5120,1280],f16> -> !torch.vtensor<[512,1280],f16>
%2584 = torch.aten.mul.Scalar %237, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16>
%2585 = torch.aten.add.Tensor %2584, %2583, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[512,1280],f16>, !torch.int -> !torch.vtensor<[512,1280],f16>
%2586 = torch.aten.view %2585, %2102 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%2587 = torch.aten.add.Tensor %2586, %2554, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16>
%2588 = torch.aten.view %2587, %2290 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,16,16,1280],f16>
%2589 = torch.aten.permute %2588, %1060 : !torch.vtensor<[2,16,16,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f16>
%2590 = torch.aten.convolution %2589, %238, %239, %744, %745, %744, %false, %745, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[1280,1280,1,1],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
%2591 = torch.aten.add.Tensor %2590, %2378, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
%2592 = torch.aten.convolution %2591, %240, %241, %1361, %744, %744, %false, %745, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16>
%2593 = torch.aten.clone %2592, %int0 : !torch.vtensor<[2,1280,8,8],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16>
%2594 = torch.prim.ListConstruct %int2, %int32, %int40, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2595 = torch.aten.view %2593, %2594 : !torch.vtensor<[2,1280,8,8],f16>, !torch.list<int> -> !torch.vtensor<[2,32,40,64],f16>
%2596 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%2597 = torch.aten.to.dtype %2596, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%2598 = torch.prim.ListConstruct %int2, %int32, %int40, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2599 = torch.aten.broadcast_to %2597, %2598 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,40,64],f32>
%2600 = torch.valsem.aten.copy %2599, %2595, %false : !torch.vtensor<[2,32,40,64],f32>, !torch.vtensor<[2,32,40,64],f16>, !torch.bool -> !torch.vtensor<[2,32,40,64],f32>
%2601 = torch.aten.to.dtype %2600, %int7, %false, %false, %none : !torch.vtensor<[2,32,40,64],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,40,64],f64>
%2602 = torch.aten.sum.dim_IntList %2601, %754, %true, %none : !torch.vtensor<[2,32,40,64],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%2603 = torch.aten.div.Scalar %2602, %int2560 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%2604 = torch.aten.sub.Tensor %2601, %2603, %float1.000000e00 : !torch.vtensor<[2,32,40,64],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,40,64],f64>
%2605 = torch.aten.mul.Tensor %2604, %2604 : !torch.vtensor<[2,32,40,64],f64>, !torch.vtensor<[2,32,40,64],f64> -> !torch.vtensor<[2,32,40,64],f64>
%2606 = torch.aten.sum.dim_IntList %2605, %754, %true, %none : !torch.vtensor<[2,32,40,64],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%2607 = torch.aten.div.Scalar %2606, %int2560 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%2608 = torch.aten.to.dtype %2607, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%2609 = torch.aten.sum.dim_IntList %2600, %754, %true, %none : !torch.vtensor<[2,32,40,64],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%2610 = torch.aten.div.Scalar %2609, %int2560 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%2611 = torch.aten.add.Tensor %2608, %6, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%2612 = torch.aten.rsqrt %2611 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%2613 = torch.aten.sub.Tensor %2595, %2610, %int1 : !torch.vtensor<[2,32,40,64],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,64],f32>
%2614 = torch.aten.mul.Tensor %2613, %2612 : !torch.vtensor<[2,32,40,64],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,64],f32>
%2615 = torch.prim.ListConstruct %int2, %int1280, %int8, %int8 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2616 = torch.aten.view %2614, %2615 : !torch.vtensor<[2,32,40,64],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,8,8],f32>
%2617 = torch.aten.unsqueeze %242, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%2618 = torch.aten.unsqueeze %2617, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%2619 = torch.aten.mul.Tensor %2616, %2618 : !torch.vtensor<[2,1280,8,8],f32>, !torch.vtensor<[1280,1,1],f16> -> !torch.vtensor<[2,1280,8,8],f32>
%2620 = torch.aten.unsqueeze %243, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%2621 = torch.aten.unsqueeze %2620, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%2622 = torch.aten.add.Tensor %2619, %2621, %int1 : !torch.vtensor<[2,1280,8,8],f32>, !torch.vtensor<[1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f32>
%2623 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%2624 = torch.aten.to.dtype %2623, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%2625 = torch.prim.ListConstruct %int2, %int1280, %int8, %int8 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2626 = torch.aten.broadcast_to %2624, %2625 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,8,8],f16>
%2627 = torch.valsem.aten.copy %2626, %2622, %false : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,8,8],f32>, !torch.bool -> !torch.vtensor<[2,1280,8,8],f16>
%2628 = torch.aten.sigmoid %2627 : !torch.vtensor<[2,1280,8,8],f16> -> !torch.vtensor<[2,1280,8,8],f16>
%2629 = torch.aten.mul.Tensor %2628, %2627 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,8,8],f16> -> !torch.vtensor<[2,1280,8,8],f16>
%2630 = torch.aten.convolution %2629, %244, %245, %744, %744, %744, %false, %745, %int1 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16>
%2631 = torch.aten.sigmoid %743 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%2632 = torch.aten.mul.Tensor %2631, %743 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%2633 = torch.aten.transpose.int %246, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%2634 = torch.aten.mm %2632, %2633 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[2,1280],f16>
%2635 = torch.aten.mul.Scalar %247, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16>
%2636 = torch.aten.add.Tensor %2635, %2634, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[2,1280],f16>, !torch.int -> !torch.vtensor<[2,1280],f16>
%2637 = torch.aten.slice.Tensor %2636, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,1280],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
%2638 = torch.aten.slice.Tensor %2637, %int1, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,1280],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
%2639 = torch.aten.unsqueeze %2638, %int2 : !torch.vtensor<[2,1280],f16>, !torch.int -> !torch.vtensor<[2,1280,1],f16>
%2640 = torch.aten.unsqueeze %2639, %int3 : !torch.vtensor<[2,1280,1],f16>, !torch.int -> !torch.vtensor<[2,1280,1,1],f16>
%2641 = torch.aten.add.Tensor %2630, %2640, %int1 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16>
%2642 = torch.aten.view %2641, %2594 : !torch.vtensor<[2,1280,8,8],f16>, !torch.list<int> -> !torch.vtensor<[2,32,40,64],f16>
%2643 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%2644 = torch.aten.to.dtype %2643, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%2645 = torch.prim.ListConstruct %int2, %int32, %int40, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2646 = torch.aten.broadcast_to %2644, %2645 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,40,64],f32>
%2647 = torch.valsem.aten.copy %2646, %2642, %false : !torch.vtensor<[2,32,40,64],f32>, !torch.vtensor<[2,32,40,64],f16>, !torch.bool -> !torch.vtensor<[2,32,40,64],f32>
%2648 = torch.aten.to.dtype %2647, %int7, %false, %false, %none : !torch.vtensor<[2,32,40,64],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,40,64],f64>
%2649 = torch.aten.sum.dim_IntList %2648, %754, %true, %none : !torch.vtensor<[2,32,40,64],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%2650 = torch.aten.div.Scalar %2649, %int2560 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%2651 = torch.aten.sub.Tensor %2648, %2650, %float1.000000e00 : !torch.vtensor<[2,32,40,64],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,40,64],f64>
%2652 = torch.aten.mul.Tensor %2651, %2651 : !torch.vtensor<[2,32,40,64],f64>, !torch.vtensor<[2,32,40,64],f64> -> !torch.vtensor<[2,32,40,64],f64>
%2653 = torch.aten.sum.dim_IntList %2652, %754, %true, %none : !torch.vtensor<[2,32,40,64],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%2654 = torch.aten.div.Scalar %2653, %int2560 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%2655 = torch.aten.to.dtype %2654, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%2656 = torch.aten.sum.dim_IntList %2647, %754, %true, %none : !torch.vtensor<[2,32,40,64],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%2657 = torch.aten.div.Scalar %2656, %int2560 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%2658 = torch.aten.add.Tensor %2655, %6, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%2659 = torch.aten.rsqrt %2658 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%2660 = torch.aten.sub.Tensor %2642, %2657, %int1 : !torch.vtensor<[2,32,40,64],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,64],f32>
%2661 = torch.aten.mul.Tensor %2660, %2659 : !torch.vtensor<[2,32,40,64],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,64],f32>
%2662 = torch.aten.view %2661, %2615 : !torch.vtensor<[2,32,40,64],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,8,8],f32>
%2663 = torch.aten.unsqueeze %248, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%2664 = torch.aten.unsqueeze %2663, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%2665 = torch.aten.mul.Tensor %2662, %2664 : !torch.vtensor<[2,1280,8,8],f32>, !torch.vtensor<[1280,1,1],f16> -> !torch.vtensor<[2,1280,8,8],f32>
%2666 = torch.aten.unsqueeze %249, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%2667 = torch.aten.unsqueeze %2666, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%2668 = torch.aten.add.Tensor %2665, %2667, %int1 : !torch.vtensor<[2,1280,8,8],f32>, !torch.vtensor<[1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f32>
%2669 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%2670 = torch.aten.to.dtype %2669, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%2671 = torch.prim.ListConstruct %int2, %int1280, %int8, %int8 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2672 = torch.aten.broadcast_to %2670, %2671 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,8,8],f16>
%2673 = torch.valsem.aten.copy %2672, %2668, %false : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,8,8],f32>, !torch.bool -> !torch.vtensor<[2,1280,8,8],f16>
%2674 = torch.aten.sigmoid %2673 : !torch.vtensor<[2,1280,8,8],f16> -> !torch.vtensor<[2,1280,8,8],f16>
%2675 = torch.aten.mul.Tensor %2674, %2673 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,8,8],f16> -> !torch.vtensor<[2,1280,8,8],f16>
%2676 = torch.aten.convolution %2675, %250, %251, %744, %744, %744, %false, %745, %int1 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16>
%2677 = torch.aten.add.Tensor %2592, %2676, %int1 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,8,8],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16>
%2678 = torch.aten.div.Tensor %2677, %5 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[2,1280,8,8],f16>
%2679 = torch.aten.clone %2678, %int0 : !torch.vtensor<[2,1280,8,8],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16>
%2680 = torch.aten.view %2679, %2594 : !torch.vtensor<[2,1280,8,8],f16>, !torch.list<int> -> !torch.vtensor<[2,32,40,64],f16>
%2681 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%2682 = torch.aten.to.dtype %2681, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%2683 = torch.prim.ListConstruct %int2, %int32, %int40, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2684 = torch.aten.broadcast_to %2682, %2683 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,40,64],f32>
%2685 = torch.valsem.aten.copy %2684, %2680, %false : !torch.vtensor<[2,32,40,64],f32>, !torch.vtensor<[2,32,40,64],f16>, !torch.bool -> !torch.vtensor<[2,32,40,64],f32>
%2686 = torch.aten.to.dtype %2685, %int7, %false, %false, %none : !torch.vtensor<[2,32,40,64],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,40,64],f64>
%2687 = torch.aten.sum.dim_IntList %2686, %754, %true, %none : !torch.vtensor<[2,32,40,64],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%2688 = torch.aten.div.Scalar %2687, %int2560 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%2689 = torch.aten.sub.Tensor %2686, %2688, %float1.000000e00 : !torch.vtensor<[2,32,40,64],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,40,64],f64>
%2690 = torch.aten.mul.Tensor %2689, %2689 : !torch.vtensor<[2,32,40,64],f64>, !torch.vtensor<[2,32,40,64],f64> -> !torch.vtensor<[2,32,40,64],f64>
%2691 = torch.aten.sum.dim_IntList %2690, %754, %true, %none : !torch.vtensor<[2,32,40,64],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%2692 = torch.aten.div.Scalar %2691, %int2560 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%2693 = torch.aten.to.dtype %2692, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%2694 = torch.aten.sum.dim_IntList %2685, %754, %true, %none : !torch.vtensor<[2,32,40,64],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%2695 = torch.aten.div.Scalar %2694, %int2560 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%2696 = torch.aten.add.Tensor %2693, %6, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%2697 = torch.aten.rsqrt %2696 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%2698 = torch.aten.sub.Tensor %2680, %2695, %int1 : !torch.vtensor<[2,32,40,64],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,64],f32>
%2699 = torch.aten.mul.Tensor %2698, %2697 : !torch.vtensor<[2,32,40,64],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,64],f32>
%2700 = torch.aten.view %2699, %2615 : !torch.vtensor<[2,32,40,64],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,8,8],f32>
%2701 = torch.aten.unsqueeze %252, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%2702 = torch.aten.unsqueeze %2701, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%2703 = torch.aten.mul.Tensor %2700, %2702 : !torch.vtensor<[2,1280,8,8],f32>, !torch.vtensor<[1280,1,1],f16> -> !torch.vtensor<[2,1280,8,8],f32>
%2704 = torch.aten.unsqueeze %253, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%2705 = torch.aten.unsqueeze %2704, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%2706 = torch.aten.add.Tensor %2703, %2705, %int1 : !torch.vtensor<[2,1280,8,8],f32>, !torch.vtensor<[1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f32>
%2707 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%2708 = torch.aten.to.dtype %2707, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%2709 = torch.prim.ListConstruct %int2, %int1280, %int8, %int8 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2710 = torch.aten.broadcast_to %2708, %2709 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,8,8],f16>
%2711 = torch.valsem.aten.copy %2710, %2706, %false : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,8,8],f32>, !torch.bool -> !torch.vtensor<[2,1280,8,8],f16>
%2712 = torch.aten.sigmoid %2711 : !torch.vtensor<[2,1280,8,8],f16> -> !torch.vtensor<[2,1280,8,8],f16>
%2713 = torch.aten.mul.Tensor %2712, %2711 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,8,8],f16> -> !torch.vtensor<[2,1280,8,8],f16>
%2714 = torch.aten.convolution %2713, %254, %255, %744, %744, %744, %false, %745, %int1 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16>
%2715 = torch.aten.sigmoid %743 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%2716 = torch.aten.mul.Tensor %2715, %743 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%2717 = torch.aten.transpose.int %256, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%2718 = torch.aten.mm %2716, %2717 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[2,1280],f16>
%2719 = torch.aten.mul.Scalar %257, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16>
%2720 = torch.aten.add.Tensor %2719, %2718, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[2,1280],f16>, !torch.int -> !torch.vtensor<[2,1280],f16>
%2721 = torch.aten.slice.Tensor %2720, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,1280],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
%2722 = torch.aten.slice.Tensor %2721, %int1, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,1280],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
%2723 = torch.aten.unsqueeze %2722, %int2 : !torch.vtensor<[2,1280],f16>, !torch.int -> !torch.vtensor<[2,1280,1],f16>
%2724 = torch.aten.unsqueeze %2723, %int3 : !torch.vtensor<[2,1280,1],f16>, !torch.int -> !torch.vtensor<[2,1280,1,1],f16>
%2725 = torch.aten.add.Tensor %2714, %2724, %int1 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16>
%2726 = torch.aten.view %2725, %2594 : !torch.vtensor<[2,1280,8,8],f16>, !torch.list<int> -> !torch.vtensor<[2,32,40,64],f16>
%2727 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%2728 = torch.aten.to.dtype %2727, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%2729 = torch.prim.ListConstruct %int2, %int32, %int40, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2730 = torch.aten.broadcast_to %2728, %2729 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,40,64],f32>
%2731 = torch.valsem.aten.copy %2730, %2726, %false : !torch.vtensor<[2,32,40,64],f32>, !torch.vtensor<[2,32,40,64],f16>, !torch.bool -> !torch.vtensor<[2,32,40,64],f32>
%2732 = torch.aten.to.dtype %2731, %int7, %false, %false, %none : !torch.vtensor<[2,32,40,64],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,40,64],f64>
%2733 = torch.aten.sum.dim_IntList %2732, %754, %true, %none : !torch.vtensor<[2,32,40,64],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%2734 = torch.aten.div.Scalar %2733, %int2560 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%2735 = torch.aten.sub.Tensor %2732, %2734, %float1.000000e00 : !torch.vtensor<[2,32,40,64],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,40,64],f64>
%2736 = torch.aten.mul.Tensor %2735, %2735 : !torch.vtensor<[2,32,40,64],f64>, !torch.vtensor<[2,32,40,64],f64> -> !torch.vtensor<[2,32,40,64],f64>
%2737 = torch.aten.sum.dim_IntList %2736, %754, %true, %none : !torch.vtensor<[2,32,40,64],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%2738 = torch.aten.div.Scalar %2737, %int2560 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%2739 = torch.aten.to.dtype %2738, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%2740 = torch.aten.sum.dim_IntList %2731, %754, %true, %none : !torch.vtensor<[2,32,40,64],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%2741 = torch.aten.div.Scalar %2740, %int2560 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%2742 = torch.aten.add.Tensor %2739, %6, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%2743 = torch.aten.rsqrt %2742 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%2744 = torch.aten.sub.Tensor %2726, %2741, %int1 : !torch.vtensor<[2,32,40,64],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,64],f32>
%2745 = torch.aten.mul.Tensor %2744, %2743 : !torch.vtensor<[2,32,40,64],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,64],f32>
%2746 = torch.aten.view %2745, %2615 : !torch.vtensor<[2,32,40,64],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,8,8],f32>
%2747 = torch.aten.unsqueeze %258, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%2748 = torch.aten.unsqueeze %2747, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%2749 = torch.aten.mul.Tensor %2746, %2748 : !torch.vtensor<[2,1280,8,8],f32>, !torch.vtensor<[1280,1,1],f16> -> !torch.vtensor<[2,1280,8,8],f32>
%2750 = torch.aten.unsqueeze %259, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%2751 = torch.aten.unsqueeze %2750, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%2752 = torch.aten.add.Tensor %2749, %2751, %int1 : !torch.vtensor<[2,1280,8,8],f32>, !torch.vtensor<[1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f32>
%2753 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%2754 = torch.aten.to.dtype %2753, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%2755 = torch.prim.ListConstruct %int2, %int1280, %int8, %int8 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2756 = torch.aten.broadcast_to %2754, %2755 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,8,8],f16>
%2757 = torch.valsem.aten.copy %2756, %2752, %false : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,8,8],f32>, !torch.bool -> !torch.vtensor<[2,1280,8,8],f16>
%2758 = torch.aten.sigmoid %2757 : !torch.vtensor<[2,1280,8,8],f16> -> !torch.vtensor<[2,1280,8,8],f16>
%2759 = torch.aten.mul.Tensor %2758, %2757 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,8,8],f16> -> !torch.vtensor<[2,1280,8,8],f16>
%2760 = torch.aten.convolution %2759, %260, %261, %744, %744, %744, %false, %745, %int1 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16>
%2761 = torch.aten.add.Tensor %2678, %2760, %int1 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,8,8],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16>
%2762 = torch.aten.div.Tensor %2761, %5 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[2,1280,8,8],f16>
%2763 = torch.aten.clone %2762, %int0 : !torch.vtensor<[2,1280,8,8],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16>
%2764 = torch.aten.view %2763, %2594 : !torch.vtensor<[2,1280,8,8],f16>, !torch.list<int> -> !torch.vtensor<[2,32,40,64],f16>
%2765 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%2766 = torch.aten.to.dtype %2765, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%2767 = torch.prim.ListConstruct %int2, %int32, %int40, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2768 = torch.aten.broadcast_to %2766, %2767 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,40,64],f32>
%2769 = torch.valsem.aten.copy %2768, %2764, %false : !torch.vtensor<[2,32,40,64],f32>, !torch.vtensor<[2,32,40,64],f16>, !torch.bool -> !torch.vtensor<[2,32,40,64],f32>
%2770 = torch.aten.to.dtype %2769, %int7, %false, %false, %none : !torch.vtensor<[2,32,40,64],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,40,64],f64>
%2771 = torch.aten.sum.dim_IntList %2770, %754, %true, %none : !torch.vtensor<[2,32,40,64],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%2772 = torch.aten.div.Scalar %2771, %int2560 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%2773 = torch.aten.sub.Tensor %2770, %2772, %float1.000000e00 : !torch.vtensor<[2,32,40,64],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,40,64],f64>
%2774 = torch.aten.mul.Tensor %2773, %2773 : !torch.vtensor<[2,32,40,64],f64>, !torch.vtensor<[2,32,40,64],f64> -> !torch.vtensor<[2,32,40,64],f64>
%2775 = torch.aten.sum.dim_IntList %2774, %754, %true, %none : !torch.vtensor<[2,32,40,64],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%2776 = torch.aten.div.Scalar %2775, %int2560 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%2777 = torch.aten.to.dtype %2776, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%2778 = torch.aten.sum.dim_IntList %2769, %754, %true, %none : !torch.vtensor<[2,32,40,64],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%2779 = torch.aten.div.Scalar %2778, %int2560 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%2780 = torch.aten.add.Tensor %2777, %6, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%2781 = torch.aten.rsqrt %2780 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%2782 = torch.aten.sub.Tensor %2764, %2779, %int1 : !torch.vtensor<[2,32,40,64],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,64],f32>
%2783 = torch.aten.mul.Tensor %2782, %2781 : !torch.vtensor<[2,32,40,64],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,64],f32>
%2784 = torch.aten.view %2783, %2615 : !torch.vtensor<[2,32,40,64],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,8,8],f32>
%2785 = torch.aten.unsqueeze %262, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%2786 = torch.aten.unsqueeze %2785, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%2787 = torch.aten.mul.Tensor %2784, %2786 : !torch.vtensor<[2,1280,8,8],f32>, !torch.vtensor<[1280,1,1],f16> -> !torch.vtensor<[2,1280,8,8],f32>
%2788 = torch.aten.unsqueeze %263, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%2789 = torch.aten.unsqueeze %2788, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%2790 = torch.aten.add.Tensor %2787, %2789, %int1 : !torch.vtensor<[2,1280,8,8],f32>, !torch.vtensor<[1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f32>
%2791 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%2792 = torch.aten.to.dtype %2791, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%2793 = torch.prim.ListConstruct %int2, %int1280, %int8, %int8 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2794 = torch.aten.broadcast_to %2792, %2793 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,8,8],f16>
%2795 = torch.valsem.aten.copy %2794, %2790, %false : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,8,8],f32>, !torch.bool -> !torch.vtensor<[2,1280,8,8],f16>
%2796 = torch.aten.sigmoid %2795 : !torch.vtensor<[2,1280,8,8],f16> -> !torch.vtensor<[2,1280,8,8],f16>
%2797 = torch.aten.mul.Tensor %2796, %2795 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,8,8],f16> -> !torch.vtensor<[2,1280,8,8],f16>
%2798 = torch.aten.convolution %2797, %264, %265, %744, %744, %744, %false, %745, %int1 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16>
%2799 = torch.aten.sigmoid %743 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%2800 = torch.aten.mul.Tensor %2799, %743 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%2801 = torch.aten.transpose.int %266, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%2802 = torch.aten.mm %2800, %2801 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[2,1280],f16>
%2803 = torch.aten.mul.Scalar %267, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16>
%2804 = torch.aten.add.Tensor %2803, %2802, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[2,1280],f16>, !torch.int -> !torch.vtensor<[2,1280],f16>
%2805 = torch.aten.slice.Tensor %2804, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,1280],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
%2806 = torch.aten.slice.Tensor %2805, %int1, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,1280],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
%2807 = torch.aten.unsqueeze %2806, %int2 : !torch.vtensor<[2,1280],f16>, !torch.int -> !torch.vtensor<[2,1280,1],f16>
%2808 = torch.aten.unsqueeze %2807, %int3 : !torch.vtensor<[2,1280,1],f16>, !torch.int -> !torch.vtensor<[2,1280,1,1],f16>
%2809 = torch.aten.add.Tensor %2798, %2808, %int1 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16>
%2810 = torch.aten.view %2809, %2594 : !torch.vtensor<[2,1280,8,8],f16>, !torch.list<int> -> !torch.vtensor<[2,32,40,64],f16>
%2811 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%2812 = torch.aten.to.dtype %2811, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%2813 = torch.prim.ListConstruct %int2, %int32, %int40, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2814 = torch.aten.broadcast_to %2812, %2813 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,40,64],f32>
%2815 = torch.valsem.aten.copy %2814, %2810, %false : !torch.vtensor<[2,32,40,64],f32>, !torch.vtensor<[2,32,40,64],f16>, !torch.bool -> !torch.vtensor<[2,32,40,64],f32>
%2816 = torch.aten.to.dtype %2815, %int7, %false, %false, %none : !torch.vtensor<[2,32,40,64],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,40,64],f64>
%2817 = torch.aten.sum.dim_IntList %2816, %754, %true, %none : !torch.vtensor<[2,32,40,64],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%2818 = torch.aten.div.Scalar %2817, %int2560 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%2819 = torch.aten.sub.Tensor %2816, %2818, %float1.000000e00 : !torch.vtensor<[2,32,40,64],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,40,64],f64>
%2820 = torch.aten.mul.Tensor %2819, %2819 : !torch.vtensor<[2,32,40,64],f64>, !torch.vtensor<[2,32,40,64],f64> -> !torch.vtensor<[2,32,40,64],f64>
%2821 = torch.aten.sum.dim_IntList %2820, %754, %true, %none : !torch.vtensor<[2,32,40,64],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%2822 = torch.aten.div.Scalar %2821, %int2560 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%2823 = torch.aten.to.dtype %2822, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%2824 = torch.aten.sum.dim_IntList %2815, %754, %true, %none : !torch.vtensor<[2,32,40,64],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%2825 = torch.aten.div.Scalar %2824, %int2560 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%2826 = torch.aten.add.Tensor %2823, %6, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%2827 = torch.aten.rsqrt %2826 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%2828 = torch.aten.sub.Tensor %2810, %2825, %int1 : !torch.vtensor<[2,32,40,64],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,64],f32>
%2829 = torch.aten.mul.Tensor %2828, %2827 : !torch.vtensor<[2,32,40,64],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,64],f32>
%2830 = torch.aten.view %2829, %2615 : !torch.vtensor<[2,32,40,64],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,8,8],f32>
%2831 = torch.aten.unsqueeze %268, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%2832 = torch.aten.unsqueeze %2831, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%2833 = torch.aten.mul.Tensor %2830, %2832 : !torch.vtensor<[2,1280,8,8],f32>, !torch.vtensor<[1280,1,1],f16> -> !torch.vtensor<[2,1280,8,8],f32>
%2834 = torch.aten.unsqueeze %269, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%2835 = torch.aten.unsqueeze %2834, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%2836 = torch.aten.add.Tensor %2833, %2835, %int1 : !torch.vtensor<[2,1280,8,8],f32>, !torch.vtensor<[1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f32>
%2837 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%2838 = torch.aten.to.dtype %2837, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%2839 = torch.prim.ListConstruct %int2, %int1280, %int8, %int8 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2840 = torch.aten.broadcast_to %2838, %2839 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,8,8],f16>
%2841 = torch.valsem.aten.copy %2840, %2836, %false : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,8,8],f32>, !torch.bool -> !torch.vtensor<[2,1280,8,8],f16>
%2842 = torch.aten.sigmoid %2841 : !torch.vtensor<[2,1280,8,8],f16> -> !torch.vtensor<[2,1280,8,8],f16>
%2843 = torch.aten.mul.Tensor %2842, %2841 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,8,8],f16> -> !torch.vtensor<[2,1280,8,8],f16>
%2844 = torch.aten.convolution %2843, %270, %271, %744, %744, %744, %false, %745, %int1 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16>
%2845 = torch.aten.add.Tensor %2762, %2844, %int1 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,8,8],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16>
%2846 = torch.aten.div.Tensor %2845, %9 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[],si64> -> !torch.vtensor<[2,1280,8,8],f16>
%2847 = torch.aten.clone %2846, %int0 : !torch.vtensor<[2,1280,8,8],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16>
%2848 = torch.aten.view %2847, %2594 : !torch.vtensor<[2,1280,8,8],f16>, !torch.list<int> -> !torch.vtensor<[2,32,40,64],f16>
%2849 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%2850 = torch.aten.to.dtype %2849, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%2851 = torch.prim.ListConstruct %int2, %int32, %int40, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2852 = torch.aten.broadcast_to %2850, %2851 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,40,64],f32>
%2853 = torch.valsem.aten.copy %2852, %2848, %false : !torch.vtensor<[2,32,40,64],f32>, !torch.vtensor<[2,32,40,64],f16>, !torch.bool -> !torch.vtensor<[2,32,40,64],f32>
%2854 = torch.aten.to.dtype %2853, %int7, %false, %false, %none : !torch.vtensor<[2,32,40,64],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,40,64],f64>
%2855 = torch.aten.sum.dim_IntList %2854, %754, %true, %none : !torch.vtensor<[2,32,40,64],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%2856 = torch.aten.div.Scalar %2855, %int2560 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%2857 = torch.aten.sub.Tensor %2854, %2856, %float1.000000e00 : !torch.vtensor<[2,32,40,64],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,40,64],f64>
%2858 = torch.aten.mul.Tensor %2857, %2857 : !torch.vtensor<[2,32,40,64],f64>, !torch.vtensor<[2,32,40,64],f64> -> !torch.vtensor<[2,32,40,64],f64>
%2859 = torch.aten.sum.dim_IntList %2858, %754, %true, %none : !torch.vtensor<[2,32,40,64],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%2860 = torch.aten.div.Scalar %2859, %int2560 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%2861 = torch.aten.to.dtype %2860, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%2862 = torch.aten.sum.dim_IntList %2853, %754, %true, %none : !torch.vtensor<[2,32,40,64],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%2863 = torch.aten.div.Scalar %2862, %int2560 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%2864 = torch.aten.add.Tensor %2861, %4, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%2865 = torch.aten.rsqrt %2864 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%2866 = torch.aten.sub.Tensor %2848, %2863, %int1 : !torch.vtensor<[2,32,40,64],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,64],f32>
%2867 = torch.aten.mul.Tensor %2866, %2865 : !torch.vtensor<[2,32,40,64],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,64],f32>
%2868 = torch.aten.view %2867, %2615 : !torch.vtensor<[2,32,40,64],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,8,8],f32>
%2869 = torch.aten.unsqueeze %272, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%2870 = torch.aten.unsqueeze %2869, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%2871 = torch.aten.mul.Tensor %2868, %2870 : !torch.vtensor<[2,1280,8,8],f32>, !torch.vtensor<[1280,1,1],f16> -> !torch.vtensor<[2,1280,8,8],f32>
%2872 = torch.aten.unsqueeze %273, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%2873 = torch.aten.unsqueeze %2872, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%2874 = torch.aten.add.Tensor %2871, %2873, %int1 : !torch.vtensor<[2,1280,8,8],f32>, !torch.vtensor<[1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f32>
%2875 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%2876 = torch.aten.to.dtype %2875, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%2877 = torch.prim.ListConstruct %int2, %int1280, %int8, %int8 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2878 = torch.aten.broadcast_to %2876, %2877 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,8,8],f16>
%2879 = torch.valsem.aten.copy %2878, %2874, %false : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,8,8],f32>, !torch.bool -> !torch.vtensor<[2,1280,8,8],f16>
%2880 = torch.aten.convolution %2879, %274, %275, %744, %745, %744, %false, %745, %int1 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[1280,1280,1,1],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16>
%2881 = torch.aten.permute %2880, %866 : !torch.vtensor<[2,1280,8,8],f16>, !torch.list<int> -> !torch.vtensor<[2,8,8,1280],f16>
%2882 = torch.prim.ListConstruct %int2, %int64, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2883 = torch.aten.view %2881, %2882 : !torch.vtensor<[2,8,8,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,64,1280],f16>
%2884 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%2885 = torch.aten.sum.dim_IntList %2883, %2884, %true, %none : !torch.vtensor<[2,64,1280],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,64,1],f16>
%2886 = torch.aten.div.Scalar %2885, %int1280 : !torch.vtensor<[2,64,1],f16>, !torch.int -> !torch.vtensor<[2,64,1],f16>
%2887 = torch.prim.ListConstruct %int2, %int64, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2888 = torch.aten.broadcast_to %2886, %2887 : !torch.vtensor<[2,64,1],f16>, !torch.list<int> -> !torch.vtensor<[2,64,1280],f16>
%2889 = torch.aten.sub.Tensor %2883, %2888, %int1 : !torch.vtensor<[2,64,1280],f16>, !torch.vtensor<[2,64,1280],f16>, !torch.int -> !torch.vtensor<[2,64,1280],f16>
%2890 = torch.aten.mul.Tensor %2889, %2889 : !torch.vtensor<[2,64,1280],f16>, !torch.vtensor<[2,64,1280],f16> -> !torch.vtensor<[2,64,1280],f16>
%2891 = torch.aten.sum.dim_IntList %2890, %2884, %true, %none : !torch.vtensor<[2,64,1280],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,64,1],f16>
%2892 = torch.aten.div.Scalar %2891, %int1280 : !torch.vtensor<[2,64,1],f16>, !torch.int -> !torch.vtensor<[2,64,1],f16>
%2893 = torch.aten.add.Scalar %2892, %float1.000000e-05, %int1 : !torch.vtensor<[2,64,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,64,1],f16>
%2894 = torch.aten.rsqrt %2893 : !torch.vtensor<[2,64,1],f16> -> !torch.vtensor<[2,64,1],f16>
%2895 = torch.prim.ListConstruct %int2, %int64, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2896 = torch.aten.broadcast_to %2894, %2895 : !torch.vtensor<[2,64,1],f16>, !torch.list<int> -> !torch.vtensor<[2,64,1280],f16>
%2897 = torch.aten.mul.Tensor %2889, %2896 : !torch.vtensor<[2,64,1280],f16>, !torch.vtensor<[2,64,1280],f16> -> !torch.vtensor<[2,64,1280],f16>
%2898 = torch.aten.mul.Tensor %2897, %276 : !torch.vtensor<[2,64,1280],f16>, !torch.vtensor<[1280],f16> -> !torch.vtensor<[2,64,1280],f16>
%2899 = torch.aten.add.Tensor %2898, %277, %int1 : !torch.vtensor<[2,64,1280],f16>, !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[2,64,1280],f16>
%2900 = torch.aten.transpose.int %278, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%2901 = torch.prim.ListConstruct %int128, %int1280 : (!torch.int, !torch.int) -> !torch.list<int>
%2902 = torch.aten.view %2899, %2901 : !torch.vtensor<[2,64,1280],f16>, !torch.list<int> -> !torch.vtensor<[128,1280],f16>
%2903 = torch.aten.mm %2902, %2900 : !torch.vtensor<[128,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[128,1280],f16>
%2904 = torch.aten.view %2903, %2882 : !torch.vtensor<[128,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,64,1280],f16>
%2905 = torch.aten.transpose.int %279, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%2906 = torch.aten.view %2899, %2901 : !torch.vtensor<[2,64,1280],f16>, !torch.list<int> -> !torch.vtensor<[128,1280],f16>
%2907 = torch.aten.mm %2906, %2905 : !torch.vtensor<[128,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[128,1280],f16>
%2908 = torch.aten.view %2907, %2882 : !torch.vtensor<[128,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,64,1280],f16>
%2909 = torch.aten.transpose.int %280, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%2910 = torch.aten.view %2899, %2901 : !torch.vtensor<[2,64,1280],f16>, !torch.list<int> -> !torch.vtensor<[128,1280],f16>
%2911 = torch.aten.mm %2910, %2909 : !torch.vtensor<[128,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[128,1280],f16>
%2912 = torch.aten.view %2911, %2882 : !torch.vtensor<[128,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,64,1280],f16>
%2913 = torch.prim.ListConstruct %int2, %int64, %int8, %int160 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2914 = torch.aten.view %2904, %2913 : !torch.vtensor<[2,64,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,64,8,160],f16>
%2915 = torch.aten.permute %2914, %901 : !torch.vtensor<[2,64,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,64,160],f16>
%2916 = torch.aten.clone %2915, %int0 : !torch.vtensor<[2,8,64,160],f16>, !torch.int -> !torch.vtensor<[2,8,64,160],f16>
%2917 = torch.prim.ListConstruct %int16, %int64, %int160 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2918 = torch.aten.view %2916, %2917 : !torch.vtensor<[2,8,64,160],f16>, !torch.list<int> -> !torch.vtensor<[16,64,160],f16>
%2919 = torch.aten.view %2908, %2913 : !torch.vtensor<[2,64,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,64,8,160],f16>
%2920 = torch.aten.permute %2919, %901 : !torch.vtensor<[2,64,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,64,160],f16>
%2921 = torch.aten.clone %2920, %int0 : !torch.vtensor<[2,8,64,160],f16>, !torch.int -> !torch.vtensor<[2,8,64,160],f16>
%2922 = torch.aten.view %2921, %2917 : !torch.vtensor<[2,8,64,160],f16>, !torch.list<int> -> !torch.vtensor<[16,64,160],f16>
%2923 = torch.aten.view %2912, %2913 : !torch.vtensor<[2,64,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,64,8,160],f16>
%2924 = torch.aten.permute %2923, %901 : !torch.vtensor<[2,64,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,64,160],f16>
%2925 = torch.aten.clone %2924, %int0 : !torch.vtensor<[2,8,64,160],f16>, !torch.int -> !torch.vtensor<[2,8,64,160],f16>
%2926 = torch.aten.view %2925, %2917 : !torch.vtensor<[2,8,64,160],f16>, !torch.list<int> -> !torch.vtensor<[16,64,160],f16>
%2927 = torch.aten.transpose.int %2922, %int-1, %int-2 : !torch.vtensor<[16,64,160],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,160,64],f16>
%2928 = torch.aten.broadcast_to %2918, %2917 : !torch.vtensor<[16,64,160],f16>, !torch.list<int> -> !torch.vtensor<[16,64,160],f16>
%2929 = torch.aten.view %2928, %2917 : !torch.vtensor<[16,64,160],f16>, !torch.list<int> -> !torch.vtensor<[16,64,160],f16>
%2930 = torch.prim.ListConstruct %int16, %int160, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2931 = torch.aten.broadcast_to %2927, %2930 : !torch.vtensor<[16,160,64],f16>, !torch.list<int> -> !torch.vtensor<[16,160,64],f16>
%2932 = torch.aten.view %2931, %2930 : !torch.vtensor<[16,160,64],f16>, !torch.list<int> -> !torch.vtensor<[16,160,64],f16>
%2933 = torch.aten.bmm %2929, %2932 : !torch.vtensor<[16,64,160],f16>, !torch.vtensor<[16,160,64],f16> -> !torch.vtensor<[16,64,64],f16>
%2934 = torch.prim.ListConstruct %int16, %int64, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2935 = torch.aten.view %2933, %2934 : !torch.vtensor<[16,64,64],f16>, !torch.list<int> -> !torch.vtensor<[16,64,64],f16>
%2936 = torch.aten.mul.Tensor %2935, %1 : !torch.vtensor<[16,64,64],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,64,64],f16>
%values_22, %indices_23 = torch.aten.max.dim %2936, %int-1, %true : !torch.vtensor<[16,64,64],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,64,1],f16>, !torch.vtensor<[16,64,1],si64>
%2937 = torch.aten.sub.Tensor %2936, %values_22, %float1.000000e00 : !torch.vtensor<[16,64,64],f16>, !torch.vtensor<[16,64,1],f16>, !torch.float -> !torch.vtensor<[16,64,64],f16>
%2938 = torch.aten.exp %2937 : !torch.vtensor<[16,64,64],f16> -> !torch.vtensor<[16,64,64],f16>
%2939 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%2940 = torch.aten.sum.dim_IntList %2938, %2939, %true, %none : !torch.vtensor<[16,64,64],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[16,64,1],f16>
%2941 = torch.aten.div.Tensor %2938, %2940 : !torch.vtensor<[16,64,64],f16>, !torch.vtensor<[16,64,1],f16> -> !torch.vtensor<[16,64,64],f16>
%2942 = torch.aten.broadcast_to %2941, %2934 : !torch.vtensor<[16,64,64],f16>, !torch.list<int> -> !torch.vtensor<[16,64,64],f16>
%2943 = torch.aten.view %2942, %2934 : !torch.vtensor<[16,64,64],f16>, !torch.list<int> -> !torch.vtensor<[16,64,64],f16>
%2944 = torch.aten.broadcast_to %2926, %2917 : !torch.vtensor<[16,64,160],f16>, !torch.list<int> -> !torch.vtensor<[16,64,160],f16>
%2945 = torch.aten.view %2944, %2917 : !torch.vtensor<[16,64,160],f16>, !torch.list<int> -> !torch.vtensor<[16,64,160],f16>
%2946 = torch.aten.bmm %2943, %2945 : !torch.vtensor<[16,64,64],f16>, !torch.vtensor<[16,64,160],f16> -> !torch.vtensor<[16,64,160],f16>
%2947 = torch.aten.view %2946, %2917 : !torch.vtensor<[16,64,160],f16>, !torch.list<int> -> !torch.vtensor<[16,64,160],f16>
%2948 = torch.prim.ListConstruct %int2, %int8, %int64, %int160 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2949 = torch.aten.view %2947, %2948 : !torch.vtensor<[16,64,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,64,160],f16>
%2950 = torch.aten.permute %2949, %901 : !torch.vtensor<[2,8,64,160],f16>, !torch.list<int> -> !torch.vtensor<[2,64,8,160],f16>
%2951 = torch.aten.clone %2950, %int0 : !torch.vtensor<[2,64,8,160],f16>, !torch.int -> !torch.vtensor<[2,64,8,160],f16>
%2952 = torch.aten.view %2951, %2882 : !torch.vtensor<[2,64,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,64,1280],f16>
%2953 = torch.aten.transpose.int %281, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%2954 = torch.aten.view %2952, %2901 : !torch.vtensor<[2,64,1280],f16>, !torch.list<int> -> !torch.vtensor<[128,1280],f16>
%2955 = torch.aten.mm %2954, %2953 : !torch.vtensor<[128,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[128,1280],f16>
%2956 = torch.aten.mul.Scalar %282, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16>
%2957 = torch.aten.add.Tensor %2956, %2955, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[128,1280],f16>, !torch.int -> !torch.vtensor<[128,1280],f16>
%2958 = torch.aten.view %2957, %2882 : !torch.vtensor<[128,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,64,1280],f16>
%2959 = torch.aten.add.Tensor %2958, %2883, %int1 : !torch.vtensor<[2,64,1280],f16>, !torch.vtensor<[2,64,1280],f16>, !torch.int -> !torch.vtensor<[2,64,1280],f16>
%2960 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%2961 = torch.aten.sum.dim_IntList %2959, %2960, %true, %none : !torch.vtensor<[2,64,1280],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,64,1],f16>
%2962 = torch.aten.div.Scalar %2961, %int1280 : !torch.vtensor<[2,64,1],f16>, !torch.int -> !torch.vtensor<[2,64,1],f16>
%2963 = torch.prim.ListConstruct %int2, %int64, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2964 = torch.aten.broadcast_to %2962, %2963 : !torch.vtensor<[2,64,1],f16>, !torch.list<int> -> !torch.vtensor<[2,64,1280],f16>
%2965 = torch.aten.sub.Tensor %2959, %2964, %int1 : !torch.vtensor<[2,64,1280],f16>, !torch.vtensor<[2,64,1280],f16>, !torch.int -> !torch.vtensor<[2,64,1280],f16>
%2966 = torch.aten.mul.Tensor %2965, %2965 : !torch.vtensor<[2,64,1280],f16>, !torch.vtensor<[2,64,1280],f16> -> !torch.vtensor<[2,64,1280],f16>
%2967 = torch.aten.sum.dim_IntList %2966, %2960, %true, %none : !torch.vtensor<[2,64,1280],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,64,1],f16>
%2968 = torch.aten.div.Scalar %2967, %int1280 : !torch.vtensor<[2,64,1],f16>, !torch.int -> !torch.vtensor<[2,64,1],f16>
%2969 = torch.aten.add.Scalar %2968, %float1.000000e-05, %int1 : !torch.vtensor<[2,64,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,64,1],f16>
%2970 = torch.aten.rsqrt %2969 : !torch.vtensor<[2,64,1],f16> -> !torch.vtensor<[2,64,1],f16>
%2971 = torch.prim.ListConstruct %int2, %int64, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2972 = torch.aten.broadcast_to %2970, %2971 : !torch.vtensor<[2,64,1],f16>, !torch.list<int> -> !torch.vtensor<[2,64,1280],f16>
%2973 = torch.aten.mul.Tensor %2965, %2972 : !torch.vtensor<[2,64,1280],f16>, !torch.vtensor<[2,64,1280],f16> -> !torch.vtensor<[2,64,1280],f16>
%2974 = torch.aten.mul.Tensor %2973, %283 : !torch.vtensor<[2,64,1280],f16>, !torch.vtensor<[1280],f16> -> !torch.vtensor<[2,64,1280],f16>
%2975 = torch.aten.add.Tensor %2974, %284, %int1 : !torch.vtensor<[2,64,1280],f16>, !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[2,64,1280],f16>
%2976 = torch.aten.transpose.int %285, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%2977 = torch.aten.view %2975, %2901 : !torch.vtensor<[2,64,1280],f16>, !torch.list<int> -> !torch.vtensor<[128,1280],f16>
%2978 = torch.aten.mm %2977, %2976 : !torch.vtensor<[128,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[128,1280],f16>
%2979 = torch.aten.view %2978, %2882 : !torch.vtensor<[128,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,64,1280],f16>
%2980 = torch.aten.transpose.int %286, %int0, %int1 : !torch.vtensor<[1280,768],f16>, !torch.int, !torch.int -> !torch.vtensor<[768,1280],f16>
%2981 = torch.aten.view %arg2, %968 : !torch.vtensor<[2,77,768],f16>, !torch.list<int> -> !torch.vtensor<[154,768],f16>
%2982 = torch.aten.mm %2981, %2980 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,1280],f16> -> !torch.vtensor<[154,1280],f16>
%2983 = torch.aten.view %2982, %2203 : !torch.vtensor<[154,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,77,1280],f16>
%2984 = torch.aten.transpose.int %287, %int0, %int1 : !torch.vtensor<[1280,768],f16>, !torch.int, !torch.int -> !torch.vtensor<[768,1280],f16>
%2985 = torch.aten.view %arg2, %968 : !torch.vtensor<[2,77,768],f16>, !torch.list<int> -> !torch.vtensor<[154,768],f16>
%2986 = torch.aten.mm %2985, %2984 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,1280],f16> -> !torch.vtensor<[154,1280],f16>
%2987 = torch.aten.view %2986, %2203 : !torch.vtensor<[154,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,77,1280],f16>
%2988 = torch.aten.view %2979, %2913 : !torch.vtensor<[2,64,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,64,8,160],f16>
%2989 = torch.aten.permute %2988, %901 : !torch.vtensor<[2,64,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,64,160],f16>
%2990 = torch.aten.clone %2989, %int0 : !torch.vtensor<[2,8,64,160],f16>, !torch.int -> !torch.vtensor<[2,8,64,160],f16>
%2991 = torch.aten.view %2990, %2917 : !torch.vtensor<[2,8,64,160],f16>, !torch.list<int> -> !torch.vtensor<[16,64,160],f16>
%2992 = torch.aten.view %2983, %2213 : !torch.vtensor<[2,77,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,77,8,160],f16>
%2993 = torch.aten.permute %2992, %901 : !torch.vtensor<[2,77,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,160],f16>
%2994 = torch.aten.clone %2993, %int0 : !torch.vtensor<[2,8,77,160],f16>, !torch.int -> !torch.vtensor<[2,8,77,160],f16>
%2995 = torch.aten.view %2994, %2217 : !torch.vtensor<[2,8,77,160],f16>, !torch.list<int> -> !torch.vtensor<[16,77,160],f16>
%2996 = torch.aten.view %2987, %2213 : !torch.vtensor<[2,77,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,77,8,160],f16>
%2997 = torch.aten.permute %2996, %901 : !torch.vtensor<[2,77,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,160],f16>
%2998 = torch.aten.clone %2997, %int0 : !torch.vtensor<[2,8,77,160],f16>, !torch.int -> !torch.vtensor<[2,8,77,160],f16>
%2999 = torch.aten.view %2998, %2217 : !torch.vtensor<[2,8,77,160],f16>, !torch.list<int> -> !torch.vtensor<[16,77,160],f16>
%3000 = torch.aten.transpose.int %2995, %int-1, %int-2 : !torch.vtensor<[16,77,160],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,160,77],f16>
%3001 = torch.aten.broadcast_to %2991, %2917 : !torch.vtensor<[16,64,160],f16>, !torch.list<int> -> !torch.vtensor<[16,64,160],f16>
%3002 = torch.aten.view %3001, %2917 : !torch.vtensor<[16,64,160],f16>, !torch.list<int> -> !torch.vtensor<[16,64,160],f16>
%3003 = torch.aten.broadcast_to %3000, %2226 : !torch.vtensor<[16,160,77],f16>, !torch.list<int> -> !torch.vtensor<[16,160,77],f16>
%3004 = torch.aten.view %3003, %2226 : !torch.vtensor<[16,160,77],f16>, !torch.list<int> -> !torch.vtensor<[16,160,77],f16>
%3005 = torch.aten.bmm %3002, %3004 : !torch.vtensor<[16,64,160],f16>, !torch.vtensor<[16,160,77],f16> -> !torch.vtensor<[16,64,77],f16>
%3006 = torch.prim.ListConstruct %int16, %int64, %int77 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3007 = torch.aten.view %3005, %3006 : !torch.vtensor<[16,64,77],f16>, !torch.list<int> -> !torch.vtensor<[16,64,77],f16>
%3008 = torch.aten.mul.Tensor %3007, %1 : !torch.vtensor<[16,64,77],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,64,77],f16>
%values_24, %indices_25 = torch.aten.max.dim %3008, %int-1, %true : !torch.vtensor<[16,64,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,64,1],f16>, !torch.vtensor<[16,64,1],si64>
%3009 = torch.aten.sub.Tensor %3008, %values_24, %float1.000000e00 : !torch.vtensor<[16,64,77],f16>, !torch.vtensor<[16,64,1],f16>, !torch.float -> !torch.vtensor<[16,64,77],f16>
%3010 = torch.aten.exp %3009 : !torch.vtensor<[16,64,77],f16> -> !torch.vtensor<[16,64,77],f16>
%3011 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%3012 = torch.aten.sum.dim_IntList %3010, %3011, %true, %none : !torch.vtensor<[16,64,77],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[16,64,1],f16>
%3013 = torch.aten.div.Tensor %3010, %3012 : !torch.vtensor<[16,64,77],f16>, !torch.vtensor<[16,64,1],f16> -> !torch.vtensor<[16,64,77],f16>
%3014 = torch.aten.broadcast_to %3013, %3006 : !torch.vtensor<[16,64,77],f16>, !torch.list<int> -> !torch.vtensor<[16,64,77],f16>
%3015 = torch.aten.view %3014, %3006 : !torch.vtensor<[16,64,77],f16>, !torch.list<int> -> !torch.vtensor<[16,64,77],f16>
%3016 = torch.aten.broadcast_to %2999, %2217 : !torch.vtensor<[16,77,160],f16>, !torch.list<int> -> !torch.vtensor<[16,77,160],f16>
%3017 = torch.aten.view %3016, %2217 : !torch.vtensor<[16,77,160],f16>, !torch.list<int> -> !torch.vtensor<[16,77,160],f16>
%3018 = torch.aten.bmm %3015, %3017 : !torch.vtensor<[16,64,77],f16>, !torch.vtensor<[16,77,160],f16> -> !torch.vtensor<[16,64,160],f16>
%3019 = torch.aten.view %3018, %2917 : !torch.vtensor<[16,64,160],f16>, !torch.list<int> -> !torch.vtensor<[16,64,160],f16>
%3020 = torch.aten.view %3019, %2948 : !torch.vtensor<[16,64,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,64,160],f16>
%3021 = torch.aten.permute %3020, %901 : !torch.vtensor<[2,8,64,160],f16>, !torch.list<int> -> !torch.vtensor<[2,64,8,160],f16>
%3022 = torch.aten.clone %3021, %int0 : !torch.vtensor<[2,64,8,160],f16>, !torch.int -> !torch.vtensor<[2,64,8,160],f16>
%3023 = torch.aten.view %3022, %2882 : !torch.vtensor<[2,64,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,64,1280],f16>
%3024 = torch.aten.transpose.int %288, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%3025 = torch.aten.view %3023, %2901 : !torch.vtensor<[2,64,1280],f16>, !torch.list<int> -> !torch.vtensor<[128,1280],f16>
%3026 = torch.aten.mm %3025, %3024 : !torch.vtensor<[128,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[128,1280],f16>
%3027 = torch.aten.mul.Scalar %289, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16>
%3028 = torch.aten.add.Tensor %3027, %3026, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[128,1280],f16>, !torch.int -> !torch.vtensor<[128,1280],f16>
%3029 = torch.aten.view %3028, %2882 : !torch.vtensor<[128,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,64,1280],f16>
%3030 = torch.aten.add.Tensor %3029, %2959, %int1 : !torch.vtensor<[2,64,1280],f16>, !torch.vtensor<[2,64,1280],f16>, !torch.int -> !torch.vtensor<[2,64,1280],f16>
%3031 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%3032 = torch.aten.sum.dim_IntList %3030, %3031, %true, %none : !torch.vtensor<[2,64,1280],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,64,1],f16>
%3033 = torch.aten.div.Scalar %3032, %int1280 : !torch.vtensor<[2,64,1],f16>, !torch.int -> !torch.vtensor<[2,64,1],f16>
%3034 = torch.prim.ListConstruct %int2, %int64, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3035 = torch.aten.broadcast_to %3033, %3034 : !torch.vtensor<[2,64,1],f16>, !torch.list<int> -> !torch.vtensor<[2,64,1280],f16>
%3036 = torch.aten.sub.Tensor %3030, %3035, %int1 : !torch.vtensor<[2,64,1280],f16>, !torch.vtensor<[2,64,1280],f16>, !torch.int -> !torch.vtensor<[2,64,1280],f16>
%3037 = torch.aten.mul.Tensor %3036, %3036 : !torch.vtensor<[2,64,1280],f16>, !torch.vtensor<[2,64,1280],f16> -> !torch.vtensor<[2,64,1280],f16>
%3038 = torch.aten.sum.dim_IntList %3037, %3031, %true, %none : !torch.vtensor<[2,64,1280],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,64,1],f16>
%3039 = torch.aten.div.Scalar %3038, %int1280 : !torch.vtensor<[2,64,1],f16>, !torch.int -> !torch.vtensor<[2,64,1],f16>
%3040 = torch.aten.add.Scalar %3039, %float1.000000e-05, %int1 : !torch.vtensor<[2,64,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,64,1],f16>
%3041 = torch.aten.rsqrt %3040 : !torch.vtensor<[2,64,1],f16> -> !torch.vtensor<[2,64,1],f16>
%3042 = torch.prim.ListConstruct %int2, %int64, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3043 = torch.aten.broadcast_to %3041, %3042 : !torch.vtensor<[2,64,1],f16>, !torch.list<int> -> !torch.vtensor<[2,64,1280],f16>
%3044 = torch.aten.mul.Tensor %3036, %3043 : !torch.vtensor<[2,64,1280],f16>, !torch.vtensor<[2,64,1280],f16> -> !torch.vtensor<[2,64,1280],f16>
%3045 = torch.aten.mul.Tensor %3044, %290 : !torch.vtensor<[2,64,1280],f16>, !torch.vtensor<[1280],f16> -> !torch.vtensor<[2,64,1280],f16>
%3046 = torch.aten.add.Tensor %3045, %291, %int1 : !torch.vtensor<[2,64,1280],f16>, !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[2,64,1280],f16>
%3047 = torch.aten.transpose.int %292, %int0, %int1 : !torch.vtensor<[10240,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,10240],f16>
%3048 = torch.aten.view %3046, %2901 : !torch.vtensor<[2,64,1280],f16>, !torch.list<int> -> !torch.vtensor<[128,1280],f16>
%3049 = torch.aten.mm %3048, %3047 : !torch.vtensor<[128,1280],f16>, !torch.vtensor<[1280,10240],f16> -> !torch.vtensor<[128,10240],f16>
%3050 = torch.aten.mul.Scalar %293, %int1 : !torch.vtensor<[10240],f16>, !torch.int -> !torch.vtensor<[10240],f16>
%3051 = torch.aten.add.Tensor %3050, %3049, %int1 : !torch.vtensor<[10240],f16>, !torch.vtensor<[128,10240],f16>, !torch.int -> !torch.vtensor<[128,10240],f16>
%3052 = torch.prim.ListConstruct %int2, %int64, %int10240 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3053 = torch.aten.view %3051, %3052 : !torch.vtensor<[128,10240],f16>, !torch.list<int> -> !torch.vtensor<[2,64,10240],f16>
%3054 = torch.aten.slice.Tensor %3053, %int-1, %int0, %int5120, %int1 : !torch.vtensor<[2,64,10240],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,64,5120],f16>
%3055 = torch.aten.slice.Tensor %3053, %int-1, %int5120, %int10240, %int1 : !torch.vtensor<[2,64,10240],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,64,5120],f16>
%3056 = torch.aten.gelu %3055, %str : !torch.vtensor<[2,64,5120],f16>, !torch.str -> !torch.vtensor<[2,64,5120],f16>
%3057 = torch.aten.mul.Tensor %3054, %3056 : !torch.vtensor<[2,64,5120],f16>, !torch.vtensor<[2,64,5120],f16> -> !torch.vtensor<[2,64,5120],f16>
%3058 = torch.aten.transpose.int %294, %int0, %int1 : !torch.vtensor<[1280,5120],f16>, !torch.int, !torch.int -> !torch.vtensor<[5120,1280],f16>
%3059 = torch.prim.ListConstruct %int128, %int5120 : (!torch.int, !torch.int) -> !torch.list<int>
%3060 = torch.aten.view %3057, %3059 : !torch.vtensor<[2,64,5120],f16>, !torch.list<int> -> !torch.vtensor<[128,5120],f16>
%3061 = torch.aten.mm %3060, %3058 : !torch.vtensor<[128,5120],f16>, !torch.vtensor<[5120,1280],f16> -> !torch.vtensor<[128,1280],f16>
%3062 = torch.aten.mul.Scalar %295, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16>
%3063 = torch.aten.add.Tensor %3062, %3061, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[128,1280],f16>, !torch.int -> !torch.vtensor<[128,1280],f16>
%3064 = torch.aten.view %3063, %2882 : !torch.vtensor<[128,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,64,1280],f16>
%3065 = torch.aten.add.Tensor %3064, %3030, %int1 : !torch.vtensor<[2,64,1280],f16>, !torch.vtensor<[2,64,1280],f16>, !torch.int -> !torch.vtensor<[2,64,1280],f16>
%3066 = torch.prim.ListConstruct %int2, %int8, %int8, %int1280 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3067 = torch.aten.view %3065, %3066 : !torch.vtensor<[2,64,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,8,8,1280],f16>
%3068 = torch.aten.permute %3067, %1060 : !torch.vtensor<[2,8,8,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,8,8],f16>
%3069 = torch.aten.convolution %3068, %296, %297, %744, %745, %744, %false, %745, %int1 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[1280,1280,1,1],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16>
%3070 = torch.aten.add.Tensor %3069, %2846, %int1 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,8,8],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16>
%3071 = torch.aten.clone %3070, %int0 : !torch.vtensor<[2,1280,8,8],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16>
%3072 = torch.aten.view %3071, %2594 : !torch.vtensor<[2,1280,8,8],f16>, !torch.list<int> -> !torch.vtensor<[2,32,40,64],f16>
%3073 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%3074 = torch.aten.to.dtype %3073, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%3075 = torch.prim.ListConstruct %int2, %int32, %int40, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3076 = torch.aten.broadcast_to %3074, %3075 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,40,64],f32>
%3077 = torch.valsem.aten.copy %3076, %3072, %false : !torch.vtensor<[2,32,40,64],f32>, !torch.vtensor<[2,32,40,64],f16>, !torch.bool -> !torch.vtensor<[2,32,40,64],f32>
%3078 = torch.aten.to.dtype %3077, %int7, %false, %false, %none : !torch.vtensor<[2,32,40,64],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,40,64],f64>
%3079 = torch.aten.sum.dim_IntList %3078, %754, %true, %none : !torch.vtensor<[2,32,40,64],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%3080 = torch.aten.div.Scalar %3079, %int2560 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%3081 = torch.aten.sub.Tensor %3078, %3080, %float1.000000e00 : !torch.vtensor<[2,32,40,64],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,40,64],f64>
%3082 = torch.aten.mul.Tensor %3081, %3081 : !torch.vtensor<[2,32,40,64],f64>, !torch.vtensor<[2,32,40,64],f64> -> !torch.vtensor<[2,32,40,64],f64>
%3083 = torch.aten.sum.dim_IntList %3082, %754, %true, %none : !torch.vtensor<[2,32,40,64],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%3084 = torch.aten.div.Scalar %3083, %int2560 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%3085 = torch.aten.to.dtype %3084, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%3086 = torch.aten.sum.dim_IntList %3077, %754, %true, %none : !torch.vtensor<[2,32,40,64],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%3087 = torch.aten.div.Scalar %3086, %int2560 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%3088 = torch.aten.add.Tensor %3085, %6, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%3089 = torch.aten.rsqrt %3088 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%3090 = torch.aten.sub.Tensor %3072, %3087, %int1 : !torch.vtensor<[2,32,40,64],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,64],f32>
%3091 = torch.aten.mul.Tensor %3090, %3089 : !torch.vtensor<[2,32,40,64],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,64],f32>
%3092 = torch.aten.view %3091, %2615 : !torch.vtensor<[2,32,40,64],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,8,8],f32>
%3093 = torch.aten.unsqueeze %298, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%3094 = torch.aten.unsqueeze %3093, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%3095 = torch.aten.mul.Tensor %3092, %3094 : !torch.vtensor<[2,1280,8,8],f32>, !torch.vtensor<[1280,1,1],f16> -> !torch.vtensor<[2,1280,8,8],f32>
%3096 = torch.aten.unsqueeze %299, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%3097 = torch.aten.unsqueeze %3096, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%3098 = torch.aten.add.Tensor %3095, %3097, %int1 : !torch.vtensor<[2,1280,8,8],f32>, !torch.vtensor<[1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f32>
%3099 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%3100 = torch.aten.to.dtype %3099, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%3101 = torch.prim.ListConstruct %int2, %int1280, %int8, %int8 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3102 = torch.aten.broadcast_to %3100, %3101 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,8,8],f16>
%3103 = torch.valsem.aten.copy %3102, %3098, %false : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,8,8],f32>, !torch.bool -> !torch.vtensor<[2,1280,8,8],f16>
%3104 = torch.aten.sigmoid %3103 : !torch.vtensor<[2,1280,8,8],f16> -> !torch.vtensor<[2,1280,8,8],f16>
%3105 = torch.aten.mul.Tensor %3104, %3103 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,8,8],f16> -> !torch.vtensor<[2,1280,8,8],f16>
%3106 = torch.aten.convolution %3105, %300, %301, %744, %744, %744, %false, %745, %int1 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16>
%3107 = torch.aten.sigmoid %743 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%3108 = torch.aten.mul.Tensor %3107, %743 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%3109 = torch.aten.transpose.int %302, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%3110 = torch.aten.mm %3108, %3109 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[2,1280],f16>
%3111 = torch.aten.mul.Scalar %303, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16>
%3112 = torch.aten.add.Tensor %3111, %3110, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[2,1280],f16>, !torch.int -> !torch.vtensor<[2,1280],f16>
%3113 = torch.aten.slice.Tensor %3112, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,1280],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
%3114 = torch.aten.slice.Tensor %3113, %int1, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,1280],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
%3115 = torch.aten.unsqueeze %3114, %int2 : !torch.vtensor<[2,1280],f16>, !torch.int -> !torch.vtensor<[2,1280,1],f16>
%3116 = torch.aten.unsqueeze %3115, %int3 : !torch.vtensor<[2,1280,1],f16>, !torch.int -> !torch.vtensor<[2,1280,1,1],f16>
%3117 = torch.aten.add.Tensor %3106, %3116, %int1 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16>
%3118 = torch.aten.view %3117, %2594 : !torch.vtensor<[2,1280,8,8],f16>, !torch.list<int> -> !torch.vtensor<[2,32,40,64],f16>
%3119 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%3120 = torch.aten.to.dtype %3119, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%3121 = torch.prim.ListConstruct %int2, %int32, %int40, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3122 = torch.aten.broadcast_to %3120, %3121 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,40,64],f32>
%3123 = torch.valsem.aten.copy %3122, %3118, %false : !torch.vtensor<[2,32,40,64],f32>, !torch.vtensor<[2,32,40,64],f16>, !torch.bool -> !torch.vtensor<[2,32,40,64],f32>
%3124 = torch.aten.to.dtype %3123, %int7, %false, %false, %none : !torch.vtensor<[2,32,40,64],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,40,64],f64>
%3125 = torch.aten.sum.dim_IntList %3124, %754, %true, %none : !torch.vtensor<[2,32,40,64],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%3126 = torch.aten.div.Scalar %3125, %int2560 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%3127 = torch.aten.sub.Tensor %3124, %3126, %float1.000000e00 : !torch.vtensor<[2,32,40,64],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,40,64],f64>
%3128 = torch.aten.mul.Tensor %3127, %3127 : !torch.vtensor<[2,32,40,64],f64>, !torch.vtensor<[2,32,40,64],f64> -> !torch.vtensor<[2,32,40,64],f64>
%3129 = torch.aten.sum.dim_IntList %3128, %754, %true, %none : !torch.vtensor<[2,32,40,64],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%3130 = torch.aten.div.Scalar %3129, %int2560 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%3131 = torch.aten.to.dtype %3130, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%3132 = torch.aten.sum.dim_IntList %3123, %754, %true, %none : !torch.vtensor<[2,32,40,64],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%3133 = torch.aten.div.Scalar %3132, %int2560 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%3134 = torch.aten.add.Tensor %3131, %6, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%3135 = torch.aten.rsqrt %3134 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%3136 = torch.aten.sub.Tensor %3118, %3133, %int1 : !torch.vtensor<[2,32,40,64],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,64],f32>
%3137 = torch.aten.mul.Tensor %3136, %3135 : !torch.vtensor<[2,32,40,64],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,64],f32>
%3138 = torch.aten.view %3137, %2615 : !torch.vtensor<[2,32,40,64],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,8,8],f32>
%3139 = torch.aten.unsqueeze %304, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%3140 = torch.aten.unsqueeze %3139, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%3141 = torch.aten.mul.Tensor %3138, %3140 : !torch.vtensor<[2,1280,8,8],f32>, !torch.vtensor<[1280,1,1],f16> -> !torch.vtensor<[2,1280,8,8],f32>
%3142 = torch.aten.unsqueeze %305, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%3143 = torch.aten.unsqueeze %3142, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%3144 = torch.aten.add.Tensor %3141, %3143, %int1 : !torch.vtensor<[2,1280,8,8],f32>, !torch.vtensor<[1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f32>
%3145 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%3146 = torch.aten.to.dtype %3145, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%3147 = torch.prim.ListConstruct %int2, %int1280, %int8, %int8 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3148 = torch.aten.broadcast_to %3146, %3147 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,8,8],f16>
%3149 = torch.valsem.aten.copy %3148, %3144, %false : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,8,8],f32>, !torch.bool -> !torch.vtensor<[2,1280,8,8],f16>
%3150 = torch.aten.sigmoid %3149 : !torch.vtensor<[2,1280,8,8],f16> -> !torch.vtensor<[2,1280,8,8],f16>
%3151 = torch.aten.mul.Tensor %3150, %3149 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,8,8],f16> -> !torch.vtensor<[2,1280,8,8],f16>
%3152 = torch.aten.convolution %3151, %306, %307, %744, %744, %744, %false, %745, %int1 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16>
%3153 = torch.aten.add.Tensor %3070, %3152, %int1 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,8,8],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16>
%3154 = torch.aten.div.Tensor %3153, %9 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[],si64> -> !torch.vtensor<[2,1280,8,8],f16>
%3155 = torch.prim.ListConstruct %3154, %2762 : (!torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,8,8],f16>) -> !torch.list<vtensor>
%3156 = torch.aten.cat %3155, %int1 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[2,2560,8,8],f16>
%3157 = torch.aten.clone %3156, %int0 : !torch.vtensor<[2,2560,8,8],f16>, !torch.int -> !torch.vtensor<[2,2560,8,8],f16>
%3158 = torch.prim.ListConstruct %int2, %int32, %int80, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3159 = torch.aten.view %3157, %3158 : !torch.vtensor<[2,2560,8,8],f16>, !torch.list<int> -> !torch.vtensor<[2,32,80,64],f16>
%3160 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%3161 = torch.aten.to.dtype %3160, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%3162 = torch.prim.ListConstruct %int2, %int32, %int80, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3163 = torch.aten.broadcast_to %3161, %3162 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,80,64],f32>
%3164 = torch.valsem.aten.copy %3163, %3159, %false : !torch.vtensor<[2,32,80,64],f32>, !torch.vtensor<[2,32,80,64],f16>, !torch.bool -> !torch.vtensor<[2,32,80,64],f32>
%3165 = torch.aten.to.dtype %3164, %int7, %false, %false, %none : !torch.vtensor<[2,32,80,64],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,80,64],f64>
%3166 = torch.aten.sum.dim_IntList %3165, %754, %true, %none : !torch.vtensor<[2,32,80,64],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%3167 = torch.aten.div.Scalar %3166, %int5120 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%3168 = torch.aten.sub.Tensor %3165, %3167, %float1.000000e00 : !torch.vtensor<[2,32,80,64],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,80,64],f64>
%3169 = torch.aten.mul.Tensor %3168, %3168 : !torch.vtensor<[2,32,80,64],f64>, !torch.vtensor<[2,32,80,64],f64> -> !torch.vtensor<[2,32,80,64],f64>
%3170 = torch.aten.sum.dim_IntList %3169, %754, %true, %none : !torch.vtensor<[2,32,80,64],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%3171 = torch.aten.div.Scalar %3170, %int5120 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%3172 = torch.aten.to.dtype %3171, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%3173 = torch.aten.sum.dim_IntList %3164, %754, %true, %none : !torch.vtensor<[2,32,80,64],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%3174 = torch.aten.div.Scalar %3173, %int5120 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%3175 = torch.aten.add.Tensor %3172, %6, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%3176 = torch.aten.rsqrt %3175 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%3177 = torch.aten.sub.Tensor %3159, %3174, %int1 : !torch.vtensor<[2,32,80,64],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,80,64],f32>
%3178 = torch.aten.mul.Tensor %3177, %3176 : !torch.vtensor<[2,32,80,64],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,80,64],f32>
%3179 = torch.prim.ListConstruct %int2, %int2560, %int8, %int8 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3180 = torch.aten.view %3178, %3179 : !torch.vtensor<[2,32,80,64],f32>, !torch.list<int> -> !torch.vtensor<[2,2560,8,8],f32>
%3181 = torch.aten.unsqueeze %308, %int-1 : !torch.vtensor<[2560],f16>, !torch.int -> !torch.vtensor<[2560,1],f16>
%3182 = torch.aten.unsqueeze %3181, %int-1 : !torch.vtensor<[2560,1],f16>, !torch.int -> !torch.vtensor<[2560,1,1],f16>
%3183 = torch.aten.mul.Tensor %3180, %3182 : !torch.vtensor<[2,2560,8,8],f32>, !torch.vtensor<[2560,1,1],f16> -> !torch.vtensor<[2,2560,8,8],f32>
%3184 = torch.aten.unsqueeze %309, %int-1 : !torch.vtensor<[2560],f16>, !torch.int -> !torch.vtensor<[2560,1],f16>
%3185 = torch.aten.unsqueeze %3184, %int-1 : !torch.vtensor<[2560,1],f16>, !torch.int -> !torch.vtensor<[2560,1,1],f16>
%3186 = torch.aten.add.Tensor %3183, %3185, %int1 : !torch.vtensor<[2,2560,8,8],f32>, !torch.vtensor<[2560,1,1],f16>, !torch.int -> !torch.vtensor<[2,2560,8,8],f32>
%3187 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%3188 = torch.aten.to.dtype %3187, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%3189 = torch.prim.ListConstruct %int2, %int2560, %int8, %int8 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3190 = torch.aten.broadcast_to %3188, %3189 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,2560,8,8],f16>
%3191 = torch.valsem.aten.copy %3190, %3186, %false : !torch.vtensor<[2,2560,8,8],f16>, !torch.vtensor<[2,2560,8,8],f32>, !torch.bool -> !torch.vtensor<[2,2560,8,8],f16>
%3192 = torch.aten.sigmoid %3191 : !torch.vtensor<[2,2560,8,8],f16> -> !torch.vtensor<[2,2560,8,8],f16>
%3193 = torch.aten.mul.Tensor %3192, %3191 : !torch.vtensor<[2,2560,8,8],f16>, !torch.vtensor<[2,2560,8,8],f16> -> !torch.vtensor<[2,2560,8,8],f16>
%3194 = torch.aten.convolution %3193, %310, %311, %744, %744, %744, %false, %745, %int1 : !torch.vtensor<[2,2560,8,8],f16>, !torch.vtensor<[1280,2560,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16>
%3195 = torch.aten.sigmoid %743 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%3196 = torch.aten.mul.Tensor %3195, %743 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%3197 = torch.aten.transpose.int %312, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%3198 = torch.aten.mm %3196, %3197 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[2,1280],f16>
%3199 = torch.aten.mul.Scalar %313, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16>
%3200 = torch.aten.add.Tensor %3199, %3198, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[2,1280],f16>, !torch.int -> !torch.vtensor<[2,1280],f16>
%3201 = torch.aten.slice.Tensor %3200, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,1280],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
%3202 = torch.aten.slice.Tensor %3201, %int1, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,1280],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
%3203 = torch.aten.unsqueeze %3202, %int2 : !torch.vtensor<[2,1280],f16>, !torch.int -> !torch.vtensor<[2,1280,1],f16>
%3204 = torch.aten.unsqueeze %3203, %int3 : !torch.vtensor<[2,1280,1],f16>, !torch.int -> !torch.vtensor<[2,1280,1,1],f16>
%3205 = torch.aten.add.Tensor %3194, %3204, %int1 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16>
%3206 = torch.aten.view %3205, %2594 : !torch.vtensor<[2,1280,8,8],f16>, !torch.list<int> -> !torch.vtensor<[2,32,40,64],f16>
%3207 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%3208 = torch.aten.to.dtype %3207, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%3209 = torch.prim.ListConstruct %int2, %int32, %int40, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3210 = torch.aten.broadcast_to %3208, %3209 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,40,64],f32>
%3211 = torch.valsem.aten.copy %3210, %3206, %false : !torch.vtensor<[2,32,40,64],f32>, !torch.vtensor<[2,32,40,64],f16>, !torch.bool -> !torch.vtensor<[2,32,40,64],f32>
%3212 = torch.aten.to.dtype %3211, %int7, %false, %false, %none : !torch.vtensor<[2,32,40,64],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,40,64],f64>
%3213 = torch.aten.sum.dim_IntList %3212, %754, %true, %none : !torch.vtensor<[2,32,40,64],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%3214 = torch.aten.div.Scalar %3213, %int2560 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%3215 = torch.aten.sub.Tensor %3212, %3214, %float1.000000e00 : !torch.vtensor<[2,32,40,64],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,40,64],f64>
%3216 = torch.aten.mul.Tensor %3215, %3215 : !torch.vtensor<[2,32,40,64],f64>, !torch.vtensor<[2,32,40,64],f64> -> !torch.vtensor<[2,32,40,64],f64>
%3217 = torch.aten.sum.dim_IntList %3216, %754, %true, %none : !torch.vtensor<[2,32,40,64],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%3218 = torch.aten.div.Scalar %3217, %int2560 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%3219 = torch.aten.to.dtype %3218, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%3220 = torch.aten.sum.dim_IntList %3211, %754, %true, %none : !torch.vtensor<[2,32,40,64],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%3221 = torch.aten.div.Scalar %3220, %int2560 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%3222 = torch.aten.add.Tensor %3219, %6, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%3223 = torch.aten.rsqrt %3222 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%3224 = torch.aten.sub.Tensor %3206, %3221, %int1 : !torch.vtensor<[2,32,40,64],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,64],f32>
%3225 = torch.aten.mul.Tensor %3224, %3223 : !torch.vtensor<[2,32,40,64],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,64],f32>
%3226 = torch.aten.view %3225, %2615 : !torch.vtensor<[2,32,40,64],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,8,8],f32>
%3227 = torch.aten.unsqueeze %314, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%3228 = torch.aten.unsqueeze %3227, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%3229 = torch.aten.mul.Tensor %3226, %3228 : !torch.vtensor<[2,1280,8,8],f32>, !torch.vtensor<[1280,1,1],f16> -> !torch.vtensor<[2,1280,8,8],f32>
%3230 = torch.aten.unsqueeze %315, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%3231 = torch.aten.unsqueeze %3230, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%3232 = torch.aten.add.Tensor %3229, %3231, %int1 : !torch.vtensor<[2,1280,8,8],f32>, !torch.vtensor<[1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f32>
%3233 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%3234 = torch.aten.to.dtype %3233, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%3235 = torch.prim.ListConstruct %int2, %int1280, %int8, %int8 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3236 = torch.aten.broadcast_to %3234, %3235 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,8,8],f16>
%3237 = torch.valsem.aten.copy %3236, %3232, %false : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,8,8],f32>, !torch.bool -> !torch.vtensor<[2,1280,8,8],f16>
%3238 = torch.aten.sigmoid %3237 : !torch.vtensor<[2,1280,8,8],f16> -> !torch.vtensor<[2,1280,8,8],f16>
%3239 = torch.aten.mul.Tensor %3238, %3237 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,8,8],f16> -> !torch.vtensor<[2,1280,8,8],f16>
%3240 = torch.aten.convolution %3239, %316, %317, %744, %744, %744, %false, %745, %int1 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16>
%3241 = torch.aten.convolution %3156, %318, %319, %744, %745, %744, %false, %745, %int1 : !torch.vtensor<[2,2560,8,8],f16>, !torch.vtensor<[1280,2560,1,1],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16>
%3242 = torch.aten.add.Tensor %3241, %3240, %int1 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,8,8],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16>
%3243 = torch.aten.div.Tensor %3242, %5 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[2,1280,8,8],f16>
%3244 = torch.prim.ListConstruct %3243, %2678 : (!torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,8,8],f16>) -> !torch.list<vtensor>
%3245 = torch.aten.cat %3244, %int1 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[2,2560,8,8],f16>
%3246 = torch.aten.clone %3245, %int0 : !torch.vtensor<[2,2560,8,8],f16>, !torch.int -> !torch.vtensor<[2,2560,8,8],f16>
%3247 = torch.aten.view %3246, %3158 : !torch.vtensor<[2,2560,8,8],f16>, !torch.list<int> -> !torch.vtensor<[2,32,80,64],f16>
%3248 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%3249 = torch.aten.to.dtype %3248, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%3250 = torch.prim.ListConstruct %int2, %int32, %int80, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3251 = torch.aten.broadcast_to %3249, %3250 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,80,64],f32>
%3252 = torch.valsem.aten.copy %3251, %3247, %false : !torch.vtensor<[2,32,80,64],f32>, !torch.vtensor<[2,32,80,64],f16>, !torch.bool -> !torch.vtensor<[2,32,80,64],f32>
%3253 = torch.aten.to.dtype %3252, %int7, %false, %false, %none : !torch.vtensor<[2,32,80,64],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,80,64],f64>
%3254 = torch.aten.sum.dim_IntList %3253, %754, %true, %none : !torch.vtensor<[2,32,80,64],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%3255 = torch.aten.div.Scalar %3254, %int5120 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%3256 = torch.aten.sub.Tensor %3253, %3255, %float1.000000e00 : !torch.vtensor<[2,32,80,64],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,80,64],f64>
%3257 = torch.aten.mul.Tensor %3256, %3256 : !torch.vtensor<[2,32,80,64],f64>, !torch.vtensor<[2,32,80,64],f64> -> !torch.vtensor<[2,32,80,64],f64>
%3258 = torch.aten.sum.dim_IntList %3257, %754, %true, %none : !torch.vtensor<[2,32,80,64],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%3259 = torch.aten.div.Scalar %3258, %int5120 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%3260 = torch.aten.to.dtype %3259, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%3261 = torch.aten.sum.dim_IntList %3252, %754, %true, %none : !torch.vtensor<[2,32,80,64],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%3262 = torch.aten.div.Scalar %3261, %int5120 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%3263 = torch.aten.add.Tensor %3260, %6, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%3264 = torch.aten.rsqrt %3263 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%3265 = torch.aten.sub.Tensor %3247, %3262, %int1 : !torch.vtensor<[2,32,80,64],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,80,64],f32>
%3266 = torch.aten.mul.Tensor %3265, %3264 : !torch.vtensor<[2,32,80,64],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,80,64],f32>
%3267 = torch.aten.view %3266, %3179 : !torch.vtensor<[2,32,80,64],f32>, !torch.list<int> -> !torch.vtensor<[2,2560,8,8],f32>
%3268 = torch.aten.unsqueeze %320, %int-1 : !torch.vtensor<[2560],f16>, !torch.int -> !torch.vtensor<[2560,1],f16>
%3269 = torch.aten.unsqueeze %3268, %int-1 : !torch.vtensor<[2560,1],f16>, !torch.int -> !torch.vtensor<[2560,1,1],f16>
%3270 = torch.aten.mul.Tensor %3267, %3269 : !torch.vtensor<[2,2560,8,8],f32>, !torch.vtensor<[2560,1,1],f16> -> !torch.vtensor<[2,2560,8,8],f32>
%3271 = torch.aten.unsqueeze %321, %int-1 : !torch.vtensor<[2560],f16>, !torch.int -> !torch.vtensor<[2560,1],f16>
%3272 = torch.aten.unsqueeze %3271, %int-1 : !torch.vtensor<[2560,1],f16>, !torch.int -> !torch.vtensor<[2560,1,1],f16>
%3273 = torch.aten.add.Tensor %3270, %3272, %int1 : !torch.vtensor<[2,2560,8,8],f32>, !torch.vtensor<[2560,1,1],f16>, !torch.int -> !torch.vtensor<[2,2560,8,8],f32>
%3274 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%3275 = torch.aten.to.dtype %3274, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%3276 = torch.prim.ListConstruct %int2, %int2560, %int8, %int8 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3277 = torch.aten.broadcast_to %3275, %3276 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,2560,8,8],f16>
%3278 = torch.valsem.aten.copy %3277, %3273, %false : !torch.vtensor<[2,2560,8,8],f16>, !torch.vtensor<[2,2560,8,8],f32>, !torch.bool -> !torch.vtensor<[2,2560,8,8],f16>
%3279 = torch.aten.sigmoid %3278 : !torch.vtensor<[2,2560,8,8],f16> -> !torch.vtensor<[2,2560,8,8],f16>
%3280 = torch.aten.mul.Tensor %3279, %3278 : !torch.vtensor<[2,2560,8,8],f16>, !torch.vtensor<[2,2560,8,8],f16> -> !torch.vtensor<[2,2560,8,8],f16>
%3281 = torch.aten.convolution %3280, %322, %323, %744, %744, %744, %false, %745, %int1 : !torch.vtensor<[2,2560,8,8],f16>, !torch.vtensor<[1280,2560,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16>
%3282 = torch.aten.sigmoid %743 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%3283 = torch.aten.mul.Tensor %3282, %743 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%3284 = torch.aten.transpose.int %324, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%3285 = torch.aten.mm %3283, %3284 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[2,1280],f16>
%3286 = torch.aten.mul.Scalar %325, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16>
%3287 = torch.aten.add.Tensor %3286, %3285, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[2,1280],f16>, !torch.int -> !torch.vtensor<[2,1280],f16>
%3288 = torch.aten.slice.Tensor %3287, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,1280],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
%3289 = torch.aten.slice.Tensor %3288, %int1, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,1280],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
%3290 = torch.aten.unsqueeze %3289, %int2 : !torch.vtensor<[2,1280],f16>, !torch.int -> !torch.vtensor<[2,1280,1],f16>
%3291 = torch.aten.unsqueeze %3290, %int3 : !torch.vtensor<[2,1280,1],f16>, !torch.int -> !torch.vtensor<[2,1280,1,1],f16>
%3292 = torch.aten.add.Tensor %3281, %3291, %int1 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16>
%3293 = torch.aten.view %3292, %2594 : !torch.vtensor<[2,1280,8,8],f16>, !torch.list<int> -> !torch.vtensor<[2,32,40,64],f16>
%3294 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%3295 = torch.aten.to.dtype %3294, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%3296 = torch.prim.ListConstruct %int2, %int32, %int40, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3297 = torch.aten.broadcast_to %3295, %3296 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,40,64],f32>
%3298 = torch.valsem.aten.copy %3297, %3293, %false : !torch.vtensor<[2,32,40,64],f32>, !torch.vtensor<[2,32,40,64],f16>, !torch.bool -> !torch.vtensor<[2,32,40,64],f32>
%3299 = torch.aten.to.dtype %3298, %int7, %false, %false, %none : !torch.vtensor<[2,32,40,64],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,40,64],f64>
%3300 = torch.aten.sum.dim_IntList %3299, %754, %true, %none : !torch.vtensor<[2,32,40,64],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%3301 = torch.aten.div.Scalar %3300, %int2560 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%3302 = torch.aten.sub.Tensor %3299, %3301, %float1.000000e00 : !torch.vtensor<[2,32,40,64],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,40,64],f64>
%3303 = torch.aten.mul.Tensor %3302, %3302 : !torch.vtensor<[2,32,40,64],f64>, !torch.vtensor<[2,32,40,64],f64> -> !torch.vtensor<[2,32,40,64],f64>
%3304 = torch.aten.sum.dim_IntList %3303, %754, %true, %none : !torch.vtensor<[2,32,40,64],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%3305 = torch.aten.div.Scalar %3304, %int2560 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%3306 = torch.aten.to.dtype %3305, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%3307 = torch.aten.sum.dim_IntList %3298, %754, %true, %none : !torch.vtensor<[2,32,40,64],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%3308 = torch.aten.div.Scalar %3307, %int2560 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%3309 = torch.aten.add.Tensor %3306, %6, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%3310 = torch.aten.rsqrt %3309 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%3311 = torch.aten.sub.Tensor %3293, %3308, %int1 : !torch.vtensor<[2,32,40,64],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,64],f32>
%3312 = torch.aten.mul.Tensor %3311, %3310 : !torch.vtensor<[2,32,40,64],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,64],f32>
%3313 = torch.aten.view %3312, %2615 : !torch.vtensor<[2,32,40,64],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,8,8],f32>
%3314 = torch.aten.unsqueeze %326, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%3315 = torch.aten.unsqueeze %3314, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%3316 = torch.aten.mul.Tensor %3313, %3315 : !torch.vtensor<[2,1280,8,8],f32>, !torch.vtensor<[1280,1,1],f16> -> !torch.vtensor<[2,1280,8,8],f32>
%3317 = torch.aten.unsqueeze %327, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%3318 = torch.aten.unsqueeze %3317, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%3319 = torch.aten.add.Tensor %3316, %3318, %int1 : !torch.vtensor<[2,1280,8,8],f32>, !torch.vtensor<[1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f32>
%3320 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%3321 = torch.aten.to.dtype %3320, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%3322 = torch.prim.ListConstruct %int2, %int1280, %int8, %int8 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3323 = torch.aten.broadcast_to %3321, %3322 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,8,8],f16>
%3324 = torch.valsem.aten.copy %3323, %3319, %false : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,8,8],f32>, !torch.bool -> !torch.vtensor<[2,1280,8,8],f16>
%3325 = torch.aten.sigmoid %3324 : !torch.vtensor<[2,1280,8,8],f16> -> !torch.vtensor<[2,1280,8,8],f16>
%3326 = torch.aten.mul.Tensor %3325, %3324 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,8,8],f16> -> !torch.vtensor<[2,1280,8,8],f16>
%3327 = torch.aten.convolution %3326, %328, %329, %744, %744, %744, %false, %745, %int1 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16>
%3328 = torch.aten.convolution %3245, %330, %331, %744, %745, %744, %false, %745, %int1 : !torch.vtensor<[2,2560,8,8],f16>, !torch.vtensor<[1280,2560,1,1],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16>
%3329 = torch.aten.add.Tensor %3328, %3327, %int1 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,8,8],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16>
%3330 = torch.aten.div.Tensor %3329, %5 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[2,1280,8,8],f16>
%3331 = torch.prim.ListConstruct %3330, %2592 : (!torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,8,8],f16>) -> !torch.list<vtensor>
%3332 = torch.aten.cat %3331, %int1 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[2,2560,8,8],f16>
%3333 = torch.aten.clone %3332, %int0 : !torch.vtensor<[2,2560,8,8],f16>, !torch.int -> !torch.vtensor<[2,2560,8,8],f16>
%3334 = torch.aten.view %3333, %3158 : !torch.vtensor<[2,2560,8,8],f16>, !torch.list<int> -> !torch.vtensor<[2,32,80,64],f16>
%3335 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%3336 = torch.aten.to.dtype %3335, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%3337 = torch.prim.ListConstruct %int2, %int32, %int80, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3338 = torch.aten.broadcast_to %3336, %3337 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,80,64],f32>
%3339 = torch.valsem.aten.copy %3338, %3334, %false : !torch.vtensor<[2,32,80,64],f32>, !torch.vtensor<[2,32,80,64],f16>, !torch.bool -> !torch.vtensor<[2,32,80,64],f32>
%3340 = torch.aten.to.dtype %3339, %int7, %false, %false, %none : !torch.vtensor<[2,32,80,64],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,80,64],f64>
%3341 = torch.aten.sum.dim_IntList %3340, %754, %true, %none : !torch.vtensor<[2,32,80,64],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%3342 = torch.aten.div.Scalar %3341, %int5120 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%3343 = torch.aten.sub.Tensor %3340, %3342, %float1.000000e00 : !torch.vtensor<[2,32,80,64],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,80,64],f64>
%3344 = torch.aten.mul.Tensor %3343, %3343 : !torch.vtensor<[2,32,80,64],f64>, !torch.vtensor<[2,32,80,64],f64> -> !torch.vtensor<[2,32,80,64],f64>
%3345 = torch.aten.sum.dim_IntList %3344, %754, %true, %none : !torch.vtensor<[2,32,80,64],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%3346 = torch.aten.div.Scalar %3345, %int5120 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%3347 = torch.aten.to.dtype %3346, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%3348 = torch.aten.sum.dim_IntList %3339, %754, %true, %none : !torch.vtensor<[2,32,80,64],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%3349 = torch.aten.div.Scalar %3348, %int5120 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%3350 = torch.aten.add.Tensor %3347, %6, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%3351 = torch.aten.rsqrt %3350 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%3352 = torch.aten.sub.Tensor %3334, %3349, %int1 : !torch.vtensor<[2,32,80,64],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,80,64],f32>
%3353 = torch.aten.mul.Tensor %3352, %3351 : !torch.vtensor<[2,32,80,64],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,80,64],f32>
%3354 = torch.aten.view %3353, %3179 : !torch.vtensor<[2,32,80,64],f32>, !torch.list<int> -> !torch.vtensor<[2,2560,8,8],f32>
%3355 = torch.aten.unsqueeze %332, %int-1 : !torch.vtensor<[2560],f16>, !torch.int -> !torch.vtensor<[2560,1],f16>
%3356 = torch.aten.unsqueeze %3355, %int-1 : !torch.vtensor<[2560,1],f16>, !torch.int -> !torch.vtensor<[2560,1,1],f16>
%3357 = torch.aten.mul.Tensor %3354, %3356 : !torch.vtensor<[2,2560,8,8],f32>, !torch.vtensor<[2560,1,1],f16> -> !torch.vtensor<[2,2560,8,8],f32>
%3358 = torch.aten.unsqueeze %333, %int-1 : !torch.vtensor<[2560],f16>, !torch.int -> !torch.vtensor<[2560,1],f16>
%3359 = torch.aten.unsqueeze %3358, %int-1 : !torch.vtensor<[2560,1],f16>, !torch.int -> !torch.vtensor<[2560,1,1],f16>
%3360 = torch.aten.add.Tensor %3357, %3359, %int1 : !torch.vtensor<[2,2560,8,8],f32>, !torch.vtensor<[2560,1,1],f16>, !torch.int -> !torch.vtensor<[2,2560,8,8],f32>
%3361 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%3362 = torch.aten.to.dtype %3361, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%3363 = torch.prim.ListConstruct %int2, %int2560, %int8, %int8 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3364 = torch.aten.broadcast_to %3362, %3363 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,2560,8,8],f16>
%3365 = torch.valsem.aten.copy %3364, %3360, %false : !torch.vtensor<[2,2560,8,8],f16>, !torch.vtensor<[2,2560,8,8],f32>, !torch.bool -> !torch.vtensor<[2,2560,8,8],f16>
%3366 = torch.aten.sigmoid %3365 : !torch.vtensor<[2,2560,8,8],f16> -> !torch.vtensor<[2,2560,8,8],f16>
%3367 = torch.aten.mul.Tensor %3366, %3365 : !torch.vtensor<[2,2560,8,8],f16>, !torch.vtensor<[2,2560,8,8],f16> -> !torch.vtensor<[2,2560,8,8],f16>
%3368 = torch.aten.convolution %3367, %334, %335, %744, %744, %744, %false, %745, %int1 : !torch.vtensor<[2,2560,8,8],f16>, !torch.vtensor<[1280,2560,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16>
%3369 = torch.aten.sigmoid %743 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%3370 = torch.aten.mul.Tensor %3369, %743 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%3371 = torch.aten.transpose.int %336, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%3372 = torch.aten.mm %3370, %3371 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[2,1280],f16>
%3373 = torch.aten.mul.Scalar %337, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16>
%3374 = torch.aten.add.Tensor %3373, %3372, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[2,1280],f16>, !torch.int -> !torch.vtensor<[2,1280],f16>
%3375 = torch.aten.slice.Tensor %3374, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,1280],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
%3376 = torch.aten.slice.Tensor %3375, %int1, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,1280],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
%3377 = torch.aten.unsqueeze %3376, %int2 : !torch.vtensor<[2,1280],f16>, !torch.int -> !torch.vtensor<[2,1280,1],f16>
%3378 = torch.aten.unsqueeze %3377, %int3 : !torch.vtensor<[2,1280,1],f16>, !torch.int -> !torch.vtensor<[2,1280,1,1],f16>
%3379 = torch.aten.add.Tensor %3368, %3378, %int1 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16>
%3380 = torch.aten.view %3379, %2594 : !torch.vtensor<[2,1280,8,8],f16>, !torch.list<int> -> !torch.vtensor<[2,32,40,64],f16>
%3381 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%3382 = torch.aten.to.dtype %3381, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%3383 = torch.prim.ListConstruct %int2, %int32, %int40, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3384 = torch.aten.broadcast_to %3382, %3383 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,40,64],f32>
%3385 = torch.valsem.aten.copy %3384, %3380, %false : !torch.vtensor<[2,32,40,64],f32>, !torch.vtensor<[2,32,40,64],f16>, !torch.bool -> !torch.vtensor<[2,32,40,64],f32>
%3386 = torch.aten.to.dtype %3385, %int7, %false, %false, %none : !torch.vtensor<[2,32,40,64],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,40,64],f64>
%3387 = torch.aten.sum.dim_IntList %3386, %754, %true, %none : !torch.vtensor<[2,32,40,64],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%3388 = torch.aten.div.Scalar %3387, %int2560 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%3389 = torch.aten.sub.Tensor %3386, %3388, %float1.000000e00 : !torch.vtensor<[2,32,40,64],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,40,64],f64>
%3390 = torch.aten.mul.Tensor %3389, %3389 : !torch.vtensor<[2,32,40,64],f64>, !torch.vtensor<[2,32,40,64],f64> -> !torch.vtensor<[2,32,40,64],f64>
%3391 = torch.aten.sum.dim_IntList %3390, %754, %true, %none : !torch.vtensor<[2,32,40,64],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%3392 = torch.aten.div.Scalar %3391, %int2560 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%3393 = torch.aten.to.dtype %3392, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%3394 = torch.aten.sum.dim_IntList %3385, %754, %true, %none : !torch.vtensor<[2,32,40,64],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%3395 = torch.aten.div.Scalar %3394, %int2560 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%3396 = torch.aten.add.Tensor %3393, %6, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%3397 = torch.aten.rsqrt %3396 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%3398 = torch.aten.sub.Tensor %3380, %3395, %int1 : !torch.vtensor<[2,32,40,64],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,64],f32>
%3399 = torch.aten.mul.Tensor %3398, %3397 : !torch.vtensor<[2,32,40,64],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,64],f32>
%3400 = torch.aten.view %3399, %2615 : !torch.vtensor<[2,32,40,64],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,8,8],f32>
%3401 = torch.aten.unsqueeze %338, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%3402 = torch.aten.unsqueeze %3401, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%3403 = torch.aten.mul.Tensor %3400, %3402 : !torch.vtensor<[2,1280,8,8],f32>, !torch.vtensor<[1280,1,1],f16> -> !torch.vtensor<[2,1280,8,8],f32>
%3404 = torch.aten.unsqueeze %339, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%3405 = torch.aten.unsqueeze %3404, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%3406 = torch.aten.add.Tensor %3403, %3405, %int1 : !torch.vtensor<[2,1280,8,8],f32>, !torch.vtensor<[1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f32>
%3407 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%3408 = torch.aten.to.dtype %3407, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%3409 = torch.prim.ListConstruct %int2, %int1280, %int8, %int8 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3410 = torch.aten.broadcast_to %3408, %3409 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,8,8],f16>
%3411 = torch.valsem.aten.copy %3410, %3406, %false : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,8,8],f32>, !torch.bool -> !torch.vtensor<[2,1280,8,8],f16>
%3412 = torch.aten.sigmoid %3411 : !torch.vtensor<[2,1280,8,8],f16> -> !torch.vtensor<[2,1280,8,8],f16>
%3413 = torch.aten.mul.Tensor %3412, %3411 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,8,8],f16> -> !torch.vtensor<[2,1280,8,8],f16>
%3414 = torch.aten.convolution %3413, %340, %341, %744, %744, %744, %false, %745, %int1 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16>
%3415 = torch.aten.convolution %3332, %342, %343, %744, %745, %744, %false, %745, %int1 : !torch.vtensor<[2,2560,8,8],f16>, !torch.vtensor<[1280,2560,1,1],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16>
%3416 = torch.aten.add.Tensor %3415, %3414, %int1 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,8,8],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16>
%3417 = torch.aten.div.Tensor %3416, %5 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[2,1280,8,8],f16>
%3418 = torch.prim.ListConstruct %float2.000000e00, %float2.000000e00 : (!torch.float, !torch.float) -> !torch.list<float>
%3419 = torch.aten.upsample_nearest2d.vec %3417, %none, %3418 : !torch.vtensor<[2,1280,8,8],f16>, !torch.none, !torch.list<float> -> !torch.vtensor<[2,1280,16,16],f16>
%3420 = torch.aten.convolution %3419, %344, %345, %744, %744, %744, %false, %745, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
%3421 = torch.prim.ListConstruct %3420, %2591 : (!torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f16>) -> !torch.list<vtensor>
%3422 = torch.aten.cat %3421, %int1 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[2,2560,16,16],f16>
%3423 = torch.aten.clone %3422, %int0 : !torch.vtensor<[2,2560,16,16],f16>, !torch.int -> !torch.vtensor<[2,2560,16,16],f16>
%3424 = torch.prim.ListConstruct %int2, %int32, %int80, %int256 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3425 = torch.aten.view %3423, %3424 : !torch.vtensor<[2,2560,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,32,80,256],f16>
%3426 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%3427 = torch.aten.to.dtype %3426, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%3428 = torch.prim.ListConstruct %int2, %int32, %int80, %int256 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3429 = torch.aten.broadcast_to %3427, %3428 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,80,256],f32>
%3430 = torch.valsem.aten.copy %3429, %3425, %false : !torch.vtensor<[2,32,80,256],f32>, !torch.vtensor<[2,32,80,256],f16>, !torch.bool -> !torch.vtensor<[2,32,80,256],f32>
%3431 = torch.aten.to.dtype %3430, %int7, %false, %false, %none : !torch.vtensor<[2,32,80,256],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,80,256],f64>
%3432 = torch.aten.sum.dim_IntList %3431, %754, %true, %none : !torch.vtensor<[2,32,80,256],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%3433 = torch.aten.div.Scalar %3432, %int20480 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%3434 = torch.aten.sub.Tensor %3431, %3433, %float1.000000e00 : !torch.vtensor<[2,32,80,256],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,80,256],f64>
%3435 = torch.aten.mul.Tensor %3434, %3434 : !torch.vtensor<[2,32,80,256],f64>, !torch.vtensor<[2,32,80,256],f64> -> !torch.vtensor<[2,32,80,256],f64>
%3436 = torch.aten.sum.dim_IntList %3435, %754, %true, %none : !torch.vtensor<[2,32,80,256],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%3437 = torch.aten.div.Scalar %3436, %int20480 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%3438 = torch.aten.to.dtype %3437, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%3439 = torch.aten.sum.dim_IntList %3430, %754, %true, %none : !torch.vtensor<[2,32,80,256],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%3440 = torch.aten.div.Scalar %3439, %int20480 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%3441 = torch.aten.add.Tensor %3438, %6, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%3442 = torch.aten.rsqrt %3441 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%3443 = torch.aten.sub.Tensor %3425, %3440, %int1 : !torch.vtensor<[2,32,80,256],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,80,256],f32>
%3444 = torch.aten.mul.Tensor %3443, %3442 : !torch.vtensor<[2,32,80,256],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,80,256],f32>
%3445 = torch.prim.ListConstruct %int2, %int2560, %int16, %int16 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3446 = torch.aten.view %3444, %3445 : !torch.vtensor<[2,32,80,256],f32>, !torch.list<int> -> !torch.vtensor<[2,2560,16,16],f32>
%3447 = torch.aten.unsqueeze %346, %int-1 : !torch.vtensor<[2560],f16>, !torch.int -> !torch.vtensor<[2560,1],f16>
%3448 = torch.aten.unsqueeze %3447, %int-1 : !torch.vtensor<[2560,1],f16>, !torch.int -> !torch.vtensor<[2560,1,1],f16>
%3449 = torch.aten.mul.Tensor %3446, %3448 : !torch.vtensor<[2,2560,16,16],f32>, !torch.vtensor<[2560,1,1],f16> -> !torch.vtensor<[2,2560,16,16],f32>
%3450 = torch.aten.unsqueeze %347, %int-1 : !torch.vtensor<[2560],f16>, !torch.int -> !torch.vtensor<[2560,1],f16>
%3451 = torch.aten.unsqueeze %3450, %int-1 : !torch.vtensor<[2560,1],f16>, !torch.int -> !torch.vtensor<[2560,1,1],f16>
%3452 = torch.aten.add.Tensor %3449, %3451, %int1 : !torch.vtensor<[2,2560,16,16],f32>, !torch.vtensor<[2560,1,1],f16>, !torch.int -> !torch.vtensor<[2,2560,16,16],f32>
%3453 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%3454 = torch.aten.to.dtype %3453, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%3455 = torch.prim.ListConstruct %int2, %int2560, %int16, %int16 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3456 = torch.aten.broadcast_to %3454, %3455 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,2560,16,16],f16>
%3457 = torch.valsem.aten.copy %3456, %3452, %false : !torch.vtensor<[2,2560,16,16],f16>, !torch.vtensor<[2,2560,16,16],f32>, !torch.bool -> !torch.vtensor<[2,2560,16,16],f16>
%3458 = torch.aten.sigmoid %3457 : !torch.vtensor<[2,2560,16,16],f16> -> !torch.vtensor<[2,2560,16,16],f16>
%3459 = torch.aten.mul.Tensor %3458, %3457 : !torch.vtensor<[2,2560,16,16],f16>, !torch.vtensor<[2,2560,16,16],f16> -> !torch.vtensor<[2,2560,16,16],f16>
%3460 = torch.aten.convolution %3459, %348, %349, %744, %744, %744, %false, %745, %int1 : !torch.vtensor<[2,2560,16,16],f16>, !torch.vtensor<[1280,2560,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
%3461 = torch.aten.sigmoid %743 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%3462 = torch.aten.mul.Tensor %3461, %743 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%3463 = torch.aten.transpose.int %350, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%3464 = torch.aten.mm %3462, %3463 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[2,1280],f16>
%3465 = torch.aten.mul.Scalar %351, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16>
%3466 = torch.aten.add.Tensor %3465, %3464, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[2,1280],f16>, !torch.int -> !torch.vtensor<[2,1280],f16>
%3467 = torch.aten.slice.Tensor %3466, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,1280],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
%3468 = torch.aten.slice.Tensor %3467, %int1, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,1280],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
%3469 = torch.aten.unsqueeze %3468, %int2 : !torch.vtensor<[2,1280],f16>, !torch.int -> !torch.vtensor<[2,1280,1],f16>
%3470 = torch.aten.unsqueeze %3469, %int3 : !torch.vtensor<[2,1280,1],f16>, !torch.int -> !torch.vtensor<[2,1280,1,1],f16>
%3471 = torch.aten.add.Tensor %3460, %3470, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
%3472 = torch.aten.view %3471, %2027 : !torch.vtensor<[2,1280,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,32,40,256],f16>
%3473 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%3474 = torch.aten.to.dtype %3473, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%3475 = torch.prim.ListConstruct %int2, %int32, %int40, %int256 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3476 = torch.aten.broadcast_to %3474, %3475 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,40,256],f32>
%3477 = torch.valsem.aten.copy %3476, %3472, %false : !torch.vtensor<[2,32,40,256],f32>, !torch.vtensor<[2,32,40,256],f16>, !torch.bool -> !torch.vtensor<[2,32,40,256],f32>
%3478 = torch.aten.to.dtype %3477, %int7, %false, %false, %none : !torch.vtensor<[2,32,40,256],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,40,256],f64>
%3479 = torch.aten.sum.dim_IntList %3478, %754, %true, %none : !torch.vtensor<[2,32,40,256],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%3480 = torch.aten.div.Scalar %3479, %int10240 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%3481 = torch.aten.sub.Tensor %3478, %3480, %float1.000000e00 : !torch.vtensor<[2,32,40,256],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,40,256],f64>
%3482 = torch.aten.mul.Tensor %3481, %3481 : !torch.vtensor<[2,32,40,256],f64>, !torch.vtensor<[2,32,40,256],f64> -> !torch.vtensor<[2,32,40,256],f64>
%3483 = torch.aten.sum.dim_IntList %3482, %754, %true, %none : !torch.vtensor<[2,32,40,256],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%3484 = torch.aten.div.Scalar %3483, %int10240 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%3485 = torch.aten.to.dtype %3484, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%3486 = torch.aten.sum.dim_IntList %3477, %754, %true, %none : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%3487 = torch.aten.div.Scalar %3486, %int10240 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%3488 = torch.aten.add.Tensor %3485, %6, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%3489 = torch.aten.rsqrt %3488 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%3490 = torch.aten.sub.Tensor %3472, %3487, %int1 : !torch.vtensor<[2,32,40,256],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,256],f32>
%3491 = torch.aten.mul.Tensor %3490, %3489 : !torch.vtensor<[2,32,40,256],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,256],f32>
%3492 = torch.aten.view %3491, %2048 : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f32>
%3493 = torch.aten.unsqueeze %352, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%3494 = torch.aten.unsqueeze %3493, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%3495 = torch.aten.mul.Tensor %3492, %3494 : !torch.vtensor<[2,1280,16,16],f32>, !torch.vtensor<[1280,1,1],f16> -> !torch.vtensor<[2,1280,16,16],f32>
%3496 = torch.aten.unsqueeze %353, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%3497 = torch.aten.unsqueeze %3496, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%3498 = torch.aten.add.Tensor %3495, %3497, %int1 : !torch.vtensor<[2,1280,16,16],f32>, !torch.vtensor<[1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f32>
%3499 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%3500 = torch.aten.to.dtype %3499, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%3501 = torch.prim.ListConstruct %int2, %int1280, %int16, %int16 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3502 = torch.aten.broadcast_to %3500, %3501 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f16>
%3503 = torch.valsem.aten.copy %3502, %3498, %false : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f32>, !torch.bool -> !torch.vtensor<[2,1280,16,16],f16>
%3504 = torch.aten.sigmoid %3503 : !torch.vtensor<[2,1280,16,16],f16> -> !torch.vtensor<[2,1280,16,16],f16>
%3505 = torch.aten.mul.Tensor %3504, %3503 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f16> -> !torch.vtensor<[2,1280,16,16],f16>
%3506 = torch.aten.convolution %3505, %354, %355, %744, %744, %744, %false, %745, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
%3507 = torch.aten.convolution %3422, %356, %357, %744, %745, %744, %false, %745, %int1 : !torch.vtensor<[2,2560,16,16],f16>, !torch.vtensor<[1280,2560,1,1],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
%3508 = torch.aten.add.Tensor %3507, %3506, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
%3509 = torch.aten.div.Tensor %3508, %5 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[2,1280,16,16],f16>
%3510 = torch.aten.clone %3509, %int0 : !torch.vtensor<[2,1280,16,16],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
%3511 = torch.aten.view %3510, %2027 : !torch.vtensor<[2,1280,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,32,40,256],f16>
%3512 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%3513 = torch.aten.to.dtype %3512, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%3514 = torch.prim.ListConstruct %int2, %int32, %int40, %int256 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3515 = torch.aten.broadcast_to %3513, %3514 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,40,256],f32>
%3516 = torch.valsem.aten.copy %3515, %3511, %false : !torch.vtensor<[2,32,40,256],f32>, !torch.vtensor<[2,32,40,256],f16>, !torch.bool -> !torch.vtensor<[2,32,40,256],f32>
%3517 = torch.aten.to.dtype %3516, %int7, %false, %false, %none : !torch.vtensor<[2,32,40,256],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,40,256],f64>
%3518 = torch.aten.sum.dim_IntList %3517, %754, %true, %none : !torch.vtensor<[2,32,40,256],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%3519 = torch.aten.div.Scalar %3518, %int10240 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%3520 = torch.aten.sub.Tensor %3517, %3519, %float1.000000e00 : !torch.vtensor<[2,32,40,256],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,40,256],f64>
%3521 = torch.aten.mul.Tensor %3520, %3520 : !torch.vtensor<[2,32,40,256],f64>, !torch.vtensor<[2,32,40,256],f64> -> !torch.vtensor<[2,32,40,256],f64>
%3522 = torch.aten.sum.dim_IntList %3521, %754, %true, %none : !torch.vtensor<[2,32,40,256],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%3523 = torch.aten.div.Scalar %3522, %int10240 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%3524 = torch.aten.to.dtype %3523, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%3525 = torch.aten.sum.dim_IntList %3516, %754, %true, %none : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%3526 = torch.aten.div.Scalar %3525, %int10240 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%3527 = torch.aten.add.Tensor %3524, %4, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%3528 = torch.aten.rsqrt %3527 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%3529 = torch.aten.sub.Tensor %3511, %3526, %int1 : !torch.vtensor<[2,32,40,256],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,256],f32>
%3530 = torch.aten.mul.Tensor %3529, %3528 : !torch.vtensor<[2,32,40,256],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,256],f32>
%3531 = torch.aten.view %3530, %2048 : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f32>
%3532 = torch.aten.unsqueeze %358, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%3533 = torch.aten.unsqueeze %3532, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%3534 = torch.aten.mul.Tensor %3531, %3533 : !torch.vtensor<[2,1280,16,16],f32>, !torch.vtensor<[1280,1,1],f16> -> !torch.vtensor<[2,1280,16,16],f32>
%3535 = torch.aten.unsqueeze %359, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%3536 = torch.aten.unsqueeze %3535, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%3537 = torch.aten.add.Tensor %3534, %3536, %int1 : !torch.vtensor<[2,1280,16,16],f32>, !torch.vtensor<[1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f32>
%3538 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%3539 = torch.aten.to.dtype %3538, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%3540 = torch.prim.ListConstruct %int2, %int1280, %int16, %int16 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3541 = torch.aten.broadcast_to %3539, %3540 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f16>
%3542 = torch.valsem.aten.copy %3541, %3537, %false : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f32>, !torch.bool -> !torch.vtensor<[2,1280,16,16],f16>
%3543 = torch.aten.convolution %3542, %360, %361, %744, %745, %744, %false, %745, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[1280,1280,1,1],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
%3544 = torch.aten.permute %3543, %866 : !torch.vtensor<[2,1280,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,16,16,1280],f16>
%3545 = torch.aten.view %3544, %2102 : !torch.vtensor<[2,16,16,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%3546 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%3547 = torch.aten.sum.dim_IntList %3545, %3546, %true, %none : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,256,1],f16>
%3548 = torch.aten.div.Scalar %3547, %int1280 : !torch.vtensor<[2,256,1],f16>, !torch.int -> !torch.vtensor<[2,256,1],f16>
%3549 = torch.prim.ListConstruct %int2, %int256, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3550 = torch.aten.broadcast_to %3548, %3549 : !torch.vtensor<[2,256,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%3551 = torch.aten.sub.Tensor %3545, %3550, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16>
%3552 = torch.aten.mul.Tensor %3551, %3551 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16>
%3553 = torch.aten.sum.dim_IntList %3552, %3546, %true, %none : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,256,1],f16>
%3554 = torch.aten.div.Scalar %3553, %int1280 : !torch.vtensor<[2,256,1],f16>, !torch.int -> !torch.vtensor<[2,256,1],f16>
%3555 = torch.aten.add.Scalar %3554, %float1.000000e-05, %int1 : !torch.vtensor<[2,256,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,256,1],f16>
%3556 = torch.aten.rsqrt %3555 : !torch.vtensor<[2,256,1],f16> -> !torch.vtensor<[2,256,1],f16>
%3557 = torch.prim.ListConstruct %int2, %int256, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3558 = torch.aten.broadcast_to %3556, %3557 : !torch.vtensor<[2,256,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%3559 = torch.aten.mul.Tensor %3551, %3558 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16>
%3560 = torch.aten.mul.Tensor %3559, %362 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[1280],f16> -> !torch.vtensor<[2,256,1280],f16>
%3561 = torch.aten.add.Tensor %3560, %363, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16>
%3562 = torch.aten.transpose.int %364, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%3563 = torch.aten.view %3561, %2121 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
%3564 = torch.aten.mm %3563, %3562 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16>
%3565 = torch.aten.view %3564, %2102 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%3566 = torch.aten.transpose.int %365, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%3567 = torch.aten.view %3561, %2121 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
%3568 = torch.aten.mm %3567, %3566 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16>
%3569 = torch.aten.view %3568, %2102 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%3570 = torch.aten.transpose.int %366, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%3571 = torch.aten.view %3561, %2121 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
%3572 = torch.aten.mm %3571, %3570 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16>
%3573 = torch.aten.view %3572, %2102 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%3574 = torch.aten.view %3565, %2133 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16>
%3575 = torch.aten.permute %3574, %901 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16>
%3576 = torch.aten.clone %3575, %int0 : !torch.vtensor<[2,8,256,160],f16>, !torch.int -> !torch.vtensor<[2,8,256,160],f16>
%3577 = torch.aten.view %3576, %2137 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%3578 = torch.aten.view %3569, %2133 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16>
%3579 = torch.aten.permute %3578, %901 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16>
%3580 = torch.aten.clone %3579, %int0 : !torch.vtensor<[2,8,256,160],f16>, !torch.int -> !torch.vtensor<[2,8,256,160],f16>
%3581 = torch.aten.view %3580, %2137 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%3582 = torch.aten.view %3573, %2133 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16>
%3583 = torch.aten.permute %3582, %901 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16>
%3584 = torch.aten.clone %3583, %int0 : !torch.vtensor<[2,8,256,160],f16>, !torch.int -> !torch.vtensor<[2,8,256,160],f16>
%3585 = torch.aten.view %3584, %2137 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%3586 = torch.aten.transpose.int %3581, %int-1, %int-2 : !torch.vtensor<[16,256,160],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,160,256],f16>
%3587 = torch.aten.broadcast_to %3577, %2137 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%3588 = torch.aten.view %3587, %2137 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%3589 = torch.aten.broadcast_to %3586, %2150 : !torch.vtensor<[16,160,256],f16>, !torch.list<int> -> !torch.vtensor<[16,160,256],f16>
%3590 = torch.aten.view %3589, %2150 : !torch.vtensor<[16,160,256],f16>, !torch.list<int> -> !torch.vtensor<[16,160,256],f16>
%3591 = torch.aten.bmm %3588, %3590 : !torch.vtensor<[16,256,160],f16>, !torch.vtensor<[16,160,256],f16> -> !torch.vtensor<[16,256,256],f16>
%3592 = torch.aten.view %3591, %2154 : !torch.vtensor<[16,256,256],f16>, !torch.list<int> -> !torch.vtensor<[16,256,256],f16>
%3593 = torch.aten.mul.Tensor %3592, %1 : !torch.vtensor<[16,256,256],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,256,256],f16>
%values_26, %indices_27 = torch.aten.max.dim %3593, %int-1, %true : !torch.vtensor<[16,256,256],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,256,1],f16>, !torch.vtensor<[16,256,1],si64>
%3594 = torch.aten.sub.Tensor %3593, %values_26, %float1.000000e00 : !torch.vtensor<[16,256,256],f16>, !torch.vtensor<[16,256,1],f16>, !torch.float -> !torch.vtensor<[16,256,256],f16>
%3595 = torch.aten.exp %3594 : !torch.vtensor<[16,256,256],f16> -> !torch.vtensor<[16,256,256],f16>
%3596 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%3597 = torch.aten.sum.dim_IntList %3595, %3596, %true, %none : !torch.vtensor<[16,256,256],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[16,256,1],f16>
%3598 = torch.aten.div.Tensor %3595, %3597 : !torch.vtensor<[16,256,256],f16>, !torch.vtensor<[16,256,1],f16> -> !torch.vtensor<[16,256,256],f16>
%3599 = torch.aten.broadcast_to %3598, %2154 : !torch.vtensor<[16,256,256],f16>, !torch.list<int> -> !torch.vtensor<[16,256,256],f16>
%3600 = torch.aten.view %3599, %2154 : !torch.vtensor<[16,256,256],f16>, !torch.list<int> -> !torch.vtensor<[16,256,256],f16>
%3601 = torch.aten.broadcast_to %3585, %2137 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%3602 = torch.aten.view %3601, %2137 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%3603 = torch.aten.bmm %3600, %3602 : !torch.vtensor<[16,256,256],f16>, !torch.vtensor<[16,256,160],f16> -> !torch.vtensor<[16,256,160],f16>
%3604 = torch.aten.view %3603, %2137 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%3605 = torch.aten.view %3604, %2168 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16>
%3606 = torch.aten.permute %3605, %901 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16>
%3607 = torch.aten.clone %3606, %int0 : !torch.vtensor<[2,256,8,160],f16>, !torch.int -> !torch.vtensor<[2,256,8,160],f16>
%3608 = torch.aten.view %3607, %2102 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%3609 = torch.aten.transpose.int %367, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%3610 = torch.aten.view %3608, %2121 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
%3611 = torch.aten.mm %3610, %3609 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16>
%3612 = torch.aten.mul.Scalar %368, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16>
%3613 = torch.aten.add.Tensor %3612, %3611, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[512,1280],f16>, !torch.int -> !torch.vtensor<[512,1280],f16>
%3614 = torch.aten.view %3613, %2102 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%3615 = torch.aten.add.Tensor %3614, %3545, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16>
%3616 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%3617 = torch.aten.sum.dim_IntList %3615, %3616, %true, %none : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,256,1],f16>
%3618 = torch.aten.div.Scalar %3617, %int1280 : !torch.vtensor<[2,256,1],f16>, !torch.int -> !torch.vtensor<[2,256,1],f16>
%3619 = torch.prim.ListConstruct %int2, %int256, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3620 = torch.aten.broadcast_to %3618, %3619 : !torch.vtensor<[2,256,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%3621 = torch.aten.sub.Tensor %3615, %3620, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16>
%3622 = torch.aten.mul.Tensor %3621, %3621 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16>
%3623 = torch.aten.sum.dim_IntList %3622, %3616, %true, %none : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,256,1],f16>
%3624 = torch.aten.div.Scalar %3623, %int1280 : !torch.vtensor<[2,256,1],f16>, !torch.int -> !torch.vtensor<[2,256,1],f16>
%3625 = torch.aten.add.Scalar %3624, %float1.000000e-05, %int1 : !torch.vtensor<[2,256,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,256,1],f16>
%3626 = torch.aten.rsqrt %3625 : !torch.vtensor<[2,256,1],f16> -> !torch.vtensor<[2,256,1],f16>
%3627 = torch.prim.ListConstruct %int2, %int256, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3628 = torch.aten.broadcast_to %3626, %3627 : !torch.vtensor<[2,256,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%3629 = torch.aten.mul.Tensor %3621, %3628 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16>
%3630 = torch.aten.mul.Tensor %3629, %369 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[1280],f16> -> !torch.vtensor<[2,256,1280],f16>
%3631 = torch.aten.add.Tensor %3630, %370, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16>
%3632 = torch.aten.transpose.int %371, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%3633 = torch.aten.view %3631, %2121 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
%3634 = torch.aten.mm %3633, %3632 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16>
%3635 = torch.aten.view %3634, %2102 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%3636 = torch.aten.transpose.int %372, %int0, %int1 : !torch.vtensor<[1280,768],f16>, !torch.int, !torch.int -> !torch.vtensor<[768,1280],f16>
%3637 = torch.aten.view %arg2, %968 : !torch.vtensor<[2,77,768],f16>, !torch.list<int> -> !torch.vtensor<[154,768],f16>
%3638 = torch.aten.mm %3637, %3636 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,1280],f16> -> !torch.vtensor<[154,1280],f16>
%3639 = torch.aten.view %3638, %2203 : !torch.vtensor<[154,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,77,1280],f16>
%3640 = torch.aten.transpose.int %373, %int0, %int1 : !torch.vtensor<[1280,768],f16>, !torch.int, !torch.int -> !torch.vtensor<[768,1280],f16>
%3641 = torch.aten.view %arg2, %968 : !torch.vtensor<[2,77,768],f16>, !torch.list<int> -> !torch.vtensor<[154,768],f16>
%3642 = torch.aten.mm %3641, %3640 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,1280],f16> -> !torch.vtensor<[154,1280],f16>
%3643 = torch.aten.view %3642, %2203 : !torch.vtensor<[154,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,77,1280],f16>
%3644 = torch.aten.view %3635, %2133 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16>
%3645 = torch.aten.permute %3644, %901 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16>
%3646 = torch.aten.clone %3645, %int0 : !torch.vtensor<[2,8,256,160],f16>, !torch.int -> !torch.vtensor<[2,8,256,160],f16>
%3647 = torch.aten.view %3646, %2137 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%3648 = torch.aten.view %3639, %2213 : !torch.vtensor<[2,77,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,77,8,160],f16>
%3649 = torch.aten.permute %3648, %901 : !torch.vtensor<[2,77,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,160],f16>
%3650 = torch.aten.clone %3649, %int0 : !torch.vtensor<[2,8,77,160],f16>, !torch.int -> !torch.vtensor<[2,8,77,160],f16>
%3651 = torch.aten.view %3650, %2217 : !torch.vtensor<[2,8,77,160],f16>, !torch.list<int> -> !torch.vtensor<[16,77,160],f16>
%3652 = torch.aten.view %3643, %2213 : !torch.vtensor<[2,77,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,77,8,160],f16>
%3653 = torch.aten.permute %3652, %901 : !torch.vtensor<[2,77,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,160],f16>
%3654 = torch.aten.clone %3653, %int0 : !torch.vtensor<[2,8,77,160],f16>, !torch.int -> !torch.vtensor<[2,8,77,160],f16>
%3655 = torch.aten.view %3654, %2217 : !torch.vtensor<[2,8,77,160],f16>, !torch.list<int> -> !torch.vtensor<[16,77,160],f16>
%3656 = torch.aten.transpose.int %3651, %int-1, %int-2 : !torch.vtensor<[16,77,160],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,160,77],f16>
%3657 = torch.aten.broadcast_to %3647, %2137 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%3658 = torch.aten.view %3657, %2137 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%3659 = torch.aten.broadcast_to %3656, %2226 : !torch.vtensor<[16,160,77],f16>, !torch.list<int> -> !torch.vtensor<[16,160,77],f16>
%3660 = torch.aten.view %3659, %2226 : !torch.vtensor<[16,160,77],f16>, !torch.list<int> -> !torch.vtensor<[16,160,77],f16>
%3661 = torch.aten.bmm %3658, %3660 : !torch.vtensor<[16,256,160],f16>, !torch.vtensor<[16,160,77],f16> -> !torch.vtensor<[16,256,77],f16>
%3662 = torch.aten.view %3661, %2230 : !torch.vtensor<[16,256,77],f16>, !torch.list<int> -> !torch.vtensor<[16,256,77],f16>
%3663 = torch.aten.mul.Tensor %3662, %1 : !torch.vtensor<[16,256,77],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,256,77],f16>
%values_28, %indices_29 = torch.aten.max.dim %3663, %int-1, %true : !torch.vtensor<[16,256,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,256,1],f16>, !torch.vtensor<[16,256,1],si64>
%3664 = torch.aten.sub.Tensor %3663, %values_28, %float1.000000e00 : !torch.vtensor<[16,256,77],f16>, !torch.vtensor<[16,256,1],f16>, !torch.float -> !torch.vtensor<[16,256,77],f16>
%3665 = torch.aten.exp %3664 : !torch.vtensor<[16,256,77],f16> -> !torch.vtensor<[16,256,77],f16>
%3666 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%3667 = torch.aten.sum.dim_IntList %3665, %3666, %true, %none : !torch.vtensor<[16,256,77],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[16,256,1],f16>
%3668 = torch.aten.div.Tensor %3665, %3667 : !torch.vtensor<[16,256,77],f16>, !torch.vtensor<[16,256,1],f16> -> !torch.vtensor<[16,256,77],f16>
%3669 = torch.aten.broadcast_to %3668, %2230 : !torch.vtensor<[16,256,77],f16>, !torch.list<int> -> !torch.vtensor<[16,256,77],f16>
%3670 = torch.aten.view %3669, %2230 : !torch.vtensor<[16,256,77],f16>, !torch.list<int> -> !torch.vtensor<[16,256,77],f16>
%3671 = torch.aten.broadcast_to %3655, %2217 : !torch.vtensor<[16,77,160],f16>, !torch.list<int> -> !torch.vtensor<[16,77,160],f16>
%3672 = torch.aten.view %3671, %2217 : !torch.vtensor<[16,77,160],f16>, !torch.list<int> -> !torch.vtensor<[16,77,160],f16>
%3673 = torch.aten.bmm %3670, %3672 : !torch.vtensor<[16,256,77],f16>, !torch.vtensor<[16,77,160],f16> -> !torch.vtensor<[16,256,160],f16>
%3674 = torch.aten.view %3673, %2137 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%3675 = torch.aten.view %3674, %2168 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16>
%3676 = torch.aten.permute %3675, %901 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16>
%3677 = torch.aten.clone %3676, %int0 : !torch.vtensor<[2,256,8,160],f16>, !torch.int -> !torch.vtensor<[2,256,8,160],f16>
%3678 = torch.aten.view %3677, %2102 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%3679 = torch.aten.transpose.int %374, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%3680 = torch.aten.view %3678, %2121 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
%3681 = torch.aten.mm %3680, %3679 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16>
%3682 = torch.aten.mul.Scalar %375, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16>
%3683 = torch.aten.add.Tensor %3682, %3681, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[512,1280],f16>, !torch.int -> !torch.vtensor<[512,1280],f16>
%3684 = torch.aten.view %3683, %2102 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%3685 = torch.aten.add.Tensor %3684, %3615, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16>
%3686 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%3687 = torch.aten.sum.dim_IntList %3685, %3686, %true, %none : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,256,1],f16>
%3688 = torch.aten.div.Scalar %3687, %int1280 : !torch.vtensor<[2,256,1],f16>, !torch.int -> !torch.vtensor<[2,256,1],f16>
%3689 = torch.prim.ListConstruct %int2, %int256, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3690 = torch.aten.broadcast_to %3688, %3689 : !torch.vtensor<[2,256,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%3691 = torch.aten.sub.Tensor %3685, %3690, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16>
%3692 = torch.aten.mul.Tensor %3691, %3691 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16>
%3693 = torch.aten.sum.dim_IntList %3692, %3686, %true, %none : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,256,1],f16>
%3694 = torch.aten.div.Scalar %3693, %int1280 : !torch.vtensor<[2,256,1],f16>, !torch.int -> !torch.vtensor<[2,256,1],f16>
%3695 = torch.aten.add.Scalar %3694, %float1.000000e-05, %int1 : !torch.vtensor<[2,256,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,256,1],f16>
%3696 = torch.aten.rsqrt %3695 : !torch.vtensor<[2,256,1],f16> -> !torch.vtensor<[2,256,1],f16>
%3697 = torch.prim.ListConstruct %int2, %int256, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3698 = torch.aten.broadcast_to %3696, %3697 : !torch.vtensor<[2,256,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%3699 = torch.aten.mul.Tensor %3691, %3698 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16>
%3700 = torch.aten.mul.Tensor %3699, %376 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[1280],f16> -> !torch.vtensor<[2,256,1280],f16>
%3701 = torch.aten.add.Tensor %3700, %377, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16>
%3702 = torch.aten.transpose.int %378, %int0, %int1 : !torch.vtensor<[10240,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,10240],f16>
%3703 = torch.aten.view %3701, %2121 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
%3704 = torch.aten.mm %3703, %3702 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,10240],f16> -> !torch.vtensor<[512,10240],f16>
%3705 = torch.aten.mul.Scalar %379, %int1 : !torch.vtensor<[10240],f16>, !torch.int -> !torch.vtensor<[10240],f16>
%3706 = torch.aten.add.Tensor %3705, %3704, %int1 : !torch.vtensor<[10240],f16>, !torch.vtensor<[512,10240],f16>, !torch.int -> !torch.vtensor<[512,10240],f16>
%3707 = torch.aten.view %3706, %2276 : !torch.vtensor<[512,10240],f16>, !torch.list<int> -> !torch.vtensor<[2,256,10240],f16>
%3708 = torch.aten.slice.Tensor %3707, %int-1, %int0, %int5120, %int1 : !torch.vtensor<[2,256,10240],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,256,5120],f16>
%3709 = torch.aten.slice.Tensor %3707, %int-1, %int5120, %int10240, %int1 : !torch.vtensor<[2,256,10240],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,256,5120],f16>
%3710 = torch.aten.gelu %3709, %str : !torch.vtensor<[2,256,5120],f16>, !torch.str -> !torch.vtensor<[2,256,5120],f16>
%3711 = torch.aten.mul.Tensor %3708, %3710 : !torch.vtensor<[2,256,5120],f16>, !torch.vtensor<[2,256,5120],f16> -> !torch.vtensor<[2,256,5120],f16>
%3712 = torch.aten.transpose.int %380, %int0, %int1 : !torch.vtensor<[1280,5120],f16>, !torch.int, !torch.int -> !torch.vtensor<[5120,1280],f16>
%3713 = torch.aten.view %3711, %2283 : !torch.vtensor<[2,256,5120],f16>, !torch.list<int> -> !torch.vtensor<[512,5120],f16>
%3714 = torch.aten.mm %3713, %3712 : !torch.vtensor<[512,5120],f16>, !torch.vtensor<[5120,1280],f16> -> !torch.vtensor<[512,1280],f16>
%3715 = torch.aten.mul.Scalar %381, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16>
%3716 = torch.aten.add.Tensor %3715, %3714, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[512,1280],f16>, !torch.int -> !torch.vtensor<[512,1280],f16>
%3717 = torch.aten.view %3716, %2102 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%3718 = torch.aten.add.Tensor %3717, %3685, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16>
%3719 = torch.aten.view %3718, %2290 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,16,16,1280],f16>
%3720 = torch.aten.permute %3719, %1060 : !torch.vtensor<[2,16,16,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f16>
%3721 = torch.aten.convolution %3720, %382, %383, %744, %745, %744, %false, %745, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[1280,1280,1,1],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
%3722 = torch.aten.add.Tensor %3721, %3509, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
%3723 = torch.prim.ListConstruct %3722, %2294 : (!torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f16>) -> !torch.list<vtensor>
%3724 = torch.aten.cat %3723, %int1 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[2,2560,16,16],f16>
%3725 = torch.aten.clone %3724, %int0 : !torch.vtensor<[2,2560,16,16],f16>, !torch.int -> !torch.vtensor<[2,2560,16,16],f16>
%3726 = torch.aten.view %3725, %3424 : !torch.vtensor<[2,2560,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,32,80,256],f16>
%3727 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%3728 = torch.aten.to.dtype %3727, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%3729 = torch.prim.ListConstruct %int2, %int32, %int80, %int256 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3730 = torch.aten.broadcast_to %3728, %3729 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,80,256],f32>
%3731 = torch.valsem.aten.copy %3730, %3726, %false : !torch.vtensor<[2,32,80,256],f32>, !torch.vtensor<[2,32,80,256],f16>, !torch.bool -> !torch.vtensor<[2,32,80,256],f32>
%3732 = torch.aten.to.dtype %3731, %int7, %false, %false, %none : !torch.vtensor<[2,32,80,256],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,80,256],f64>
%3733 = torch.aten.sum.dim_IntList %3732, %754, %true, %none : !torch.vtensor<[2,32,80,256],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%3734 = torch.aten.div.Scalar %3733, %int20480 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%3735 = torch.aten.sub.Tensor %3732, %3734, %float1.000000e00 : !torch.vtensor<[2,32,80,256],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,80,256],f64>
%3736 = torch.aten.mul.Tensor %3735, %3735 : !torch.vtensor<[2,32,80,256],f64>, !torch.vtensor<[2,32,80,256],f64> -> !torch.vtensor<[2,32,80,256],f64>
%3737 = torch.aten.sum.dim_IntList %3736, %754, %true, %none : !torch.vtensor<[2,32,80,256],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%3738 = torch.aten.div.Scalar %3737, %int20480 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%3739 = torch.aten.to.dtype %3738, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%3740 = torch.aten.sum.dim_IntList %3731, %754, %true, %none : !torch.vtensor<[2,32,80,256],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%3741 = torch.aten.div.Scalar %3740, %int20480 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%3742 = torch.aten.add.Tensor %3739, %6, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%3743 = torch.aten.rsqrt %3742 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%3744 = torch.aten.sub.Tensor %3726, %3741, %int1 : !torch.vtensor<[2,32,80,256],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,80,256],f32>
%3745 = torch.aten.mul.Tensor %3744, %3743 : !torch.vtensor<[2,32,80,256],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,80,256],f32>
%3746 = torch.aten.view %3745, %3445 : !torch.vtensor<[2,32,80,256],f32>, !torch.list<int> -> !torch.vtensor<[2,2560,16,16],f32>
%3747 = torch.aten.unsqueeze %384, %int-1 : !torch.vtensor<[2560],f16>, !torch.int -> !torch.vtensor<[2560,1],f16>
%3748 = torch.aten.unsqueeze %3747, %int-1 : !torch.vtensor<[2560,1],f16>, !torch.int -> !torch.vtensor<[2560,1,1],f16>
%3749 = torch.aten.mul.Tensor %3746, %3748 : !torch.vtensor<[2,2560,16,16],f32>, !torch.vtensor<[2560,1,1],f16> -> !torch.vtensor<[2,2560,16,16],f32>
%3750 = torch.aten.unsqueeze %385, %int-1 : !torch.vtensor<[2560],f16>, !torch.int -> !torch.vtensor<[2560,1],f16>
%3751 = torch.aten.unsqueeze %3750, %int-1 : !torch.vtensor<[2560,1],f16>, !torch.int -> !torch.vtensor<[2560,1,1],f16>
%3752 = torch.aten.add.Tensor %3749, %3751, %int1 : !torch.vtensor<[2,2560,16,16],f32>, !torch.vtensor<[2560,1,1],f16>, !torch.int -> !torch.vtensor<[2,2560,16,16],f32>
%3753 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%3754 = torch.aten.to.dtype %3753, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%3755 = torch.prim.ListConstruct %int2, %int2560, %int16, %int16 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3756 = torch.aten.broadcast_to %3754, %3755 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,2560,16,16],f16>
%3757 = torch.valsem.aten.copy %3756, %3752, %false : !torch.vtensor<[2,2560,16,16],f16>, !torch.vtensor<[2,2560,16,16],f32>, !torch.bool -> !torch.vtensor<[2,2560,16,16],f16>
%3758 = torch.aten.sigmoid %3757 : !torch.vtensor<[2,2560,16,16],f16> -> !torch.vtensor<[2,2560,16,16],f16>
%3759 = torch.aten.mul.Tensor %3758, %3757 : !torch.vtensor<[2,2560,16,16],f16>, !torch.vtensor<[2,2560,16,16],f16> -> !torch.vtensor<[2,2560,16,16],f16>
%3760 = torch.aten.convolution %3759, %386, %387, %744, %744, %744, %false, %745, %int1 : !torch.vtensor<[2,2560,16,16],f16>, !torch.vtensor<[1280,2560,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
%3761 = torch.aten.sigmoid %743 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%3762 = torch.aten.mul.Tensor %3761, %743 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%3763 = torch.aten.transpose.int %388, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%3764 = torch.aten.mm %3762, %3763 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[2,1280],f16>
%3765 = torch.aten.mul.Scalar %389, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16>
%3766 = torch.aten.add.Tensor %3765, %3764, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[2,1280],f16>, !torch.int -> !torch.vtensor<[2,1280],f16>
%3767 = torch.aten.slice.Tensor %3766, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,1280],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
%3768 = torch.aten.slice.Tensor %3767, %int1, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,1280],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
%3769 = torch.aten.unsqueeze %3768, %int2 : !torch.vtensor<[2,1280],f16>, !torch.int -> !torch.vtensor<[2,1280,1],f16>
%3770 = torch.aten.unsqueeze %3769, %int3 : !torch.vtensor<[2,1280,1],f16>, !torch.int -> !torch.vtensor<[2,1280,1,1],f16>
%3771 = torch.aten.add.Tensor %3760, %3770, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
%3772 = torch.aten.view %3771, %2027 : !torch.vtensor<[2,1280,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,32,40,256],f16>
%3773 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%3774 = torch.aten.to.dtype %3773, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%3775 = torch.prim.ListConstruct %int2, %int32, %int40, %int256 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3776 = torch.aten.broadcast_to %3774, %3775 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,40,256],f32>
%3777 = torch.valsem.aten.copy %3776, %3772, %false : !torch.vtensor<[2,32,40,256],f32>, !torch.vtensor<[2,32,40,256],f16>, !torch.bool -> !torch.vtensor<[2,32,40,256],f32>
%3778 = torch.aten.to.dtype %3777, %int7, %false, %false, %none : !torch.vtensor<[2,32,40,256],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,40,256],f64>
%3779 = torch.aten.sum.dim_IntList %3778, %754, %true, %none : !torch.vtensor<[2,32,40,256],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%3780 = torch.aten.div.Scalar %3779, %int10240 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%3781 = torch.aten.sub.Tensor %3778, %3780, %float1.000000e00 : !torch.vtensor<[2,32,40,256],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,40,256],f64>
%3782 = torch.aten.mul.Tensor %3781, %3781 : !torch.vtensor<[2,32,40,256],f64>, !torch.vtensor<[2,32,40,256],f64> -> !torch.vtensor<[2,32,40,256],f64>
%3783 = torch.aten.sum.dim_IntList %3782, %754, %true, %none : !torch.vtensor<[2,32,40,256],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%3784 = torch.aten.div.Scalar %3783, %int10240 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%3785 = torch.aten.to.dtype %3784, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%3786 = torch.aten.sum.dim_IntList %3777, %754, %true, %none : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%3787 = torch.aten.div.Scalar %3786, %int10240 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%3788 = torch.aten.add.Tensor %3785, %6, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%3789 = torch.aten.rsqrt %3788 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%3790 = torch.aten.sub.Tensor %3772, %3787, %int1 : !torch.vtensor<[2,32,40,256],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,256],f32>
%3791 = torch.aten.mul.Tensor %3790, %3789 : !torch.vtensor<[2,32,40,256],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,256],f32>
%3792 = torch.aten.view %3791, %2048 : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f32>
%3793 = torch.aten.unsqueeze %390, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%3794 = torch.aten.unsqueeze %3793, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%3795 = torch.aten.mul.Tensor %3792, %3794 : !torch.vtensor<[2,1280,16,16],f32>, !torch.vtensor<[1280,1,1],f16> -> !torch.vtensor<[2,1280,16,16],f32>
%3796 = torch.aten.unsqueeze %391, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%3797 = torch.aten.unsqueeze %3796, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%3798 = torch.aten.add.Tensor %3795, %3797, %int1 : !torch.vtensor<[2,1280,16,16],f32>, !torch.vtensor<[1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f32>
%3799 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%3800 = torch.aten.to.dtype %3799, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%3801 = torch.prim.ListConstruct %int2, %int1280, %int16, %int16 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3802 = torch.aten.broadcast_to %3800, %3801 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f16>
%3803 = torch.valsem.aten.copy %3802, %3798, %false : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f32>, !torch.bool -> !torch.vtensor<[2,1280,16,16],f16>
%3804 = torch.aten.sigmoid %3803 : !torch.vtensor<[2,1280,16,16],f16> -> !torch.vtensor<[2,1280,16,16],f16>
%3805 = torch.aten.mul.Tensor %3804, %3803 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f16> -> !torch.vtensor<[2,1280,16,16],f16>
%3806 = torch.aten.convolution %3805, %392, %393, %744, %744, %744, %false, %745, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
%3807 = torch.aten.convolution %3724, %394, %395, %744, %745, %744, %false, %745, %int1 : !torch.vtensor<[2,2560,16,16],f16>, !torch.vtensor<[1280,2560,1,1],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
%3808 = torch.aten.add.Tensor %3807, %3806, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
%3809 = torch.aten.div.Tensor %3808, %5 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[2,1280,16,16],f16>
%3810 = torch.aten.clone %3809, %int0 : !torch.vtensor<[2,1280,16,16],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
%3811 = torch.aten.view %3810, %2027 : !torch.vtensor<[2,1280,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,32,40,256],f16>
%3812 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%3813 = torch.aten.to.dtype %3812, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%3814 = torch.prim.ListConstruct %int2, %int32, %int40, %int256 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3815 = torch.aten.broadcast_to %3813, %3814 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,40,256],f32>
%3816 = torch.valsem.aten.copy %3815, %3811, %false : !torch.vtensor<[2,32,40,256],f32>, !torch.vtensor<[2,32,40,256],f16>, !torch.bool -> !torch.vtensor<[2,32,40,256],f32>
%3817 = torch.aten.to.dtype %3816, %int7, %false, %false, %none : !torch.vtensor<[2,32,40,256],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,40,256],f64>
%3818 = torch.aten.sum.dim_IntList %3817, %754, %true, %none : !torch.vtensor<[2,32,40,256],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%3819 = torch.aten.div.Scalar %3818, %int10240 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%3820 = torch.aten.sub.Tensor %3817, %3819, %float1.000000e00 : !torch.vtensor<[2,32,40,256],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,40,256],f64>
%3821 = torch.aten.mul.Tensor %3820, %3820 : !torch.vtensor<[2,32,40,256],f64>, !torch.vtensor<[2,32,40,256],f64> -> !torch.vtensor<[2,32,40,256],f64>
%3822 = torch.aten.sum.dim_IntList %3821, %754, %true, %none : !torch.vtensor<[2,32,40,256],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%3823 = torch.aten.div.Scalar %3822, %int10240 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%3824 = torch.aten.to.dtype %3823, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%3825 = torch.aten.sum.dim_IntList %3816, %754, %true, %none : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%3826 = torch.aten.div.Scalar %3825, %int10240 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%3827 = torch.aten.add.Tensor %3824, %4, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%3828 = torch.aten.rsqrt %3827 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%3829 = torch.aten.sub.Tensor %3811, %3826, %int1 : !torch.vtensor<[2,32,40,256],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,256],f32>
%3830 = torch.aten.mul.Tensor %3829, %3828 : !torch.vtensor<[2,32,40,256],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,256],f32>
%3831 = torch.aten.view %3830, %2048 : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f32>
%3832 = torch.aten.unsqueeze %396, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%3833 = torch.aten.unsqueeze %3832, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%3834 = torch.aten.mul.Tensor %3831, %3833 : !torch.vtensor<[2,1280,16,16],f32>, !torch.vtensor<[1280,1,1],f16> -> !torch.vtensor<[2,1280,16,16],f32>
%3835 = torch.aten.unsqueeze %397, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%3836 = torch.aten.unsqueeze %3835, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%3837 = torch.aten.add.Tensor %3834, %3836, %int1 : !torch.vtensor<[2,1280,16,16],f32>, !torch.vtensor<[1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f32>
%3838 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%3839 = torch.aten.to.dtype %3838, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%3840 = torch.prim.ListConstruct %int2, %int1280, %int16, %int16 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3841 = torch.aten.broadcast_to %3839, %3840 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f16>
%3842 = torch.valsem.aten.copy %3841, %3837, %false : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f32>, !torch.bool -> !torch.vtensor<[2,1280,16,16],f16>
%3843 = torch.aten.convolution %3842, %398, %399, %744, %745, %744, %false, %745, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[1280,1280,1,1],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
%3844 = torch.aten.permute %3843, %866 : !torch.vtensor<[2,1280,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,16,16,1280],f16>
%3845 = torch.aten.view %3844, %2102 : !torch.vtensor<[2,16,16,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%3846 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%3847 = torch.aten.sum.dim_IntList %3845, %3846, %true, %none : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,256,1],f16>
%3848 = torch.aten.div.Scalar %3847, %int1280 : !torch.vtensor<[2,256,1],f16>, !torch.int -> !torch.vtensor<[2,256,1],f16>
%3849 = torch.prim.ListConstruct %int2, %int256, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3850 = torch.aten.broadcast_to %3848, %3849 : !torch.vtensor<[2,256,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%3851 = torch.aten.sub.Tensor %3845, %3850, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16>
%3852 = torch.aten.mul.Tensor %3851, %3851 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16>
%3853 = torch.aten.sum.dim_IntList %3852, %3846, %true, %none : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,256,1],f16>
%3854 = torch.aten.div.Scalar %3853, %int1280 : !torch.vtensor<[2,256,1],f16>, !torch.int -> !torch.vtensor<[2,256,1],f16>
%3855 = torch.aten.add.Scalar %3854, %float1.000000e-05, %int1 : !torch.vtensor<[2,256,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,256,1],f16>
%3856 = torch.aten.rsqrt %3855 : !torch.vtensor<[2,256,1],f16> -> !torch.vtensor<[2,256,1],f16>
%3857 = torch.prim.ListConstruct %int2, %int256, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3858 = torch.aten.broadcast_to %3856, %3857 : !torch.vtensor<[2,256,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%3859 = torch.aten.mul.Tensor %3851, %3858 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16>
%3860 = torch.aten.mul.Tensor %3859, %400 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[1280],f16> -> !torch.vtensor<[2,256,1280],f16>
%3861 = torch.aten.add.Tensor %3860, %401, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16>
%3862 = torch.aten.transpose.int %402, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%3863 = torch.aten.view %3861, %2121 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
%3864 = torch.aten.mm %3863, %3862 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16>
%3865 = torch.aten.view %3864, %2102 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%3866 = torch.aten.transpose.int %403, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%3867 = torch.aten.view %3861, %2121 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
%3868 = torch.aten.mm %3867, %3866 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16>
%3869 = torch.aten.view %3868, %2102 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%3870 = torch.aten.transpose.int %404, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%3871 = torch.aten.view %3861, %2121 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
%3872 = torch.aten.mm %3871, %3870 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16>
%3873 = torch.aten.view %3872, %2102 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%3874 = torch.aten.view %3865, %2133 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16>
%3875 = torch.aten.permute %3874, %901 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16>
%3876 = torch.aten.clone %3875, %int0 : !torch.vtensor<[2,8,256,160],f16>, !torch.int -> !torch.vtensor<[2,8,256,160],f16>
%3877 = torch.aten.view %3876, %2137 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%3878 = torch.aten.view %3869, %2133 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16>
%3879 = torch.aten.permute %3878, %901 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16>
%3880 = torch.aten.clone %3879, %int0 : !torch.vtensor<[2,8,256,160],f16>, !torch.int -> !torch.vtensor<[2,8,256,160],f16>
%3881 = torch.aten.view %3880, %2137 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%3882 = torch.aten.view %3873, %2133 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16>
%3883 = torch.aten.permute %3882, %901 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16>
%3884 = torch.aten.clone %3883, %int0 : !torch.vtensor<[2,8,256,160],f16>, !torch.int -> !torch.vtensor<[2,8,256,160],f16>
%3885 = torch.aten.view %3884, %2137 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%3886 = torch.aten.transpose.int %3881, %int-1, %int-2 : !torch.vtensor<[16,256,160],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,160,256],f16>
%3887 = torch.aten.broadcast_to %3877, %2137 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%3888 = torch.aten.view %3887, %2137 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%3889 = torch.aten.broadcast_to %3886, %2150 : !torch.vtensor<[16,160,256],f16>, !torch.list<int> -> !torch.vtensor<[16,160,256],f16>
%3890 = torch.aten.view %3889, %2150 : !torch.vtensor<[16,160,256],f16>, !torch.list<int> -> !torch.vtensor<[16,160,256],f16>
%3891 = torch.aten.bmm %3888, %3890 : !torch.vtensor<[16,256,160],f16>, !torch.vtensor<[16,160,256],f16> -> !torch.vtensor<[16,256,256],f16>
%3892 = torch.aten.view %3891, %2154 : !torch.vtensor<[16,256,256],f16>, !torch.list<int> -> !torch.vtensor<[16,256,256],f16>
%3893 = torch.aten.mul.Tensor %3892, %1 : !torch.vtensor<[16,256,256],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,256,256],f16>
%values_30, %indices_31 = torch.aten.max.dim %3893, %int-1, %true : !torch.vtensor<[16,256,256],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,256,1],f16>, !torch.vtensor<[16,256,1],si64>
%3894 = torch.aten.sub.Tensor %3893, %values_30, %float1.000000e00 : !torch.vtensor<[16,256,256],f16>, !torch.vtensor<[16,256,1],f16>, !torch.float -> !torch.vtensor<[16,256,256],f16>
%3895 = torch.aten.exp %3894 : !torch.vtensor<[16,256,256],f16> -> !torch.vtensor<[16,256,256],f16>
%3896 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%3897 = torch.aten.sum.dim_IntList %3895, %3896, %true, %none : !torch.vtensor<[16,256,256],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[16,256,1],f16>
%3898 = torch.aten.div.Tensor %3895, %3897 : !torch.vtensor<[16,256,256],f16>, !torch.vtensor<[16,256,1],f16> -> !torch.vtensor<[16,256,256],f16>
%3899 = torch.aten.broadcast_to %3898, %2154 : !torch.vtensor<[16,256,256],f16>, !torch.list<int> -> !torch.vtensor<[16,256,256],f16>
%3900 = torch.aten.view %3899, %2154 : !torch.vtensor<[16,256,256],f16>, !torch.list<int> -> !torch.vtensor<[16,256,256],f16>
%3901 = torch.aten.broadcast_to %3885, %2137 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%3902 = torch.aten.view %3901, %2137 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%3903 = torch.aten.bmm %3900, %3902 : !torch.vtensor<[16,256,256],f16>, !torch.vtensor<[16,256,160],f16> -> !torch.vtensor<[16,256,160],f16>
%3904 = torch.aten.view %3903, %2137 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%3905 = torch.aten.view %3904, %2168 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16>
%3906 = torch.aten.permute %3905, %901 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16>
%3907 = torch.aten.clone %3906, %int0 : !torch.vtensor<[2,256,8,160],f16>, !torch.int -> !torch.vtensor<[2,256,8,160],f16>
%3908 = torch.aten.view %3907, %2102 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%3909 = torch.aten.transpose.int %405, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%3910 = torch.aten.view %3908, %2121 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
%3911 = torch.aten.mm %3910, %3909 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16>
%3912 = torch.aten.mul.Scalar %406, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16>
%3913 = torch.aten.add.Tensor %3912, %3911, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[512,1280],f16>, !torch.int -> !torch.vtensor<[512,1280],f16>
%3914 = torch.aten.view %3913, %2102 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%3915 = torch.aten.add.Tensor %3914, %3845, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16>
%3916 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%3917 = torch.aten.sum.dim_IntList %3915, %3916, %true, %none : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,256,1],f16>
%3918 = torch.aten.div.Scalar %3917, %int1280 : !torch.vtensor<[2,256,1],f16>, !torch.int -> !torch.vtensor<[2,256,1],f16>
%3919 = torch.prim.ListConstruct %int2, %int256, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3920 = torch.aten.broadcast_to %3918, %3919 : !torch.vtensor<[2,256,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%3921 = torch.aten.sub.Tensor %3915, %3920, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16>
%3922 = torch.aten.mul.Tensor %3921, %3921 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16>
%3923 = torch.aten.sum.dim_IntList %3922, %3916, %true, %none : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,256,1],f16>
%3924 = torch.aten.div.Scalar %3923, %int1280 : !torch.vtensor<[2,256,1],f16>, !torch.int -> !torch.vtensor<[2,256,1],f16>
%3925 = torch.aten.add.Scalar %3924, %float1.000000e-05, %int1 : !torch.vtensor<[2,256,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,256,1],f16>
%3926 = torch.aten.rsqrt %3925 : !torch.vtensor<[2,256,1],f16> -> !torch.vtensor<[2,256,1],f16>
%3927 = torch.prim.ListConstruct %int2, %int256, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3928 = torch.aten.broadcast_to %3926, %3927 : !torch.vtensor<[2,256,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%3929 = torch.aten.mul.Tensor %3921, %3928 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16>
%3930 = torch.aten.mul.Tensor %3929, %407 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[1280],f16> -> !torch.vtensor<[2,256,1280],f16>
%3931 = torch.aten.add.Tensor %3930, %408, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16>
%3932 = torch.aten.transpose.int %409, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%3933 = torch.aten.view %3931, %2121 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
%3934 = torch.aten.mm %3933, %3932 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16>
%3935 = torch.aten.view %3934, %2102 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%3936 = torch.aten.transpose.int %410, %int0, %int1 : !torch.vtensor<[1280,768],f16>, !torch.int, !torch.int -> !torch.vtensor<[768,1280],f16>
%3937 = torch.aten.view %arg2, %968 : !torch.vtensor<[2,77,768],f16>, !torch.list<int> -> !torch.vtensor<[154,768],f16>
%3938 = torch.aten.mm %3937, %3936 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,1280],f16> -> !torch.vtensor<[154,1280],f16>
%3939 = torch.aten.view %3938, %2203 : !torch.vtensor<[154,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,77,1280],f16>
%3940 = torch.aten.transpose.int %411, %int0, %int1 : !torch.vtensor<[1280,768],f16>, !torch.int, !torch.int -> !torch.vtensor<[768,1280],f16>
%3941 = torch.aten.view %arg2, %968 : !torch.vtensor<[2,77,768],f16>, !torch.list<int> -> !torch.vtensor<[154,768],f16>
%3942 = torch.aten.mm %3941, %3940 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,1280],f16> -> !torch.vtensor<[154,1280],f16>
%3943 = torch.aten.view %3942, %2203 : !torch.vtensor<[154,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,77,1280],f16>
%3944 = torch.aten.view %3935, %2133 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16>
%3945 = torch.aten.permute %3944, %901 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16>
%3946 = torch.aten.clone %3945, %int0 : !torch.vtensor<[2,8,256,160],f16>, !torch.int -> !torch.vtensor<[2,8,256,160],f16>
%3947 = torch.aten.view %3946, %2137 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%3948 = torch.aten.view %3939, %2213 : !torch.vtensor<[2,77,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,77,8,160],f16>
%3949 = torch.aten.permute %3948, %901 : !torch.vtensor<[2,77,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,160],f16>
%3950 = torch.aten.clone %3949, %int0 : !torch.vtensor<[2,8,77,160],f16>, !torch.int -> !torch.vtensor<[2,8,77,160],f16>
%3951 = torch.aten.view %3950, %2217 : !torch.vtensor<[2,8,77,160],f16>, !torch.list<int> -> !torch.vtensor<[16,77,160],f16>
%3952 = torch.aten.view %3943, %2213 : !torch.vtensor<[2,77,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,77,8,160],f16>
%3953 = torch.aten.permute %3952, %901 : !torch.vtensor<[2,77,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,160],f16>
%3954 = torch.aten.clone %3953, %int0 : !torch.vtensor<[2,8,77,160],f16>, !torch.int -> !torch.vtensor<[2,8,77,160],f16>
%3955 = torch.aten.view %3954, %2217 : !torch.vtensor<[2,8,77,160],f16>, !torch.list<int> -> !torch.vtensor<[16,77,160],f16>
%3956 = torch.aten.transpose.int %3951, %int-1, %int-2 : !torch.vtensor<[16,77,160],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,160,77],f16>
%3957 = torch.aten.broadcast_to %3947, %2137 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%3958 = torch.aten.view %3957, %2137 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%3959 = torch.aten.broadcast_to %3956, %2226 : !torch.vtensor<[16,160,77],f16>, !torch.list<int> -> !torch.vtensor<[16,160,77],f16>
%3960 = torch.aten.view %3959, %2226 : !torch.vtensor<[16,160,77],f16>, !torch.list<int> -> !torch.vtensor<[16,160,77],f16>
%3961 = torch.aten.bmm %3958, %3960 : !torch.vtensor<[16,256,160],f16>, !torch.vtensor<[16,160,77],f16> -> !torch.vtensor<[16,256,77],f16>
%3962 = torch.aten.view %3961, %2230 : !torch.vtensor<[16,256,77],f16>, !torch.list<int> -> !torch.vtensor<[16,256,77],f16>
%3963 = torch.aten.mul.Tensor %3962, %1 : !torch.vtensor<[16,256,77],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,256,77],f16>
%values_32, %indices_33 = torch.aten.max.dim %3963, %int-1, %true : !torch.vtensor<[16,256,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,256,1],f16>, !torch.vtensor<[16,256,1],si64>
%3964 = torch.aten.sub.Tensor %3963, %values_32, %float1.000000e00 : !torch.vtensor<[16,256,77],f16>, !torch.vtensor<[16,256,1],f16>, !torch.float -> !torch.vtensor<[16,256,77],f16>
%3965 = torch.aten.exp %3964 : !torch.vtensor<[16,256,77],f16> -> !torch.vtensor<[16,256,77],f16>
%3966 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%3967 = torch.aten.sum.dim_IntList %3965, %3966, %true, %none : !torch.vtensor<[16,256,77],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[16,256,1],f16>
%3968 = torch.aten.div.Tensor %3965, %3967 : !torch.vtensor<[16,256,77],f16>, !torch.vtensor<[16,256,1],f16> -> !torch.vtensor<[16,256,77],f16>
%3969 = torch.aten.broadcast_to %3968, %2230 : !torch.vtensor<[16,256,77],f16>, !torch.list<int> -> !torch.vtensor<[16,256,77],f16>
%3970 = torch.aten.view %3969, %2230 : !torch.vtensor<[16,256,77],f16>, !torch.list<int> -> !torch.vtensor<[16,256,77],f16>
%3971 = torch.aten.broadcast_to %3955, %2217 : !torch.vtensor<[16,77,160],f16>, !torch.list<int> -> !torch.vtensor<[16,77,160],f16>
%3972 = torch.aten.view %3971, %2217 : !torch.vtensor<[16,77,160],f16>, !torch.list<int> -> !torch.vtensor<[16,77,160],f16>
%3973 = torch.aten.bmm %3970, %3972 : !torch.vtensor<[16,256,77],f16>, !torch.vtensor<[16,77,160],f16> -> !torch.vtensor<[16,256,160],f16>
%3974 = torch.aten.view %3973, %2137 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%3975 = torch.aten.view %3974, %2168 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16>
%3976 = torch.aten.permute %3975, %901 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16>
%3977 = torch.aten.clone %3976, %int0 : !torch.vtensor<[2,256,8,160],f16>, !torch.int -> !torch.vtensor<[2,256,8,160],f16>
%3978 = torch.aten.view %3977, %2102 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%3979 = torch.aten.transpose.int %412, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%3980 = torch.aten.view %3978, %2121 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
%3981 = torch.aten.mm %3980, %3979 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16>
%3982 = torch.aten.mul.Scalar %413, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16>
%3983 = torch.aten.add.Tensor %3982, %3981, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[512,1280],f16>, !torch.int -> !torch.vtensor<[512,1280],f16>
%3984 = torch.aten.view %3983, %2102 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%3985 = torch.aten.add.Tensor %3984, %3915, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16>
%3986 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%3987 = torch.aten.sum.dim_IntList %3985, %3986, %true, %none : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,256,1],f16>
%3988 = torch.aten.div.Scalar %3987, %int1280 : !torch.vtensor<[2,256,1],f16>, !torch.int -> !torch.vtensor<[2,256,1],f16>
%3989 = torch.prim.ListConstruct %int2, %int256, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3990 = torch.aten.broadcast_to %3988, %3989 : !torch.vtensor<[2,256,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%3991 = torch.aten.sub.Tensor %3985, %3990, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16>
%3992 = torch.aten.mul.Tensor %3991, %3991 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16>
%3993 = torch.aten.sum.dim_IntList %3992, %3986, %true, %none : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,256,1],f16>
%3994 = torch.aten.div.Scalar %3993, %int1280 : !torch.vtensor<[2,256,1],f16>, !torch.int -> !torch.vtensor<[2,256,1],f16>
%3995 = torch.aten.add.Scalar %3994, %float1.000000e-05, %int1 : !torch.vtensor<[2,256,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,256,1],f16>
%3996 = torch.aten.rsqrt %3995 : !torch.vtensor<[2,256,1],f16> -> !torch.vtensor<[2,256,1],f16>
%3997 = torch.prim.ListConstruct %int2, %int256, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3998 = torch.aten.broadcast_to %3996, %3997 : !torch.vtensor<[2,256,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%3999 = torch.aten.mul.Tensor %3991, %3998 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16>
%4000 = torch.aten.mul.Tensor %3999, %414 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[1280],f16> -> !torch.vtensor<[2,256,1280],f16>
%4001 = torch.aten.add.Tensor %4000, %415, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16>
%4002 = torch.aten.transpose.int %416, %int0, %int1 : !torch.vtensor<[10240,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,10240],f16>
%4003 = torch.aten.view %4001, %2121 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
%4004 = torch.aten.mm %4003, %4002 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,10240],f16> -> !torch.vtensor<[512,10240],f16>
%4005 = torch.aten.mul.Scalar %417, %int1 : !torch.vtensor<[10240],f16>, !torch.int -> !torch.vtensor<[10240],f16>
%4006 = torch.aten.add.Tensor %4005, %4004, %int1 : !torch.vtensor<[10240],f16>, !torch.vtensor<[512,10240],f16>, !torch.int -> !torch.vtensor<[512,10240],f16>
%4007 = torch.aten.view %4006, %2276 : !torch.vtensor<[512,10240],f16>, !torch.list<int> -> !torch.vtensor<[2,256,10240],f16>
%4008 = torch.aten.slice.Tensor %4007, %int-1, %int0, %int5120, %int1 : !torch.vtensor<[2,256,10240],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,256,5120],f16>
%4009 = torch.aten.slice.Tensor %4007, %int-1, %int5120, %int10240, %int1 : !torch.vtensor<[2,256,10240],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,256,5120],f16>
%4010 = torch.aten.gelu %4009, %str : !torch.vtensor<[2,256,5120],f16>, !torch.str -> !torch.vtensor<[2,256,5120],f16>
%4011 = torch.aten.mul.Tensor %4008, %4010 : !torch.vtensor<[2,256,5120],f16>, !torch.vtensor<[2,256,5120],f16> -> !torch.vtensor<[2,256,5120],f16>
%4012 = torch.aten.transpose.int %418, %int0, %int1 : !torch.vtensor<[1280,5120],f16>, !torch.int, !torch.int -> !torch.vtensor<[5120,1280],f16>
%4013 = torch.aten.view %4011, %2283 : !torch.vtensor<[2,256,5120],f16>, !torch.list<int> -> !torch.vtensor<[512,5120],f16>
%4014 = torch.aten.mm %4013, %4012 : !torch.vtensor<[512,5120],f16>, !torch.vtensor<[5120,1280],f16> -> !torch.vtensor<[512,1280],f16>
%4015 = torch.aten.mul.Scalar %419, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16>
%4016 = torch.aten.add.Tensor %4015, %4014, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[512,1280],f16>, !torch.int -> !torch.vtensor<[512,1280],f16>
%4017 = torch.aten.view %4016, %2102 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%4018 = torch.aten.add.Tensor %4017, %3985, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16>
%4019 = torch.aten.view %4018, %2290 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,16,16,1280],f16>
%4020 = torch.aten.permute %4019, %1060 : !torch.vtensor<[2,16,16,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f16>
%4021 = torch.aten.convolution %4020, %420, %421, %744, %745, %744, %false, %745, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[1280,1280,1,1],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
%4022 = torch.aten.add.Tensor %4021, %3809, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
%4023 = torch.prim.ListConstruct %4022, %1977 : (!torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,640,16,16],f16>) -> !torch.list<vtensor>
%4024 = torch.aten.cat %4023, %int1 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[2,1920,16,16],f16>
%4025 = torch.aten.clone %4024, %int0 : !torch.vtensor<[2,1920,16,16],f16>, !torch.int -> !torch.vtensor<[2,1920,16,16],f16>
%4026 = torch.prim.ListConstruct %int2, %int32, %int60, %int256 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4027 = torch.aten.view %4025, %4026 : !torch.vtensor<[2,1920,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,32,60,256],f16>
%4028 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%4029 = torch.aten.to.dtype %4028, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%4030 = torch.prim.ListConstruct %int2, %int32, %int60, %int256 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4031 = torch.aten.broadcast_to %4029, %4030 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,60,256],f32>
%4032 = torch.valsem.aten.copy %4031, %4027, %false : !torch.vtensor<[2,32,60,256],f32>, !torch.vtensor<[2,32,60,256],f16>, !torch.bool -> !torch.vtensor<[2,32,60,256],f32>
%4033 = torch.aten.to.dtype %4032, %int7, %false, %false, %none : !torch.vtensor<[2,32,60,256],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,60,256],f64>
%4034 = torch.aten.sum.dim_IntList %4033, %754, %true, %none : !torch.vtensor<[2,32,60,256],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%4035 = torch.aten.div.Scalar %4034, %int15360 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%4036 = torch.aten.sub.Tensor %4033, %4035, %float1.000000e00 : !torch.vtensor<[2,32,60,256],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,60,256],f64>
%4037 = torch.aten.mul.Tensor %4036, %4036 : !torch.vtensor<[2,32,60,256],f64>, !torch.vtensor<[2,32,60,256],f64> -> !torch.vtensor<[2,32,60,256],f64>
%4038 = torch.aten.sum.dim_IntList %4037, %754, %true, %none : !torch.vtensor<[2,32,60,256],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%4039 = torch.aten.div.Scalar %4038, %int15360 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%4040 = torch.aten.to.dtype %4039, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%4041 = torch.aten.sum.dim_IntList %4032, %754, %true, %none : !torch.vtensor<[2,32,60,256],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%4042 = torch.aten.div.Scalar %4041, %int15360 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%4043 = torch.aten.add.Tensor %4040, %6, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%4044 = torch.aten.rsqrt %4043 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%4045 = torch.aten.sub.Tensor %4027, %4042, %int1 : !torch.vtensor<[2,32,60,256],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,60,256],f32>
%4046 = torch.aten.mul.Tensor %4045, %4044 : !torch.vtensor<[2,32,60,256],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,60,256],f32>
%4047 = torch.prim.ListConstruct %int2, %int1920, %int16, %int16 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4048 = torch.aten.view %4046, %4047 : !torch.vtensor<[2,32,60,256],f32>, !torch.list<int> -> !torch.vtensor<[2,1920,16,16],f32>
%4049 = torch.aten.unsqueeze %422, %int-1 : !torch.vtensor<[1920],f16>, !torch.int -> !torch.vtensor<[1920,1],f16>
%4050 = torch.aten.unsqueeze %4049, %int-1 : !torch.vtensor<[1920,1],f16>, !torch.int -> !torch.vtensor<[1920,1,1],f16>
%4051 = torch.aten.mul.Tensor %4048, %4050 : !torch.vtensor<[2,1920,16,16],f32>, !torch.vtensor<[1920,1,1],f16> -> !torch.vtensor<[2,1920,16,16],f32>
%4052 = torch.aten.unsqueeze %423, %int-1 : !torch.vtensor<[1920],f16>, !torch.int -> !torch.vtensor<[1920,1],f16>
%4053 = torch.aten.unsqueeze %4052, %int-1 : !torch.vtensor<[1920,1],f16>, !torch.int -> !torch.vtensor<[1920,1,1],f16>
%4054 = torch.aten.add.Tensor %4051, %4053, %int1 : !torch.vtensor<[2,1920,16,16],f32>, !torch.vtensor<[1920,1,1],f16>, !torch.int -> !torch.vtensor<[2,1920,16,16],f32>
%4055 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%4056 = torch.aten.to.dtype %4055, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%4057 = torch.prim.ListConstruct %int2, %int1920, %int16, %int16 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4058 = torch.aten.broadcast_to %4056, %4057 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,1920,16,16],f16>
%4059 = torch.valsem.aten.copy %4058, %4054, %false : !torch.vtensor<[2,1920,16,16],f16>, !torch.vtensor<[2,1920,16,16],f32>, !torch.bool -> !torch.vtensor<[2,1920,16,16],f16>
%4060 = torch.aten.sigmoid %4059 : !torch.vtensor<[2,1920,16,16],f16> -> !torch.vtensor<[2,1920,16,16],f16>
%4061 = torch.aten.mul.Tensor %4060, %4059 : !torch.vtensor<[2,1920,16,16],f16>, !torch.vtensor<[2,1920,16,16],f16> -> !torch.vtensor<[2,1920,16,16],f16>
%4062 = torch.aten.convolution %4061, %424, %425, %744, %744, %744, %false, %745, %int1 : !torch.vtensor<[2,1920,16,16],f16>, !torch.vtensor<[1280,1920,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
%4063 = torch.aten.sigmoid %743 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%4064 = torch.aten.mul.Tensor %4063, %743 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%4065 = torch.aten.transpose.int %426, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%4066 = torch.aten.mm %4064, %4065 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[2,1280],f16>
%4067 = torch.aten.mul.Scalar %427, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16>
%4068 = torch.aten.add.Tensor %4067, %4066, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[2,1280],f16>, !torch.int -> !torch.vtensor<[2,1280],f16>
%4069 = torch.aten.slice.Tensor %4068, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,1280],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
%4070 = torch.aten.slice.Tensor %4069, %int1, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,1280],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
%4071 = torch.aten.unsqueeze %4070, %int2 : !torch.vtensor<[2,1280],f16>, !torch.int -> !torch.vtensor<[2,1280,1],f16>
%4072 = torch.aten.unsqueeze %4071, %int3 : !torch.vtensor<[2,1280,1],f16>, !torch.int -> !torch.vtensor<[2,1280,1,1],f16>
%4073 = torch.aten.add.Tensor %4062, %4072, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
%4074 = torch.aten.view %4073, %2027 : !torch.vtensor<[2,1280,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,32,40,256],f16>
%4075 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%4076 = torch.aten.to.dtype %4075, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%4077 = torch.prim.ListConstruct %int2, %int32, %int40, %int256 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4078 = torch.aten.broadcast_to %4076, %4077 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,40,256],f32>
%4079 = torch.valsem.aten.copy %4078, %4074, %false : !torch.vtensor<[2,32,40,256],f32>, !torch.vtensor<[2,32,40,256],f16>, !torch.bool -> !torch.vtensor<[2,32,40,256],f32>
%4080 = torch.aten.to.dtype %4079, %int7, %false, %false, %none : !torch.vtensor<[2,32,40,256],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,40,256],f64>
%4081 = torch.aten.sum.dim_IntList %4080, %754, %true, %none : !torch.vtensor<[2,32,40,256],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%4082 = torch.aten.div.Scalar %4081, %int10240 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%4083 = torch.aten.sub.Tensor %4080, %4082, %float1.000000e00 : !torch.vtensor<[2,32,40,256],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,40,256],f64>
%4084 = torch.aten.mul.Tensor %4083, %4083 : !torch.vtensor<[2,32,40,256],f64>, !torch.vtensor<[2,32,40,256],f64> -> !torch.vtensor<[2,32,40,256],f64>
%4085 = torch.aten.sum.dim_IntList %4084, %754, %true, %none : !torch.vtensor<[2,32,40,256],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%4086 = torch.aten.div.Scalar %4085, %int10240 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%4087 = torch.aten.to.dtype %4086, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%4088 = torch.aten.sum.dim_IntList %4079, %754, %true, %none : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%4089 = torch.aten.div.Scalar %4088, %int10240 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%4090 = torch.aten.add.Tensor %4087, %6, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%4091 = torch.aten.rsqrt %4090 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%4092 = torch.aten.sub.Tensor %4074, %4089, %int1 : !torch.vtensor<[2,32,40,256],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,256],f32>
%4093 = torch.aten.mul.Tensor %4092, %4091 : !torch.vtensor<[2,32,40,256],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,256],f32>
%4094 = torch.aten.view %4093, %2048 : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f32>
%4095 = torch.aten.unsqueeze %428, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%4096 = torch.aten.unsqueeze %4095, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%4097 = torch.aten.mul.Tensor %4094, %4096 : !torch.vtensor<[2,1280,16,16],f32>, !torch.vtensor<[1280,1,1],f16> -> !torch.vtensor<[2,1280,16,16],f32>
%4098 = torch.aten.unsqueeze %429, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%4099 = torch.aten.unsqueeze %4098, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%4100 = torch.aten.add.Tensor %4097, %4099, %int1 : !torch.vtensor<[2,1280,16,16],f32>, !torch.vtensor<[1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f32>
%4101 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%4102 = torch.aten.to.dtype %4101, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%4103 = torch.prim.ListConstruct %int2, %int1280, %int16, %int16 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4104 = torch.aten.broadcast_to %4102, %4103 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f16>
%4105 = torch.valsem.aten.copy %4104, %4100, %false : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f32>, !torch.bool -> !torch.vtensor<[2,1280,16,16],f16>
%4106 = torch.aten.sigmoid %4105 : !torch.vtensor<[2,1280,16,16],f16> -> !torch.vtensor<[2,1280,16,16],f16>
%4107 = torch.aten.mul.Tensor %4106, %4105 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f16> -> !torch.vtensor<[2,1280,16,16],f16>
%4108 = torch.aten.convolution %4107, %430, %431, %744, %744, %744, %false, %745, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
%4109 = torch.aten.convolution %4024, %432, %433, %744, %745, %744, %false, %745, %int1 : !torch.vtensor<[2,1920,16,16],f16>, !torch.vtensor<[1280,1920,1,1],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
%4110 = torch.aten.add.Tensor %4109, %4108, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
%4111 = torch.aten.div.Tensor %4110, %5 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[2,1280,16,16],f16>
%4112 = torch.aten.clone %4111, %int0 : !torch.vtensor<[2,1280,16,16],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
%4113 = torch.aten.view %4112, %2027 : !torch.vtensor<[2,1280,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,32,40,256],f16>
%4114 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%4115 = torch.aten.to.dtype %4114, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%4116 = torch.prim.ListConstruct %int2, %int32, %int40, %int256 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4117 = torch.aten.broadcast_to %4115, %4116 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,40,256],f32>
%4118 = torch.valsem.aten.copy %4117, %4113, %false : !torch.vtensor<[2,32,40,256],f32>, !torch.vtensor<[2,32,40,256],f16>, !torch.bool -> !torch.vtensor<[2,32,40,256],f32>
%4119 = torch.aten.to.dtype %4118, %int7, %false, %false, %none : !torch.vtensor<[2,32,40,256],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,40,256],f64>
%4120 = torch.aten.sum.dim_IntList %4119, %754, %true, %none : !torch.vtensor<[2,32,40,256],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%4121 = torch.aten.div.Scalar %4120, %int10240 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%4122 = torch.aten.sub.Tensor %4119, %4121, %float1.000000e00 : !torch.vtensor<[2,32,40,256],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,40,256],f64>
%4123 = torch.aten.mul.Tensor %4122, %4122 : !torch.vtensor<[2,32,40,256],f64>, !torch.vtensor<[2,32,40,256],f64> -> !torch.vtensor<[2,32,40,256],f64>
%4124 = torch.aten.sum.dim_IntList %4123, %754, %true, %none : !torch.vtensor<[2,32,40,256],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%4125 = torch.aten.div.Scalar %4124, %int10240 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%4126 = torch.aten.to.dtype %4125, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%4127 = torch.aten.sum.dim_IntList %4118, %754, %true, %none : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%4128 = torch.aten.div.Scalar %4127, %int10240 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%4129 = torch.aten.add.Tensor %4126, %4, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%4130 = torch.aten.rsqrt %4129 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%4131 = torch.aten.sub.Tensor %4113, %4128, %int1 : !torch.vtensor<[2,32,40,256],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,256],f32>
%4132 = torch.aten.mul.Tensor %4131, %4130 : !torch.vtensor<[2,32,40,256],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,256],f32>
%4133 = torch.aten.view %4132, %2048 : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f32>
%4134 = torch.aten.unsqueeze %434, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%4135 = torch.aten.unsqueeze %4134, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%4136 = torch.aten.mul.Tensor %4133, %4135 : !torch.vtensor<[2,1280,16,16],f32>, !torch.vtensor<[1280,1,1],f16> -> !torch.vtensor<[2,1280,16,16],f32>
%4137 = torch.aten.unsqueeze %435, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%4138 = torch.aten.unsqueeze %4137, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%4139 = torch.aten.add.Tensor %4136, %4138, %int1 : !torch.vtensor<[2,1280,16,16],f32>, !torch.vtensor<[1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f32>
%4140 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%4141 = torch.aten.to.dtype %4140, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%4142 = torch.prim.ListConstruct %int2, %int1280, %int16, %int16 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4143 = torch.aten.broadcast_to %4141, %4142 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f16>
%4144 = torch.valsem.aten.copy %4143, %4139, %false : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f32>, !torch.bool -> !torch.vtensor<[2,1280,16,16],f16>
%4145 = torch.aten.convolution %4144, %436, %437, %744, %745, %744, %false, %745, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[1280,1280,1,1],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
%4146 = torch.aten.permute %4145, %866 : !torch.vtensor<[2,1280,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,16,16,1280],f16>
%4147 = torch.aten.view %4146, %2102 : !torch.vtensor<[2,16,16,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%4148 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%4149 = torch.aten.sum.dim_IntList %4147, %4148, %true, %none : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,256,1],f16>
%4150 = torch.aten.div.Scalar %4149, %int1280 : !torch.vtensor<[2,256,1],f16>, !torch.int -> !torch.vtensor<[2,256,1],f16>
%4151 = torch.prim.ListConstruct %int2, %int256, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4152 = torch.aten.broadcast_to %4150, %4151 : !torch.vtensor<[2,256,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%4153 = torch.aten.sub.Tensor %4147, %4152, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16>
%4154 = torch.aten.mul.Tensor %4153, %4153 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16>
%4155 = torch.aten.sum.dim_IntList %4154, %4148, %true, %none : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,256,1],f16>
%4156 = torch.aten.div.Scalar %4155, %int1280 : !torch.vtensor<[2,256,1],f16>, !torch.int -> !torch.vtensor<[2,256,1],f16>
%4157 = torch.aten.add.Scalar %4156, %float1.000000e-05, %int1 : !torch.vtensor<[2,256,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,256,1],f16>
%4158 = torch.aten.rsqrt %4157 : !torch.vtensor<[2,256,1],f16> -> !torch.vtensor<[2,256,1],f16>
%4159 = torch.prim.ListConstruct %int2, %int256, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4160 = torch.aten.broadcast_to %4158, %4159 : !torch.vtensor<[2,256,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%4161 = torch.aten.mul.Tensor %4153, %4160 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16>
%4162 = torch.aten.mul.Tensor %4161, %438 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[1280],f16> -> !torch.vtensor<[2,256,1280],f16>
%4163 = torch.aten.add.Tensor %4162, %439, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16>
%4164 = torch.aten.transpose.int %440, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%4165 = torch.aten.view %4163, %2121 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
%4166 = torch.aten.mm %4165, %4164 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16>
%4167 = torch.aten.view %4166, %2102 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%4168 = torch.aten.transpose.int %441, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%4169 = torch.aten.view %4163, %2121 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
%4170 = torch.aten.mm %4169, %4168 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16>
%4171 = torch.aten.view %4170, %2102 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%4172 = torch.aten.transpose.int %442, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%4173 = torch.aten.view %4163, %2121 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
%4174 = torch.aten.mm %4173, %4172 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16>
%4175 = torch.aten.view %4174, %2102 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%4176 = torch.aten.view %4167, %2133 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16>
%4177 = torch.aten.permute %4176, %901 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16>
%4178 = torch.aten.clone %4177, %int0 : !torch.vtensor<[2,8,256,160],f16>, !torch.int -> !torch.vtensor<[2,8,256,160],f16>
%4179 = torch.aten.view %4178, %2137 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%4180 = torch.aten.view %4171, %2133 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16>
%4181 = torch.aten.permute %4180, %901 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16>
%4182 = torch.aten.clone %4181, %int0 : !torch.vtensor<[2,8,256,160],f16>, !torch.int -> !torch.vtensor<[2,8,256,160],f16>
%4183 = torch.aten.view %4182, %2137 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%4184 = torch.aten.view %4175, %2133 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16>
%4185 = torch.aten.permute %4184, %901 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16>
%4186 = torch.aten.clone %4185, %int0 : !torch.vtensor<[2,8,256,160],f16>, !torch.int -> !torch.vtensor<[2,8,256,160],f16>
%4187 = torch.aten.view %4186, %2137 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%4188 = torch.aten.transpose.int %4183, %int-1, %int-2 : !torch.vtensor<[16,256,160],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,160,256],f16>
%4189 = torch.aten.broadcast_to %4179, %2137 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%4190 = torch.aten.view %4189, %2137 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%4191 = torch.aten.broadcast_to %4188, %2150 : !torch.vtensor<[16,160,256],f16>, !torch.list<int> -> !torch.vtensor<[16,160,256],f16>
%4192 = torch.aten.view %4191, %2150 : !torch.vtensor<[16,160,256],f16>, !torch.list<int> -> !torch.vtensor<[16,160,256],f16>
%4193 = torch.aten.bmm %4190, %4192 : !torch.vtensor<[16,256,160],f16>, !torch.vtensor<[16,160,256],f16> -> !torch.vtensor<[16,256,256],f16>
%4194 = torch.aten.view %4193, %2154 : !torch.vtensor<[16,256,256],f16>, !torch.list<int> -> !torch.vtensor<[16,256,256],f16>
%4195 = torch.aten.mul.Tensor %4194, %1 : !torch.vtensor<[16,256,256],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,256,256],f16>
%values_34, %indices_35 = torch.aten.max.dim %4195, %int-1, %true : !torch.vtensor<[16,256,256],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,256,1],f16>, !torch.vtensor<[16,256,1],si64>
%4196 = torch.aten.sub.Tensor %4195, %values_34, %float1.000000e00 : !torch.vtensor<[16,256,256],f16>, !torch.vtensor<[16,256,1],f16>, !torch.float -> !torch.vtensor<[16,256,256],f16>
%4197 = torch.aten.exp %4196 : !torch.vtensor<[16,256,256],f16> -> !torch.vtensor<[16,256,256],f16>
%4198 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%4199 = torch.aten.sum.dim_IntList %4197, %4198, %true, %none : !torch.vtensor<[16,256,256],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[16,256,1],f16>
%4200 = torch.aten.div.Tensor %4197, %4199 : !torch.vtensor<[16,256,256],f16>, !torch.vtensor<[16,256,1],f16> -> !torch.vtensor<[16,256,256],f16>
// NOTE(review): Auto-generated torch-mlir SSA IR — an excerpt of a much larger
// @forward body (header and terminator are outside this chunk). SSA value
// numbers, op order, and types must not be altered; the comments below only
// annotate structure. All shape/dtype statements are read directly from the
// !torch.vtensor types on each op. Structurally this looks like the tail of a
// spatial-transformer block followed by an upsampling resnet stage of a
// diffusion UNet — TODO confirm against the generating Python model.

// Apply attention probabilities (%4200, [16,256,256]) to the value tensor
// (%4187, [16,256,160]): attn @ V via bmm, then un-flatten the 16 = 2*8
// batched heads to [2,8,256,160], permute heads inward and merge to
// [2,256,1280]. The broadcast_to/view pairs are shape-preserving no-ops
// emitted by the exporter.
%4201 = torch.aten.broadcast_to %4200, %2154 : !torch.vtensor<[16,256,256],f16>, !torch.list<int> -> !torch.vtensor<[16,256,256],f16>
%4202 = torch.aten.view %4201, %2154 : !torch.vtensor<[16,256,256],f16>, !torch.list<int> -> !torch.vtensor<[16,256,256],f16>
%4203 = torch.aten.broadcast_to %4187, %2137 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%4204 = torch.aten.view %4203, %2137 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%4205 = torch.aten.bmm %4202, %4204 : !torch.vtensor<[16,256,256],f16>, !torch.vtensor<[16,256,160],f16> -> !torch.vtensor<[16,256,160],f16>
%4206 = torch.aten.view %4205, %2137 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%4207 = torch.aten.view %4206, %2168 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16>
%4208 = torch.aten.permute %4207, %901 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16>
%4209 = torch.aten.clone %4208, %int0 : !torch.vtensor<[2,256,8,160],f16>, !torch.int -> !torch.vtensor<[2,256,8,160],f16>
%4210 = torch.aten.view %4209, %2102 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
// Attention output projection: linear with weight %443 (transposed for mm)
// and bias %444, applied over tokens flattened to [512,1280]; then a
// residual add with %4147 (the block's pre-attention hidden states —
// presumably; %4147 is defined above this chunk).
%4211 = torch.aten.transpose.int %443, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%4212 = torch.aten.view %4210, %2121 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
%4213 = torch.aten.mm %4212, %4211 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16>
%4214 = torch.aten.mul.Scalar %444, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16>
%4215 = torch.aten.add.Tensor %4214, %4213, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[512,1280],f16>, !torch.int -> !torch.vtensor<[512,1280],f16>
%4216 = torch.aten.view %4215, %2102 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%4217 = torch.aten.add.Tensor %4216, %4147, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16>
// Manually-expanded LayerNorm over the last (1280) dim, entirely in f16:
// mean = sum/1280, var = sum((x-mean)^2)/1280, rsqrt(var + 1e-5), then
// affine scale %445 and shift %446.
%4218 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%4219 = torch.aten.sum.dim_IntList %4217, %4218, %true, %none : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,256,1],f16>
%4220 = torch.aten.div.Scalar %4219, %int1280 : !torch.vtensor<[2,256,1],f16>, !torch.int -> !torch.vtensor<[2,256,1],f16>
%4221 = torch.prim.ListConstruct %int2, %int256, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4222 = torch.aten.broadcast_to %4220, %4221 : !torch.vtensor<[2,256,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%4223 = torch.aten.sub.Tensor %4217, %4222, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16>
%4224 = torch.aten.mul.Tensor %4223, %4223 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16>
%4225 = torch.aten.sum.dim_IntList %4224, %4218, %true, %none : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,256,1],f16>
%4226 = torch.aten.div.Scalar %4225, %int1280 : !torch.vtensor<[2,256,1],f16>, !torch.int -> !torch.vtensor<[2,256,1],f16>
%4227 = torch.aten.add.Scalar %4226, %float1.000000e-05, %int1 : !torch.vtensor<[2,256,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,256,1],f16>
%4228 = torch.aten.rsqrt %4227 : !torch.vtensor<[2,256,1],f16> -> !torch.vtensor<[2,256,1],f16>
%4229 = torch.prim.ListConstruct %int2, %int256, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4230 = torch.aten.broadcast_to %4228, %4229 : !torch.vtensor<[2,256,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%4231 = torch.aten.mul.Tensor %4223, %4230 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16>
%4232 = torch.aten.mul.Tensor %4231, %445 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[1280],f16> -> !torch.vtensor<[2,256,1280],f16>
%4233 = torch.aten.add.Tensor %4232, %446, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16>
// Cross-attention projections: Q (weight %447, no bias) from the normalized
// hidden states; K (%448) and V (%449) projected 768 -> 1280 from %arg2, the
// [2,77,768] conditioning input (77 tokens — presumably CLIP text
// embeddings; verify against the caller).
%4234 = torch.aten.transpose.int %447, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%4235 = torch.aten.view %4233, %2121 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
%4236 = torch.aten.mm %4235, %4234 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16>
%4237 = torch.aten.view %4236, %2102 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%4238 = torch.aten.transpose.int %448, %int0, %int1 : !torch.vtensor<[1280,768],f16>, !torch.int, !torch.int -> !torch.vtensor<[768,1280],f16>
%4239 = torch.aten.view %arg2, %968 : !torch.vtensor<[2,77,768],f16>, !torch.list<int> -> !torch.vtensor<[154,768],f16>
%4240 = torch.aten.mm %4239, %4238 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,1280],f16> -> !torch.vtensor<[154,1280],f16>
%4241 = torch.aten.view %4240, %2203 : !torch.vtensor<[154,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,77,1280],f16>
%4242 = torch.aten.transpose.int %449, %int0, %int1 : !torch.vtensor<[1280,768],f16>, !torch.int, !torch.int -> !torch.vtensor<[768,1280],f16>
%4243 = torch.aten.view %arg2, %968 : !torch.vtensor<[2,77,768],f16>, !torch.list<int> -> !torch.vtensor<[154,768],f16>
%4244 = torch.aten.mm %4243, %4242 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,1280],f16> -> !torch.vtensor<[154,1280],f16>
%4245 = torch.aten.view %4244, %2203 : !torch.vtensor<[154,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,77,1280],f16>
// Split into 8 heads of dim 160 and fold heads into the batch dim:
// Q -> [16,256,160], K/V -> [16,77,160].
%4246 = torch.aten.view %4237, %2133 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16>
%4247 = torch.aten.permute %4246, %901 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16>
%4248 = torch.aten.clone %4247, %int0 : !torch.vtensor<[2,8,256,160],f16>, !torch.int -> !torch.vtensor<[2,8,256,160],f16>
%4249 = torch.aten.view %4248, %2137 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%4250 = torch.aten.view %4241, %2213 : !torch.vtensor<[2,77,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,77,8,160],f16>
%4251 = torch.aten.permute %4250, %901 : !torch.vtensor<[2,77,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,160],f16>
%4252 = torch.aten.clone %4251, %int0 : !torch.vtensor<[2,8,77,160],f16>, !torch.int -> !torch.vtensor<[2,8,77,160],f16>
%4253 = torch.aten.view %4252, %2217 : !torch.vtensor<[2,8,77,160],f16>, !torch.list<int> -> !torch.vtensor<[16,77,160],f16>
%4254 = torch.aten.view %4245, %2213 : !torch.vtensor<[2,77,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,77,8,160],f16>
%4255 = torch.aten.permute %4254, %901 : !torch.vtensor<[2,77,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,160],f16>
%4256 = torch.aten.clone %4255, %int0 : !torch.vtensor<[2,8,77,160],f16>, !torch.int -> !torch.vtensor<[2,8,77,160],f16>
%4257 = torch.aten.view %4256, %2217 : !torch.vtensor<[2,8,77,160],f16>, !torch.list<int> -> !torch.vtensor<[16,77,160],f16>
// Scores: Q @ K^T scaled by %1 (per HEAD, the f64 literal
// 0.079056941504209485 = 1/sqrt(160), i.e. 1/sqrt(head_dim)), followed by a
// numerically-stable softmax along the key dim: subtract the rowwise max,
// exp, normalize by the sum.
%4258 = torch.aten.transpose.int %4253, %int-1, %int-2 : !torch.vtensor<[16,77,160],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,160,77],f16>
%4259 = torch.aten.broadcast_to %4249, %2137 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%4260 = torch.aten.view %4259, %2137 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%4261 = torch.aten.broadcast_to %4258, %2226 : !torch.vtensor<[16,160,77],f16>, !torch.list<int> -> !torch.vtensor<[16,160,77],f16>
%4262 = torch.aten.view %4261, %2226 : !torch.vtensor<[16,160,77],f16>, !torch.list<int> -> !torch.vtensor<[16,160,77],f16>
%4263 = torch.aten.bmm %4260, %4262 : !torch.vtensor<[16,256,160],f16>, !torch.vtensor<[16,160,77],f16> -> !torch.vtensor<[16,256,77],f16>
%4264 = torch.aten.view %4263, %2230 : !torch.vtensor<[16,256,77],f16>, !torch.list<int> -> !torch.vtensor<[16,256,77],f16>
%4265 = torch.aten.mul.Tensor %4264, %1 : !torch.vtensor<[16,256,77],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,256,77],f16>
%values_36, %indices_37 = torch.aten.max.dim %4265, %int-1, %true : !torch.vtensor<[16,256,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,256,1],f16>, !torch.vtensor<[16,256,1],si64>
%4266 = torch.aten.sub.Tensor %4265, %values_36, %float1.000000e00 : !torch.vtensor<[16,256,77],f16>, !torch.vtensor<[16,256,1],f16>, !torch.float -> !torch.vtensor<[16,256,77],f16>
%4267 = torch.aten.exp %4266 : !torch.vtensor<[16,256,77],f16> -> !torch.vtensor<[16,256,77],f16>
%4268 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%4269 = torch.aten.sum.dim_IntList %4267, %4268, %true, %none : !torch.vtensor<[16,256,77],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[16,256,1],f16>
%4270 = torch.aten.div.Tensor %4267, %4269 : !torch.vtensor<[16,256,77],f16>, !torch.vtensor<[16,256,1],f16> -> !torch.vtensor<[16,256,77],f16>
// attn @ V, then merge the 8 heads back to [2,256,1280].
%4271 = torch.aten.broadcast_to %4270, %2230 : !torch.vtensor<[16,256,77],f16>, !torch.list<int> -> !torch.vtensor<[16,256,77],f16>
%4272 = torch.aten.view %4271, %2230 : !torch.vtensor<[16,256,77],f16>, !torch.list<int> -> !torch.vtensor<[16,256,77],f16>
%4273 = torch.aten.broadcast_to %4257, %2217 : !torch.vtensor<[16,77,160],f16>, !torch.list<int> -> !torch.vtensor<[16,77,160],f16>
%4274 = torch.aten.view %4273, %2217 : !torch.vtensor<[16,77,160],f16>, !torch.list<int> -> !torch.vtensor<[16,77,160],f16>
%4275 = torch.aten.bmm %4272, %4274 : !torch.vtensor<[16,256,77],f16>, !torch.vtensor<[16,77,160],f16> -> !torch.vtensor<[16,256,160],f16>
%4276 = torch.aten.view %4275, %2137 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%4277 = torch.aten.view %4276, %2168 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16>
%4278 = torch.aten.permute %4277, %901 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16>
%4279 = torch.aten.clone %4278, %int0 : !torch.vtensor<[2,256,8,160],f16>, !torch.int -> !torch.vtensor<[2,256,8,160],f16>
%4280 = torch.aten.view %4279, %2102 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
// Cross-attention output projection (weight %450, bias %451) plus residual
// with %4217 (the post-self-attention hidden states).
%4281 = torch.aten.transpose.int %450, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%4282 = torch.aten.view %4280, %2121 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
%4283 = torch.aten.mm %4282, %4281 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16>
%4284 = torch.aten.mul.Scalar %451, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16>
%4285 = torch.aten.add.Tensor %4284, %4283, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[512,1280],f16>, !torch.int -> !torch.vtensor<[512,1280],f16>
%4286 = torch.aten.view %4285, %2102 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%4287 = torch.aten.add.Tensor %4286, %4217, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16>
// Third LayerNorm of the transformer block (same expansion as above),
// affine params %452/%453.
%4288 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%4289 = torch.aten.sum.dim_IntList %4287, %4288, %true, %none : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,256,1],f16>
%4290 = torch.aten.div.Scalar %4289, %int1280 : !torch.vtensor<[2,256,1],f16>, !torch.int -> !torch.vtensor<[2,256,1],f16>
%4291 = torch.prim.ListConstruct %int2, %int256, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4292 = torch.aten.broadcast_to %4290, %4291 : !torch.vtensor<[2,256,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%4293 = torch.aten.sub.Tensor %4287, %4292, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16>
%4294 = torch.aten.mul.Tensor %4293, %4293 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16>
%4295 = torch.aten.sum.dim_IntList %4294, %4288, %true, %none : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,256,1],f16>
%4296 = torch.aten.div.Scalar %4295, %int1280 : !torch.vtensor<[2,256,1],f16>, !torch.int -> !torch.vtensor<[2,256,1],f16>
%4297 = torch.aten.add.Scalar %4296, %float1.000000e-05, %int1 : !torch.vtensor<[2,256,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,256,1],f16>
%4298 = torch.aten.rsqrt %4297 : !torch.vtensor<[2,256,1],f16> -> !torch.vtensor<[2,256,1],f16>
%4299 = torch.prim.ListConstruct %int2, %int256, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4300 = torch.aten.broadcast_to %4298, %4299 : !torch.vtensor<[2,256,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%4301 = torch.aten.mul.Tensor %4293, %4300 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16>
%4302 = torch.aten.mul.Tensor %4301, %452 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[1280],f16> -> !torch.vtensor<[2,256,1280],f16>
%4303 = torch.aten.add.Tensor %4302, %453, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16>
// GEGLU feed-forward: linear 1280 -> 10240 (%454/%455), slice into two
// 5120-wide halves, gelu the second half and use it to gate the first.
%4304 = torch.aten.transpose.int %454, %int0, %int1 : !torch.vtensor<[10240,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,10240],f16>
%4305 = torch.aten.view %4303, %2121 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
%4306 = torch.aten.mm %4305, %4304 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,10240],f16> -> !torch.vtensor<[512,10240],f16>
%4307 = torch.aten.mul.Scalar %455, %int1 : !torch.vtensor<[10240],f16>, !torch.int -> !torch.vtensor<[10240],f16>
%4308 = torch.aten.add.Tensor %4307, %4306, %int1 : !torch.vtensor<[10240],f16>, !torch.vtensor<[512,10240],f16>, !torch.int -> !torch.vtensor<[512,10240],f16>
%4309 = torch.aten.view %4308, %2276 : !torch.vtensor<[512,10240],f16>, !torch.list<int> -> !torch.vtensor<[2,256,10240],f16>
%4310 = torch.aten.slice.Tensor %4309, %int-1, %int0, %int5120, %int1 : !torch.vtensor<[2,256,10240],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,256,5120],f16>
%4311 = torch.aten.slice.Tensor %4309, %int-1, %int5120, %int10240, %int1 : !torch.vtensor<[2,256,10240],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,256,5120],f16>
%4312 = torch.aten.gelu %4311, %str : !torch.vtensor<[2,256,5120],f16>, !torch.str -> !torch.vtensor<[2,256,5120],f16>
%4313 = torch.aten.mul.Tensor %4310, %4312 : !torch.vtensor<[2,256,5120],f16>, !torch.vtensor<[2,256,5120],f16> -> !torch.vtensor<[2,256,5120],f16>
// FF down-projection 5120 -> 1280 (%456/%457), then the transformer block's
// final residual with %4287.
%4314 = torch.aten.transpose.int %456, %int0, %int1 : !torch.vtensor<[1280,5120],f16>, !torch.int, !torch.int -> !torch.vtensor<[5120,1280],f16>
%4315 = torch.aten.view %4313, %2283 : !torch.vtensor<[2,256,5120],f16>, !torch.list<int> -> !torch.vtensor<[512,5120],f16>
%4316 = torch.aten.mm %4315, %4314 : !torch.vtensor<[512,5120],f16>, !torch.vtensor<[5120,1280],f16> -> !torch.vtensor<[512,1280],f16>
%4317 = torch.aten.mul.Scalar %457, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16>
%4318 = torch.aten.add.Tensor %4317, %4316, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[512,1280],f16>, !torch.int -> !torch.vtensor<[512,1280],f16>
%4319 = torch.aten.view %4318, %2102 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%4320 = torch.aten.add.Tensor %4319, %4287, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16>
// Tokens back to NCHW: [2,256,1280] -> [2,16,16,1280] -> [2,1280,16,16],
// then a 1x1 conv (%458/%459) and a residual add with %4111 (defined above
// this chunk — presumably the features entering the transformer).
%4321 = torch.aten.view %4320, %2290 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,16,16,1280],f16>
%4322 = torch.aten.permute %4321, %1060 : !torch.vtensor<[2,16,16,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f16>
%4323 = torch.aten.convolution %4322, %458, %459, %744, %745, %744, %false, %745, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[1280,1280,1,1],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
%4324 = torch.aten.add.Tensor %4323, %4111, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
// Upsampler: nearest-neighbor resize (scale factors %3418) 16x16 -> 32x32,
// then a 3x3 conv (%460/%461).
%4325 = torch.aten.upsample_nearest2d.vec %4324, %none, %3418 : !torch.vtensor<[2,1280,16,16],f16>, !torch.none, !torch.list<float> -> !torch.vtensor<[2,1280,32,32],f16>
%4326 = torch.aten.convolution %4325, %460, %461, %744, %744, %744, %false, %745, %int1 : !torch.vtensor<[2,1280,32,32],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,1280,32,32],f16>
// UNet skip connection: concatenate with %1976 ([2,640,32,32], from the
// down path) along channels -> [2,1920,32,32].
%4327 = torch.prim.ListConstruct %4326, %1976 : (!torch.vtensor<[2,1280,32,32],f16>, !torch.vtensor<[2,640,32,32],f16>) -> !torch.list<vtensor>
%4328 = torch.aten.cat %4327, %int1 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[2,1920,32,32],f16>
// Resnet GroupNorm #1: 32 groups of 60 channels, viewed as [2,32,60,1024].
// Statistics are computed in f64 (the to.dtype upcasts) with divisor
// 61440 = 60*1024; the mean actually subtracted (%4346) comes from the f32
// copy. Normalized result gets affine %462/%463, then is cast back to f16.
%4329 = torch.aten.clone %4328, %int0 : !torch.vtensor<[2,1920,32,32],f16>, !torch.int -> !torch.vtensor<[2,1920,32,32],f16>
%4330 = torch.prim.ListConstruct %int2, %int32, %int60, %int1024 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4331 = torch.aten.view %4329, %4330 : !torch.vtensor<[2,1920,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,32,60,1024],f16>
%4332 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%4333 = torch.aten.to.dtype %4332, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%4334 = torch.prim.ListConstruct %int2, %int32, %int60, %int1024 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4335 = torch.aten.broadcast_to %4333, %4334 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,60,1024],f32>
%4336 = torch.valsem.aten.copy %4335, %4331, %false : !torch.vtensor<[2,32,60,1024],f32>, !torch.vtensor<[2,32,60,1024],f16>, !torch.bool -> !torch.vtensor<[2,32,60,1024],f32>
%4337 = torch.aten.to.dtype %4336, %int7, %false, %false, %none : !torch.vtensor<[2,32,60,1024],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,60,1024],f64>
%4338 = torch.aten.sum.dim_IntList %4337, %754, %true, %none : !torch.vtensor<[2,32,60,1024],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%4339 = torch.aten.div.Scalar %4338, %int61440 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%4340 = torch.aten.sub.Tensor %4337, %4339, %float1.000000e00 : !torch.vtensor<[2,32,60,1024],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,60,1024],f64>
%4341 = torch.aten.mul.Tensor %4340, %4340 : !torch.vtensor<[2,32,60,1024],f64>, !torch.vtensor<[2,32,60,1024],f64> -> !torch.vtensor<[2,32,60,1024],f64>
%4342 = torch.aten.sum.dim_IntList %4341, %754, %true, %none : !torch.vtensor<[2,32,60,1024],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%4343 = torch.aten.div.Scalar %4342, %int61440 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%4344 = torch.aten.to.dtype %4343, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%4345 = torch.aten.sum.dim_IntList %4336, %754, %true, %none : !torch.vtensor<[2,32,60,1024],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%4346 = torch.aten.div.Scalar %4345, %int61440 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%4347 = torch.aten.add.Tensor %4344, %6, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%4348 = torch.aten.rsqrt %4347 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%4349 = torch.aten.sub.Tensor %4331, %4346, %int1 : !torch.vtensor<[2,32,60,1024],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,60,1024],f32>
%4350 = torch.aten.mul.Tensor %4349, %4348 : !torch.vtensor<[2,32,60,1024],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,60,1024],f32>
%4351 = torch.prim.ListConstruct %int2, %int1920, %int32, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4352 = torch.aten.view %4350, %4351 : !torch.vtensor<[2,32,60,1024],f32>, !torch.list<int> -> !torch.vtensor<[2,1920,32,32],f32>
%4353 = torch.aten.unsqueeze %462, %int-1 : !torch.vtensor<[1920],f16>, !torch.int -> !torch.vtensor<[1920,1],f16>
%4354 = torch.aten.unsqueeze %4353, %int-1 : !torch.vtensor<[1920,1],f16>, !torch.int -> !torch.vtensor<[1920,1,1],f16>
%4355 = torch.aten.mul.Tensor %4352, %4354 : !torch.vtensor<[2,1920,32,32],f32>, !torch.vtensor<[1920,1,1],f16> -> !torch.vtensor<[2,1920,32,32],f32>
%4356 = torch.aten.unsqueeze %463, %int-1 : !torch.vtensor<[1920],f16>, !torch.int -> !torch.vtensor<[1920,1],f16>
%4357 = torch.aten.unsqueeze %4356, %int-1 : !torch.vtensor<[1920,1],f16>, !torch.int -> !torch.vtensor<[1920,1,1],f16>
%4358 = torch.aten.add.Tensor %4355, %4357, %int1 : !torch.vtensor<[2,1920,32,32],f32>, !torch.vtensor<[1920,1,1],f16>, !torch.int -> !torch.vtensor<[2,1920,32,32],f32>
%4359 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%4360 = torch.aten.to.dtype %4359, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%4361 = torch.prim.ListConstruct %int2, %int1920, %int32, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4362 = torch.aten.broadcast_to %4360, %4361 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,1920,32,32],f16>
%4363 = torch.valsem.aten.copy %4362, %4358, %false : !torch.vtensor<[2,1920,32,32],f16>, !torch.vtensor<[2,1920,32,32],f32>, !torch.bool -> !torch.vtensor<[2,1920,32,32],f16>
// SiLU (x * sigmoid(x)) then 3x3 conv 1920 -> 640 (%464/%465).
%4364 = torch.aten.sigmoid %4363 : !torch.vtensor<[2,1920,32,32],f16> -> !torch.vtensor<[2,1920,32,32],f16>
%4365 = torch.aten.mul.Tensor %4364, %4363 : !torch.vtensor<[2,1920,32,32],f16>, !torch.vtensor<[2,1920,32,32],f16> -> !torch.vtensor<[2,1920,32,32],f16>
%4366 = torch.aten.convolution %4365, %464, %465, %744, %744, %744, %false, %745, %int1 : !torch.vtensor<[2,1920,32,32],f16>, !torch.vtensor<[640,1920,3,3],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
// Time-embedding injection: SiLU on %743 ([2,1280], defined above this
// chunk — presumably the shared time embedding), linear 1280 -> 640
// (%466/%467), broadcast to [2,640,1,1], add to the conv output.
%4367 = torch.aten.sigmoid %743 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%4368 = torch.aten.mul.Tensor %4367, %743 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%4369 = torch.aten.transpose.int %466, %int0, %int1 : !torch.vtensor<[640,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,640],f16>
%4370 = torch.aten.mm %4368, %4369 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,640],f16> -> !torch.vtensor<[2,640],f16>
%4371 = torch.aten.mul.Scalar %467, %int1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640],f16>
%4372 = torch.aten.add.Tensor %4371, %4370, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2,640],f16>, !torch.int -> !torch.vtensor<[2,640],f16>
%4373 = torch.aten.slice.Tensor %4372, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,640],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,640],f16>
%4374 = torch.aten.slice.Tensor %4373, %int1, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,640],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,640],f16>
%4375 = torch.aten.unsqueeze %4374, %int2 : !torch.vtensor<[2,640],f16>, !torch.int -> !torch.vtensor<[2,640,1],f16>
%4376 = torch.aten.unsqueeze %4375, %int3 : !torch.vtensor<[2,640,1],f16>, !torch.int -> !torch.vtensor<[2,640,1,1],f16>
%4377 = torch.aten.add.Tensor %4366, %4376, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,1,1],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
// Resnet GroupNorm #2: 32 groups of 20 channels ([2,32,20,1024], divisor
// 20480 = 20*1024), same f64-stats pattern as above; eps %6, affine
// %468/%469.
%4378 = torch.aten.view %4377, %1412 : !torch.vtensor<[2,640,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,32,20,1024],f16>
%4379 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%4380 = torch.aten.to.dtype %4379, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%4381 = torch.prim.ListConstruct %int2, %int32, %int20, %int1024 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4382 = torch.aten.broadcast_to %4380, %4381 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,20,1024],f32>
%4383 = torch.valsem.aten.copy %4382, %4378, %false : !torch.vtensor<[2,32,20,1024],f32>, !torch.vtensor<[2,32,20,1024],f16>, !torch.bool -> !torch.vtensor<[2,32,20,1024],f32>
%4384 = torch.aten.to.dtype %4383, %int7, %false, %false, %none : !torch.vtensor<[2,32,20,1024],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,20,1024],f64>
%4385 = torch.aten.sum.dim_IntList %4384, %754, %true, %none : !torch.vtensor<[2,32,20,1024],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%4386 = torch.aten.div.Scalar %4385, %int20480 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%4387 = torch.aten.sub.Tensor %4384, %4386, %float1.000000e00 : !torch.vtensor<[2,32,20,1024],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,20,1024],f64>
%4388 = torch.aten.mul.Tensor %4387, %4387 : !torch.vtensor<[2,32,20,1024],f64>, !torch.vtensor<[2,32,20,1024],f64> -> !torch.vtensor<[2,32,20,1024],f64>
%4389 = torch.aten.sum.dim_IntList %4388, %754, %true, %none : !torch.vtensor<[2,32,20,1024],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%4390 = torch.aten.div.Scalar %4389, %int20480 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%4391 = torch.aten.to.dtype %4390, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%4392 = torch.aten.sum.dim_IntList %4383, %754, %true, %none : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%4393 = torch.aten.div.Scalar %4392, %int20480 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%4394 = torch.aten.add.Tensor %4391, %6, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%4395 = torch.aten.rsqrt %4394 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%4396 = torch.aten.sub.Tensor %4378, %4393, %int1 : !torch.vtensor<[2,32,20,1024],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,20,1024],f32>
%4397 = torch.aten.mul.Tensor %4396, %4395 : !torch.vtensor<[2,32,20,1024],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,20,1024],f32>
%4398 = torch.aten.view %4397, %1433 : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f32>
%4399 = torch.aten.unsqueeze %468, %int-1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640,1],f16>
%4400 = torch.aten.unsqueeze %4399, %int-1 : !torch.vtensor<[640,1],f16>, !torch.int -> !torch.vtensor<[640,1,1],f16>
%4401 = torch.aten.mul.Tensor %4398, %4400 : !torch.vtensor<[2,640,32,32],f32>, !torch.vtensor<[640,1,1],f16> -> !torch.vtensor<[2,640,32,32],f32>
%4402 = torch.aten.unsqueeze %469, %int-1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640,1],f16>
%4403 = torch.aten.unsqueeze %4402, %int-1 : !torch.vtensor<[640,1],f16>, !torch.int -> !torch.vtensor<[640,1,1],f16>
%4404 = torch.aten.add.Tensor %4401, %4403, %int1 : !torch.vtensor<[2,640,32,32],f32>, !torch.vtensor<[640,1,1],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f32>
%4405 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%4406 = torch.aten.to.dtype %4405, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%4407 = torch.prim.ListConstruct %int2, %int640, %int32, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4408 = torch.aten.broadcast_to %4406, %4407 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f16>
%4409 = torch.valsem.aten.copy %4408, %4404, %false : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f32>, !torch.bool -> !torch.vtensor<[2,640,32,32],f16>
// SiLU then 3x3 conv 640 -> 640 (%470/%471).
%4410 = torch.aten.sigmoid %4409 : !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16>
%4411 = torch.aten.mul.Tensor %4410, %4409 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16>
%4412 = torch.aten.convolution %4411, %470, %471, %744, %744, %744, %false, %745, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[640,640,3,3],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
// Resnet shortcut: 1x1 conv 1920 -> 640 (%472/%473) on the concatenated
// input %4328, add the main branch, then divide by %5 (an f64 scalar
// defined above — presumably the resnet output_scale_factor; TODO confirm
// its value).
%4413 = torch.aten.convolution %4328, %472, %473, %744, %745, %744, %false, %745, %int1 : !torch.vtensor<[2,1920,32,32],f16>, !torch.vtensor<[640,1920,1,1],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
%4414 = torch.aten.add.Tensor %4413, %4412, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
%4415 = torch.aten.div.Tensor %4414, %5 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[2,640,32,32],f16>
// GroupNorm ahead of the next spatial transformer (32 groups of 20
// channels, divisor 20480), affine %474/%475. NOTE(review): this instance
// adds eps tensor %4 where the two GroupNorms above add %6 — both are
// defined outside this chunk; confirm the differing constants are
// intentional.
%4416 = torch.aten.clone %4415, %int0 : !torch.vtensor<[2,640,32,32],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
%4417 = torch.aten.view %4416, %1412 : !torch.vtensor<[2,640,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,32,20,1024],f16>
%4418 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%4419 = torch.aten.to.dtype %4418, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%4420 = torch.prim.ListConstruct %int2, %int32, %int20, %int1024 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4421 = torch.aten.broadcast_to %4419, %4420 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,20,1024],f32>
%4422 = torch.valsem.aten.copy %4421, %4417, %false : !torch.vtensor<[2,32,20,1024],f32>, !torch.vtensor<[2,32,20,1024],f16>, !torch.bool -> !torch.vtensor<[2,32,20,1024],f32>
%4423 = torch.aten.to.dtype %4422, %int7, %false, %false, %none : !torch.vtensor<[2,32,20,1024],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,20,1024],f64>
%4424 = torch.aten.sum.dim_IntList %4423, %754, %true, %none : !torch.vtensor<[2,32,20,1024],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%4425 = torch.aten.div.Scalar %4424, %int20480 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%4426 = torch.aten.sub.Tensor %4423, %4425, %float1.000000e00 : !torch.vtensor<[2,32,20,1024],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,20,1024],f64>
%4427 = torch.aten.mul.Tensor %4426, %4426 : !torch.vtensor<[2,32,20,1024],f64>, !torch.vtensor<[2,32,20,1024],f64> -> !torch.vtensor<[2,32,20,1024],f64>
%4428 = torch.aten.sum.dim_IntList %4427, %754, %true, %none : !torch.vtensor<[2,32,20,1024],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%4429 = torch.aten.div.Scalar %4428, %int20480 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%4430 = torch.aten.to.dtype %4429, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%4431 = torch.aten.sum.dim_IntList %4422, %754, %true, %none : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%4432 = torch.aten.div.Scalar %4431, %int20480 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%4433 = torch.aten.add.Tensor %4430, %4, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%4434 = torch.aten.rsqrt %4433 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%4435 = torch.aten.sub.Tensor %4417, %4432, %int1 : !torch.vtensor<[2,32,20,1024],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,20,1024],f32>
%4436 = torch.aten.mul.Tensor %4435, %4434 : !torch.vtensor<[2,32,20,1024],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,20,1024],f32>
%4437 = torch.aten.view %4436, %1433 : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f32>
%4438 = torch.aten.unsqueeze %474, %int-1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640,1],f16>
%4439 = torch.aten.unsqueeze %4438, %int-1 : !torch.vtensor<[640,1],f16>, !torch.int -> !torch.vtensor<[640,1,1],f16>
%4440 = torch.aten.mul.Tensor %4437, %4439 : !torch.vtensor<[2,640,32,32],f32>, !torch.vtensor<[640,1,1],f16> -> !torch.vtensor<[2,640,32,32],f32>
%4441 = torch.aten.unsqueeze %475, %int-1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640,1],f16>
%4442 = torch.aten.unsqueeze %4441, %int-1 : !torch.vtensor<[640,1],f16>, !torch.int -> !torch.vtensor<[640,1,1],f16>
%4443 = torch.aten.add.Tensor %4440, %4442, %int1 : !torch.vtensor<[2,640,32,32],f32>, !torch.vtensor<[640,1,1],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f32>
%4444 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%4445 = torch.aten.to.dtype %4444, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%4446 = torch.prim.ListConstruct %int2, %int640, %int32, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4447 = torch.aten.broadcast_to %4445, %4446 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f16>
%4448 = torch.valsem.aten.copy %4447, %4443, %false : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f32>, !torch.bool -> !torch.vtensor<[2,640,32,32],f16>
// Transformer input projection: 1x1 conv (%476/%477), NCHW -> NHWC permute,
// flatten spatial dims to tokens: [2,640,32,32] -> [2,1024,640].
%4449 = torch.aten.convolution %4448, %476, %477, %744, %745, %744, %false, %745, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[640,640,1,1],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
%4450 = torch.aten.permute %4449, %866 : !torch.vtensor<[2,640,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,32,32,640],f16>
%4451 = torch.aten.view %4450, %1487 : !torch.vtensor<[2,32,32,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
// Start of the next LayerNorm's reduction-dims list; its consumers are
// below this chunk.
%4452 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%4453 = torch.aten.sum.dim_IntList %4451, %4452, %true, %none : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,1024,1],f16>
%4454 = torch.aten.div.Scalar %4453, %int640 : !torch.vtensor<[2,1024,1],f16>, !torch.int -> !torch.vtensor<[2,1024,1],f16>
%4455 = torch.prim.ListConstruct %int2, %int1024, %int640 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4456 = torch.aten.broadcast_to %4454, %4455 : !torch.vtensor<[2,1024,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%4457 = torch.aten.sub.Tensor %4451, %4456, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16>
%4458 = torch.aten.mul.Tensor %4457, %4457 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16>
%4459 = torch.aten.sum.dim_IntList %4458, %4452, %true, %none : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,1024,1],f16>
%4460 = torch.aten.div.Scalar %4459, %int640 : !torch.vtensor<[2,1024,1],f16>, !torch.int -> !torch.vtensor<[2,1024,1],f16>
%4461 = torch.aten.add.Scalar %4460, %float1.000000e-05, %int1 : !torch.vtensor<[2,1024,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,1024,1],f16>
%4462 = torch.aten.rsqrt %4461 : !torch.vtensor<[2,1024,1],f16> -> !torch.vtensor<[2,1024,1],f16>
%4463 = torch.prim.ListConstruct %int2, %int1024, %int640 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4464 = torch.aten.broadcast_to %4462, %4463 : !torch.vtensor<[2,1024,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%4465 = torch.aten.mul.Tensor %4457, %4464 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16>
%4466 = torch.aten.mul.Tensor %4465, %478 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[640],f16> -> !torch.vtensor<[2,1024,640],f16>
%4467 = torch.aten.add.Tensor %4466, %479, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16>
%4468 = torch.aten.transpose.int %480, %int0, %int1 : !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,640],f16>
%4469 = torch.aten.view %4467, %1506 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
%4470 = torch.aten.mm %4469, %4468 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16>
%4471 = torch.aten.view %4470, %1487 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%4472 = torch.aten.transpose.int %481, %int0, %int1 : !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,640],f16>
%4473 = torch.aten.view %4467, %1506 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
%4474 = torch.aten.mm %4473, %4472 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16>
%4475 = torch.aten.view %4474, %1487 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%4476 = torch.aten.transpose.int %482, %int0, %int1 : !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,640],f16>
%4477 = torch.aten.view %4467, %1506 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
%4478 = torch.aten.mm %4477, %4476 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16>
%4479 = torch.aten.view %4478, %1487 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%4480 = torch.aten.view %4471, %1518 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16>
%4481 = torch.aten.permute %4480, %901 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16>
%4482 = torch.aten.clone %4481, %int0 : !torch.vtensor<[2,8,1024,80],f16>, !torch.int -> !torch.vtensor<[2,8,1024,80],f16>
%4483 = torch.aten.view %4482, %1522 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%4484 = torch.aten.view %4475, %1518 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16>
%4485 = torch.aten.permute %4484, %901 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16>
%4486 = torch.aten.clone %4485, %int0 : !torch.vtensor<[2,8,1024,80],f16>, !torch.int -> !torch.vtensor<[2,8,1024,80],f16>
%4487 = torch.aten.view %4486, %1522 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%4488 = torch.aten.view %4479, %1518 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16>
%4489 = torch.aten.permute %4488, %901 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16>
%4490 = torch.aten.clone %4489, %int0 : !torch.vtensor<[2,8,1024,80],f16>, !torch.int -> !torch.vtensor<[2,8,1024,80],f16>
%4491 = torch.aten.view %4490, %1522 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%4492 = torch.aten.transpose.int %4487, %int-1, %int-2 : !torch.vtensor<[16,1024,80],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,80,1024],f16>
%4493 = torch.aten.broadcast_to %4483, %1522 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%4494 = torch.aten.view %4493, %1522 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%4495 = torch.aten.broadcast_to %4492, %1535 : !torch.vtensor<[16,80,1024],f16>, !torch.list<int> -> !torch.vtensor<[16,80,1024],f16>
%4496 = torch.aten.view %4495, %1535 : !torch.vtensor<[16,80,1024],f16>, !torch.list<int> -> !torch.vtensor<[16,80,1024],f16>
%4497 = torch.aten.bmm %4494, %4496 : !torch.vtensor<[16,1024,80],f16>, !torch.vtensor<[16,80,1024],f16> -> !torch.vtensor<[16,1024,1024],f16>
%4498 = torch.aten.view %4497, %1539 : !torch.vtensor<[16,1024,1024],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,1024],f16>
%4499 = torch.aten.mul.Tensor %4498, %2 : !torch.vtensor<[16,1024,1024],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,1024,1024],f16>
%values_38, %indices_39 = torch.aten.max.dim %4499, %int-1, %true : !torch.vtensor<[16,1024,1024],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,1024,1],f16>, !torch.vtensor<[16,1024,1],si64>
%4500 = torch.aten.sub.Tensor %4499, %values_38, %float1.000000e00 : !torch.vtensor<[16,1024,1024],f16>, !torch.vtensor<[16,1024,1],f16>, !torch.float -> !torch.vtensor<[16,1024,1024],f16>
%4501 = torch.aten.exp %4500 : !torch.vtensor<[16,1024,1024],f16> -> !torch.vtensor<[16,1024,1024],f16>
%4502 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%4503 = torch.aten.sum.dim_IntList %4501, %4502, %true, %none : !torch.vtensor<[16,1024,1024],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[16,1024,1],f16>
%4504 = torch.aten.div.Tensor %4501, %4503 : !torch.vtensor<[16,1024,1024],f16>, !torch.vtensor<[16,1024,1],f16> -> !torch.vtensor<[16,1024,1024],f16>
%4505 = torch.aten.broadcast_to %4504, %1539 : !torch.vtensor<[16,1024,1024],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,1024],f16>
%4506 = torch.aten.view %4505, %1539 : !torch.vtensor<[16,1024,1024],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,1024],f16>
%4507 = torch.aten.broadcast_to %4491, %1522 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%4508 = torch.aten.view %4507, %1522 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%4509 = torch.aten.bmm %4506, %4508 : !torch.vtensor<[16,1024,1024],f16>, !torch.vtensor<[16,1024,80],f16> -> !torch.vtensor<[16,1024,80],f16>
%4510 = torch.aten.view %4509, %1522 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%4511 = torch.aten.view %4510, %1553 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16>
%4512 = torch.aten.permute %4511, %901 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16>
%4513 = torch.aten.clone %4512, %int0 : !torch.vtensor<[2,1024,8,80],f16>, !torch.int -> !torch.vtensor<[2,1024,8,80],f16>
%4514 = torch.aten.view %4513, %1487 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%4515 = torch.aten.transpose.int %483, %int0, %int1 : !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,640],f16>
%4516 = torch.aten.view %4514, %1506 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
%4517 = torch.aten.mm %4516, %4515 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16>
%4518 = torch.aten.mul.Scalar %484, %int1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640],f16>
%4519 = torch.aten.add.Tensor %4518, %4517, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2048,640],f16>, !torch.int -> !torch.vtensor<[2048,640],f16>
%4520 = torch.aten.view %4519, %1487 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%4521 = torch.aten.add.Tensor %4520, %4451, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16>
%4522 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%4523 = torch.aten.sum.dim_IntList %4521, %4522, %true, %none : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,1024,1],f16>
%4524 = torch.aten.div.Scalar %4523, %int640 : !torch.vtensor<[2,1024,1],f16>, !torch.int -> !torch.vtensor<[2,1024,1],f16>
%4525 = torch.prim.ListConstruct %int2, %int1024, %int640 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4526 = torch.aten.broadcast_to %4524, %4525 : !torch.vtensor<[2,1024,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%4527 = torch.aten.sub.Tensor %4521, %4526, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16>
%4528 = torch.aten.mul.Tensor %4527, %4527 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16>
%4529 = torch.aten.sum.dim_IntList %4528, %4522, %true, %none : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,1024,1],f16>
%4530 = torch.aten.div.Scalar %4529, %int640 : !torch.vtensor<[2,1024,1],f16>, !torch.int -> !torch.vtensor<[2,1024,1],f16>
%4531 = torch.aten.add.Scalar %4530, %float1.000000e-05, %int1 : !torch.vtensor<[2,1024,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,1024,1],f16>
%4532 = torch.aten.rsqrt %4531 : !torch.vtensor<[2,1024,1],f16> -> !torch.vtensor<[2,1024,1],f16>
%4533 = torch.prim.ListConstruct %int2, %int1024, %int640 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4534 = torch.aten.broadcast_to %4532, %4533 : !torch.vtensor<[2,1024,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%4535 = torch.aten.mul.Tensor %4527, %4534 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16>
%4536 = torch.aten.mul.Tensor %4535, %485 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[640],f16> -> !torch.vtensor<[2,1024,640],f16>
%4537 = torch.aten.add.Tensor %4536, %486, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16>
%4538 = torch.aten.transpose.int %487, %int0, %int1 : !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,640],f16>
%4539 = torch.aten.view %4537, %1506 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
%4540 = torch.aten.mm %4539, %4538 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16>
%4541 = torch.aten.view %4540, %1487 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%4542 = torch.aten.transpose.int %488, %int0, %int1 : !torch.vtensor<[640,768],f16>, !torch.int, !torch.int -> !torch.vtensor<[768,640],f16>
%4543 = torch.aten.view %arg2, %968 : !torch.vtensor<[2,77,768],f16>, !torch.list<int> -> !torch.vtensor<[154,768],f16>
%4544 = torch.aten.mm %4543, %4542 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,640],f16> -> !torch.vtensor<[154,640],f16>
%4545 = torch.aten.view %4544, %1588 : !torch.vtensor<[154,640],f16>, !torch.list<int> -> !torch.vtensor<[2,77,640],f16>
%4546 = torch.aten.transpose.int %489, %int0, %int1 : !torch.vtensor<[640,768],f16>, !torch.int, !torch.int -> !torch.vtensor<[768,640],f16>
%4547 = torch.aten.view %arg2, %968 : !torch.vtensor<[2,77,768],f16>, !torch.list<int> -> !torch.vtensor<[154,768],f16>
%4548 = torch.aten.mm %4547, %4546 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,640],f16> -> !torch.vtensor<[154,640],f16>
%4549 = torch.aten.view %4548, %1588 : !torch.vtensor<[154,640],f16>, !torch.list<int> -> !torch.vtensor<[2,77,640],f16>
%4550 = torch.aten.view %4541, %1518 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16>
%4551 = torch.aten.permute %4550, %901 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16>
%4552 = torch.aten.clone %4551, %int0 : !torch.vtensor<[2,8,1024,80],f16>, !torch.int -> !torch.vtensor<[2,8,1024,80],f16>
%4553 = torch.aten.view %4552, %1522 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%4554 = torch.aten.view %4545, %1598 : !torch.vtensor<[2,77,640],f16>, !torch.list<int> -> !torch.vtensor<[2,77,8,80],f16>
%4555 = torch.aten.permute %4554, %901 : !torch.vtensor<[2,77,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,80],f16>
%4556 = torch.aten.clone %4555, %int0 : !torch.vtensor<[2,8,77,80],f16>, !torch.int -> !torch.vtensor<[2,8,77,80],f16>
%4557 = torch.aten.view %4556, %1602 : !torch.vtensor<[2,8,77,80],f16>, !torch.list<int> -> !torch.vtensor<[16,77,80],f16>
%4558 = torch.aten.view %4549, %1598 : !torch.vtensor<[2,77,640],f16>, !torch.list<int> -> !torch.vtensor<[2,77,8,80],f16>
%4559 = torch.aten.permute %4558, %901 : !torch.vtensor<[2,77,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,80],f16>
%4560 = torch.aten.clone %4559, %int0 : !torch.vtensor<[2,8,77,80],f16>, !torch.int -> !torch.vtensor<[2,8,77,80],f16>
%4561 = torch.aten.view %4560, %1602 : !torch.vtensor<[2,8,77,80],f16>, !torch.list<int> -> !torch.vtensor<[16,77,80],f16>
%4562 = torch.aten.transpose.int %4557, %int-1, %int-2 : !torch.vtensor<[16,77,80],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,80,77],f16>
%4563 = torch.aten.broadcast_to %4553, %1522 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%4564 = torch.aten.view %4563, %1522 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%4565 = torch.aten.broadcast_to %4562, %1611 : !torch.vtensor<[16,80,77],f16>, !torch.list<int> -> !torch.vtensor<[16,80,77],f16>
%4566 = torch.aten.view %4565, %1611 : !torch.vtensor<[16,80,77],f16>, !torch.list<int> -> !torch.vtensor<[16,80,77],f16>
%4567 = torch.aten.bmm %4564, %4566 : !torch.vtensor<[16,1024,80],f16>, !torch.vtensor<[16,80,77],f16> -> !torch.vtensor<[16,1024,77],f16>
%4568 = torch.aten.view %4567, %1615 : !torch.vtensor<[16,1024,77],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,77],f16>
%4569 = torch.aten.mul.Tensor %4568, %2 : !torch.vtensor<[16,1024,77],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,1024,77],f16>
%values_40, %indices_41 = torch.aten.max.dim %4569, %int-1, %true : !torch.vtensor<[16,1024,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,1024,1],f16>, !torch.vtensor<[16,1024,1],si64>
%4570 = torch.aten.sub.Tensor %4569, %values_40, %float1.000000e00 : !torch.vtensor<[16,1024,77],f16>, !torch.vtensor<[16,1024,1],f16>, !torch.float -> !torch.vtensor<[16,1024,77],f16>
%4571 = torch.aten.exp %4570 : !torch.vtensor<[16,1024,77],f16> -> !torch.vtensor<[16,1024,77],f16>
%4572 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%4573 = torch.aten.sum.dim_IntList %4571, %4572, %true, %none : !torch.vtensor<[16,1024,77],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[16,1024,1],f16>
%4574 = torch.aten.div.Tensor %4571, %4573 : !torch.vtensor<[16,1024,77],f16>, !torch.vtensor<[16,1024,1],f16> -> !torch.vtensor<[16,1024,77],f16>
%4575 = torch.aten.broadcast_to %4574, %1615 : !torch.vtensor<[16,1024,77],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,77],f16>
%4576 = torch.aten.view %4575, %1615 : !torch.vtensor<[16,1024,77],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,77],f16>
%4577 = torch.aten.broadcast_to %4561, %1602 : !torch.vtensor<[16,77,80],f16>, !torch.list<int> -> !torch.vtensor<[16,77,80],f16>
%4578 = torch.aten.view %4577, %1602 : !torch.vtensor<[16,77,80],f16>, !torch.list<int> -> !torch.vtensor<[16,77,80],f16>
%4579 = torch.aten.bmm %4576, %4578 : !torch.vtensor<[16,1024,77],f16>, !torch.vtensor<[16,77,80],f16> -> !torch.vtensor<[16,1024,80],f16>
%4580 = torch.aten.view %4579, %1522 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%4581 = torch.aten.view %4580, %1553 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16>
%4582 = torch.aten.permute %4581, %901 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16>
%4583 = torch.aten.clone %4582, %int0 : !torch.vtensor<[2,1024,8,80],f16>, !torch.int -> !torch.vtensor<[2,1024,8,80],f16>
%4584 = torch.aten.view %4583, %1487 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%4585 = torch.aten.transpose.int %490, %int0, %int1 : !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,640],f16>
%4586 = torch.aten.view %4584, %1506 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
%4587 = torch.aten.mm %4586, %4585 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16>
%4588 = torch.aten.mul.Scalar %491, %int1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640],f16>
%4589 = torch.aten.add.Tensor %4588, %4587, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2048,640],f16>, !torch.int -> !torch.vtensor<[2048,640],f16>
%4590 = torch.aten.view %4589, %1487 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%4591 = torch.aten.add.Tensor %4590, %4521, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16>
%4592 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%4593 = torch.aten.sum.dim_IntList %4591, %4592, %true, %none : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,1024,1],f16>
%4594 = torch.aten.div.Scalar %4593, %int640 : !torch.vtensor<[2,1024,1],f16>, !torch.int -> !torch.vtensor<[2,1024,1],f16>
%4595 = torch.prim.ListConstruct %int2, %int1024, %int640 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4596 = torch.aten.broadcast_to %4594, %4595 : !torch.vtensor<[2,1024,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%4597 = torch.aten.sub.Tensor %4591, %4596, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16>
%4598 = torch.aten.mul.Tensor %4597, %4597 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16>
%4599 = torch.aten.sum.dim_IntList %4598, %4592, %true, %none : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,1024,1],f16>
%4600 = torch.aten.div.Scalar %4599, %int640 : !torch.vtensor<[2,1024,1],f16>, !torch.int -> !torch.vtensor<[2,1024,1],f16>
%4601 = torch.aten.add.Scalar %4600, %float1.000000e-05, %int1 : !torch.vtensor<[2,1024,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,1024,1],f16>
%4602 = torch.aten.rsqrt %4601 : !torch.vtensor<[2,1024,1],f16> -> !torch.vtensor<[2,1024,1],f16>
%4603 = torch.prim.ListConstruct %int2, %int1024, %int640 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4604 = torch.aten.broadcast_to %4602, %4603 : !torch.vtensor<[2,1024,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%4605 = torch.aten.mul.Tensor %4597, %4604 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16>
%4606 = torch.aten.mul.Tensor %4605, %492 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[640],f16> -> !torch.vtensor<[2,1024,640],f16>
%4607 = torch.aten.add.Tensor %4606, %493, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16>
%4608 = torch.aten.transpose.int %494, %int0, %int1 : !torch.vtensor<[5120,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,5120],f16>
%4609 = torch.aten.view %4607, %1506 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
%4610 = torch.aten.mm %4609, %4608 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,5120],f16> -> !torch.vtensor<[2048,5120],f16>
%4611 = torch.aten.mul.Scalar %495, %int1 : !torch.vtensor<[5120],f16>, !torch.int -> !torch.vtensor<[5120],f16>
%4612 = torch.aten.add.Tensor %4611, %4610, %int1 : !torch.vtensor<[5120],f16>, !torch.vtensor<[2048,5120],f16>, !torch.int -> !torch.vtensor<[2048,5120],f16>
%4613 = torch.aten.view %4612, %1661 : !torch.vtensor<[2048,5120],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,5120],f16>
%4614 = torch.aten.slice.Tensor %4613, %int-1, %int0, %int2560, %int1 : !torch.vtensor<[2,1024,5120],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1024,2560],f16>
%4615 = torch.aten.slice.Tensor %4613, %int-1, %int2560, %int5120, %int1 : !torch.vtensor<[2,1024,5120],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1024,2560],f16>
%4616 = torch.aten.gelu %4615, %str : !torch.vtensor<[2,1024,2560],f16>, !torch.str -> !torch.vtensor<[2,1024,2560],f16>
%4617 = torch.aten.mul.Tensor %4614, %4616 : !torch.vtensor<[2,1024,2560],f16>, !torch.vtensor<[2,1024,2560],f16> -> !torch.vtensor<[2,1024,2560],f16>
%4618 = torch.aten.transpose.int %496, %int0, %int1 : !torch.vtensor<[640,2560],f16>, !torch.int, !torch.int -> !torch.vtensor<[2560,640],f16>
%4619 = torch.aten.view %4617, %1668 : !torch.vtensor<[2,1024,2560],f16>, !torch.list<int> -> !torch.vtensor<[2048,2560],f16>
%4620 = torch.aten.mm %4619, %4618 : !torch.vtensor<[2048,2560],f16>, !torch.vtensor<[2560,640],f16> -> !torch.vtensor<[2048,640],f16>
%4621 = torch.aten.mul.Scalar %497, %int1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640],f16>
%4622 = torch.aten.add.Tensor %4621, %4620, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2048,640],f16>, !torch.int -> !torch.vtensor<[2048,640],f16>
%4623 = torch.aten.view %4622, %1487 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%4624 = torch.aten.add.Tensor %4623, %4591, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16>
%4625 = torch.aten.view %4624, %1675 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2,32,32,640],f16>
%4626 = torch.aten.permute %4625, %1060 : !torch.vtensor<[2,32,32,640],f16>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f16>
%4627 = torch.aten.convolution %4626, %498, %499, %744, %745, %744, %false, %745, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[640,640,1,1],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
%4628 = torch.aten.add.Tensor %4627, %4415, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
%4629 = torch.prim.ListConstruct %4628, %1679 : (!torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f16>) -> !torch.list<vtensor>
%4630 = torch.aten.cat %4629, %int1 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[2,1280,32,32],f16>
%4631 = torch.aten.clone %4630, %int0 : !torch.vtensor<[2,1280,32,32],f16>, !torch.int -> !torch.vtensor<[2,1280,32,32],f16>
%4632 = torch.prim.ListConstruct %int2, %int32, %int40, %int1024 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4633 = torch.aten.view %4631, %4632 : !torch.vtensor<[2,1280,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,32,40,1024],f16>
%4634 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%4635 = torch.aten.to.dtype %4634, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%4636 = torch.prim.ListConstruct %int2, %int32, %int40, %int1024 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4637 = torch.aten.broadcast_to %4635, %4636 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,40,1024],f32>
%4638 = torch.valsem.aten.copy %4637, %4633, %false : !torch.vtensor<[2,32,40,1024],f32>, !torch.vtensor<[2,32,40,1024],f16>, !torch.bool -> !torch.vtensor<[2,32,40,1024],f32>
%4639 = torch.aten.to.dtype %4638, %int7, %false, %false, %none : !torch.vtensor<[2,32,40,1024],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,40,1024],f64>
%4640 = torch.aten.sum.dim_IntList %4639, %754, %true, %none : !torch.vtensor<[2,32,40,1024],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%4641 = torch.aten.div.Scalar %4640, %int40960 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%4642 = torch.aten.sub.Tensor %4639, %4641, %float1.000000e00 : !torch.vtensor<[2,32,40,1024],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,40,1024],f64>
%4643 = torch.aten.mul.Tensor %4642, %4642 : !torch.vtensor<[2,32,40,1024],f64>, !torch.vtensor<[2,32,40,1024],f64> -> !torch.vtensor<[2,32,40,1024],f64>
%4644 = torch.aten.sum.dim_IntList %4643, %754, %true, %none : !torch.vtensor<[2,32,40,1024],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%4645 = torch.aten.div.Scalar %4644, %int40960 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%4646 = torch.aten.to.dtype %4645, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%4647 = torch.aten.sum.dim_IntList %4638, %754, %true, %none : !torch.vtensor<[2,32,40,1024],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%4648 = torch.aten.div.Scalar %4647, %int40960 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%4649 = torch.aten.add.Tensor %4646, %6, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%4650 = torch.aten.rsqrt %4649 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%4651 = torch.aten.sub.Tensor %4633, %4648, %int1 : !torch.vtensor<[2,32,40,1024],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,1024],f32>
%4652 = torch.aten.mul.Tensor %4651, %4650 : !torch.vtensor<[2,32,40,1024],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,1024],f32>
%4653 = torch.prim.ListConstruct %int2, %int1280, %int32, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4654 = torch.aten.view %4652, %4653 : !torch.vtensor<[2,32,40,1024],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,32,32],f32>
%4655 = torch.aten.unsqueeze %500, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%4656 = torch.aten.unsqueeze %4655, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%4657 = torch.aten.mul.Tensor %4654, %4656 : !torch.vtensor<[2,1280,32,32],f32>, !torch.vtensor<[1280,1,1],f16> -> !torch.vtensor<[2,1280,32,32],f32>
%4658 = torch.aten.unsqueeze %501, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%4659 = torch.aten.unsqueeze %4658, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%4660 = torch.aten.add.Tensor %4657, %4659, %int1 : !torch.vtensor<[2,1280,32,32],f32>, !torch.vtensor<[1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,32,32],f32>
%4661 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%4662 = torch.aten.to.dtype %4661, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%4663 = torch.prim.ListConstruct %int2, %int1280, %int32, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4664 = torch.aten.broadcast_to %4662, %4663 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,32,32],f16>
%4665 = torch.valsem.aten.copy %4664, %4660, %false : !torch.vtensor<[2,1280,32,32],f16>, !torch.vtensor<[2,1280,32,32],f32>, !torch.bool -> !torch.vtensor<[2,1280,32,32],f16>
%4666 = torch.aten.sigmoid %4665 : !torch.vtensor<[2,1280,32,32],f16> -> !torch.vtensor<[2,1280,32,32],f16>
%4667 = torch.aten.mul.Tensor %4666, %4665 : !torch.vtensor<[2,1280,32,32],f16>, !torch.vtensor<[2,1280,32,32],f16> -> !torch.vtensor<[2,1280,32,32],f16>
%4668 = torch.aten.convolution %4667, %502, %503, %744, %744, %744, %false, %745, %int1 : !torch.vtensor<[2,1280,32,32],f16>, !torch.vtensor<[640,1280,3,3],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
%4669 = torch.aten.sigmoid %743 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%4670 = torch.aten.mul.Tensor %4669, %743 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%4671 = torch.aten.transpose.int %504, %int0, %int1 : !torch.vtensor<[640,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,640],f16>
%4672 = torch.aten.mm %4670, %4671 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,640],f16> -> !torch.vtensor<[2,640],f16>
%4673 = torch.aten.mul.Scalar %505, %int1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640],f16>
%4674 = torch.aten.add.Tensor %4673, %4672, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2,640],f16>, !torch.int -> !torch.vtensor<[2,640],f16>
%4675 = torch.aten.slice.Tensor %4674, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,640],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,640],f16>
%4676 = torch.aten.slice.Tensor %4675, %int1, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,640],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,640],f16>
%4677 = torch.aten.unsqueeze %4676, %int2 : !torch.vtensor<[2,640],f16>, !torch.int -> !torch.vtensor<[2,640,1],f16>
%4678 = torch.aten.unsqueeze %4677, %int3 : !torch.vtensor<[2,640,1],f16>, !torch.int -> !torch.vtensor<[2,640,1,1],f16>
%4679 = torch.aten.add.Tensor %4668, %4678, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,1,1],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
%4680 = torch.aten.view %4679, %1412 : !torch.vtensor<[2,640,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,32,20,1024],f16>
%4681 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%4682 = torch.aten.to.dtype %4681, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%4683 = torch.prim.ListConstruct %int2, %int32, %int20, %int1024 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4684 = torch.aten.broadcast_to %4682, %4683 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,20,1024],f32>
%4685 = torch.valsem.aten.copy %4684, %4680, %false : !torch.vtensor<[2,32,20,1024],f32>, !torch.vtensor<[2,32,20,1024],f16>, !torch.bool -> !torch.vtensor<[2,32,20,1024],f32>
%4686 = torch.aten.to.dtype %4685, %int7, %false, %false, %none : !torch.vtensor<[2,32,20,1024],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,20,1024],f64>
%4687 = torch.aten.sum.dim_IntList %4686, %754, %true, %none : !torch.vtensor<[2,32,20,1024],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%4688 = torch.aten.div.Scalar %4687, %int20480 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%4689 = torch.aten.sub.Tensor %4686, %4688, %float1.000000e00 : !torch.vtensor<[2,32,20,1024],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,20,1024],f64>
%4690 = torch.aten.mul.Tensor %4689, %4689 : !torch.vtensor<[2,32,20,1024],f64>, !torch.vtensor<[2,32,20,1024],f64> -> !torch.vtensor<[2,32,20,1024],f64>
%4691 = torch.aten.sum.dim_IntList %4690, %754, %true, %none : !torch.vtensor<[2,32,20,1024],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%4692 = torch.aten.div.Scalar %4691, %int20480 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%4693 = torch.aten.to.dtype %4692, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%4694 = torch.aten.sum.dim_IntList %4685, %754, %true, %none : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%4695 = torch.aten.div.Scalar %4694, %int20480 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%4696 = torch.aten.add.Tensor %4693, %6, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%4697 = torch.aten.rsqrt %4696 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%4698 = torch.aten.sub.Tensor %4680, %4695, %int1 : !torch.vtensor<[2,32,20,1024],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,20,1024],f32>
%4699 = torch.aten.mul.Tensor %4698, %4697 : !torch.vtensor<[2,32,20,1024],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,20,1024],f32>
%4700 = torch.aten.view %4699, %1433 : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f32>
%4701 = torch.aten.unsqueeze %506, %int-1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640,1],f16>
%4702 = torch.aten.unsqueeze %4701, %int-1 : !torch.vtensor<[640,1],f16>, !torch.int -> !torch.vtensor<[640,1,1],f16>
%4703 = torch.aten.mul.Tensor %4700, %4702 : !torch.vtensor<[2,640,32,32],f32>, !torch.vtensor<[640,1,1],f16> -> !torch.vtensor<[2,640,32,32],f32>
%4704 = torch.aten.unsqueeze %507, %int-1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640,1],f16>
%4705 = torch.aten.unsqueeze %4704, %int-1 : !torch.vtensor<[640,1],f16>, !torch.int -> !torch.vtensor<[640,1,1],f16>
%4706 = torch.aten.add.Tensor %4703, %4705, %int1 : !torch.vtensor<[2,640,32,32],f32>, !torch.vtensor<[640,1,1],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f32>
%4707 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%4708 = torch.aten.to.dtype %4707, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%4709 = torch.prim.ListConstruct %int2, %int640, %int32, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4710 = torch.aten.broadcast_to %4708, %4709 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f16>
%4711 = torch.valsem.aten.copy %4710, %4706, %false : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f32>, !torch.bool -> !torch.vtensor<[2,640,32,32],f16>
%4712 = torch.aten.sigmoid %4711 : !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16>
%4713 = torch.aten.mul.Tensor %4712, %4711 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16>
%4714 = torch.aten.convolution %4713, %508, %509, %744, %744, %744, %false, %745, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[640,640,3,3],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
%4715 = torch.aten.convolution %4630, %510, %511, %744, %745, %744, %false, %745, %int1 : !torch.vtensor<[2,1280,32,32],f16>, !torch.vtensor<[640,1280,1,1],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
%4716 = torch.aten.add.Tensor %4715, %4714, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
%4717 = torch.aten.div.Tensor %4716, %5 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[2,640,32,32],f16>
%4718 = torch.aten.clone %4717, %int0 : !torch.vtensor<[2,640,32,32],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
%4719 = torch.aten.view %4718, %1412 : !torch.vtensor<[2,640,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,32,20,1024],f16>
%4720 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%4721 = torch.aten.to.dtype %4720, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%4722 = torch.prim.ListConstruct %int2, %int32, %int20, %int1024 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4723 = torch.aten.broadcast_to %4721, %4722 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,20,1024],f32>
%4724 = torch.valsem.aten.copy %4723, %4719, %false : !torch.vtensor<[2,32,20,1024],f32>, !torch.vtensor<[2,32,20,1024],f16>, !torch.bool -> !torch.vtensor<[2,32,20,1024],f32>
%4725 = torch.aten.to.dtype %4724, %int7, %false, %false, %none : !torch.vtensor<[2,32,20,1024],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,20,1024],f64>
%4726 = torch.aten.sum.dim_IntList %4725, %754, %true, %none : !torch.vtensor<[2,32,20,1024],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%4727 = torch.aten.div.Scalar %4726, %int20480 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%4728 = torch.aten.sub.Tensor %4725, %4727, %float1.000000e00 : !torch.vtensor<[2,32,20,1024],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,20,1024],f64>
%4729 = torch.aten.mul.Tensor %4728, %4728 : !torch.vtensor<[2,32,20,1024],f64>, !torch.vtensor<[2,32,20,1024],f64> -> !torch.vtensor<[2,32,20,1024],f64>
%4730 = torch.aten.sum.dim_IntList %4729, %754, %true, %none : !torch.vtensor<[2,32,20,1024],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%4731 = torch.aten.div.Scalar %4730, %int20480 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%4732 = torch.aten.to.dtype %4731, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%4733 = torch.aten.sum.dim_IntList %4724, %754, %true, %none : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%4734 = torch.aten.div.Scalar %4733, %int20480 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%4735 = torch.aten.add.Tensor %4732, %4, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%4736 = torch.aten.rsqrt %4735 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%4737 = torch.aten.sub.Tensor %4719, %4734, %int1 : !torch.vtensor<[2,32,20,1024],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,20,1024],f32>
%4738 = torch.aten.mul.Tensor %4737, %4736 : !torch.vtensor<[2,32,20,1024],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,20,1024],f32>
%4739 = torch.aten.view %4738, %1433 : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f32>
%4740 = torch.aten.unsqueeze %512, %int-1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640,1],f16>
%4741 = torch.aten.unsqueeze %4740, %int-1 : !torch.vtensor<[640,1],f16>, !torch.int -> !torch.vtensor<[640,1,1],f16>
%4742 = torch.aten.mul.Tensor %4739, %4741 : !torch.vtensor<[2,640,32,32],f32>, !torch.vtensor<[640,1,1],f16> -> !torch.vtensor<[2,640,32,32],f32>
%4743 = torch.aten.unsqueeze %513, %int-1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640,1],f16>
%4744 = torch.aten.unsqueeze %4743, %int-1 : !torch.vtensor<[640,1],f16>, !torch.int -> !torch.vtensor<[640,1,1],f16>
%4745 = torch.aten.add.Tensor %4742, %4744, %int1 : !torch.vtensor<[2,640,32,32],f32>, !torch.vtensor<[640,1,1],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f32>
%4746 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%4747 = torch.aten.to.dtype %4746, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%4748 = torch.prim.ListConstruct %int2, %int640, %int32, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4749 = torch.aten.broadcast_to %4747, %4748 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f16>
%4750 = torch.valsem.aten.copy %4749, %4745, %false : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f32>, !torch.bool -> !torch.vtensor<[2,640,32,32],f16>
%4751 = torch.aten.convolution %4750, %514, %515, %744, %745, %744, %false, %745, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[640,640,1,1],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
%4752 = torch.aten.permute %4751, %866 : !torch.vtensor<[2,640,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,32,32,640],f16>
%4753 = torch.aten.view %4752, %1487 : !torch.vtensor<[2,32,32,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%4754 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%4755 = torch.aten.sum.dim_IntList %4753, %4754, %true, %none : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,1024,1],f16>
%4756 = torch.aten.div.Scalar %4755, %int640 : !torch.vtensor<[2,1024,1],f16>, !torch.int -> !torch.vtensor<[2,1024,1],f16>
%4757 = torch.prim.ListConstruct %int2, %int1024, %int640 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4758 = torch.aten.broadcast_to %4756, %4757 : !torch.vtensor<[2,1024,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%4759 = torch.aten.sub.Tensor %4753, %4758, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16>
%4760 = torch.aten.mul.Tensor %4759, %4759 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16>
%4761 = torch.aten.sum.dim_IntList %4760, %4754, %true, %none : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,1024,1],f16>
%4762 = torch.aten.div.Scalar %4761, %int640 : !torch.vtensor<[2,1024,1],f16>, !torch.int -> !torch.vtensor<[2,1024,1],f16>
%4763 = torch.aten.add.Scalar %4762, %float1.000000e-05, %int1 : !torch.vtensor<[2,1024,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,1024,1],f16>
%4764 = torch.aten.rsqrt %4763 : !torch.vtensor<[2,1024,1],f16> -> !torch.vtensor<[2,1024,1],f16>
%4765 = torch.prim.ListConstruct %int2, %int1024, %int640 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4766 = torch.aten.broadcast_to %4764, %4765 : !torch.vtensor<[2,1024,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%4767 = torch.aten.mul.Tensor %4759, %4766 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16>
%4768 = torch.aten.mul.Tensor %4767, %516 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[640],f16> -> !torch.vtensor<[2,1024,640],f16>
%4769 = torch.aten.add.Tensor %4768, %517, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16>
%4770 = torch.aten.transpose.int %518, %int0, %int1 : !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,640],f16>
%4771 = torch.aten.view %4769, %1506 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
%4772 = torch.aten.mm %4771, %4770 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16>
%4773 = torch.aten.view %4772, %1487 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%4774 = torch.aten.transpose.int %519, %int0, %int1 : !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,640],f16>
%4775 = torch.aten.view %4769, %1506 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
%4776 = torch.aten.mm %4775, %4774 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16>
%4777 = torch.aten.view %4776, %1487 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%4778 = torch.aten.transpose.int %520, %int0, %int1 : !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,640],f16>
%4779 = torch.aten.view %4769, %1506 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
%4780 = torch.aten.mm %4779, %4778 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16>
%4781 = torch.aten.view %4780, %1487 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%4782 = torch.aten.view %4773, %1518 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16>
%4783 = torch.aten.permute %4782, %901 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16>
%4784 = torch.aten.clone %4783, %int0 : !torch.vtensor<[2,8,1024,80],f16>, !torch.int -> !torch.vtensor<[2,8,1024,80],f16>
%4785 = torch.aten.view %4784, %1522 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%4786 = torch.aten.view %4777, %1518 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16>
%4787 = torch.aten.permute %4786, %901 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16>
%4788 = torch.aten.clone %4787, %int0 : !torch.vtensor<[2,8,1024,80],f16>, !torch.int -> !torch.vtensor<[2,8,1024,80],f16>
%4789 = torch.aten.view %4788, %1522 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%4790 = torch.aten.view %4781, %1518 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16>
%4791 = torch.aten.permute %4790, %901 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16>
%4792 = torch.aten.clone %4791, %int0 : !torch.vtensor<[2,8,1024,80],f16>, !torch.int -> !torch.vtensor<[2,8,1024,80],f16>
%4793 = torch.aten.view %4792, %1522 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%4794 = torch.aten.transpose.int %4789, %int-1, %int-2 : !torch.vtensor<[16,1024,80],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,80,1024],f16>
%4795 = torch.aten.broadcast_to %4785, %1522 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%4796 = torch.aten.view %4795, %1522 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%4797 = torch.aten.broadcast_to %4794, %1535 : !torch.vtensor<[16,80,1024],f16>, !torch.list<int> -> !torch.vtensor<[16,80,1024],f16>
%4798 = torch.aten.view %4797, %1535 : !torch.vtensor<[16,80,1024],f16>, !torch.list<int> -> !torch.vtensor<[16,80,1024],f16>
%4799 = torch.aten.bmm %4796, %4798 : !torch.vtensor<[16,1024,80],f16>, !torch.vtensor<[16,80,1024],f16> -> !torch.vtensor<[16,1024,1024],f16>
%4800 = torch.aten.view %4799, %1539 : !torch.vtensor<[16,1024,1024],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,1024],f16>
%4801 = torch.aten.mul.Tensor %4800, %2 : !torch.vtensor<[16,1024,1024],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,1024,1024],f16>
%values_42, %indices_43 = torch.aten.max.dim %4801, %int-1, %true : !torch.vtensor<[16,1024,1024],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,1024,1],f16>, !torch.vtensor<[16,1024,1],si64>
%4802 = torch.aten.sub.Tensor %4801, %values_42, %float1.000000e00 : !torch.vtensor<[16,1024,1024],f16>, !torch.vtensor<[16,1024,1],f16>, !torch.float -> !torch.vtensor<[16,1024,1024],f16>
%4803 = torch.aten.exp %4802 : !torch.vtensor<[16,1024,1024],f16> -> !torch.vtensor<[16,1024,1024],f16>
%4804 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%4805 = torch.aten.sum.dim_IntList %4803, %4804, %true, %none : !torch.vtensor<[16,1024,1024],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[16,1024,1],f16>
%4806 = torch.aten.div.Tensor %4803, %4805 : !torch.vtensor<[16,1024,1024],f16>, !torch.vtensor<[16,1024,1],f16> -> !torch.vtensor<[16,1024,1024],f16>
%4807 = torch.aten.broadcast_to %4806, %1539 : !torch.vtensor<[16,1024,1024],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,1024],f16>
%4808 = torch.aten.view %4807, %1539 : !torch.vtensor<[16,1024,1024],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,1024],f16>
%4809 = torch.aten.broadcast_to %4793, %1522 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%4810 = torch.aten.view %4809, %1522 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%4811 = torch.aten.bmm %4808, %4810 : !torch.vtensor<[16,1024,1024],f16>, !torch.vtensor<[16,1024,80],f16> -> !torch.vtensor<[16,1024,80],f16>
%4812 = torch.aten.view %4811, %1522 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%4813 = torch.aten.view %4812, %1553 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16>
%4814 = torch.aten.permute %4813, %901 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16>
%4815 = torch.aten.clone %4814, %int0 : !torch.vtensor<[2,1024,8,80],f16>, !torch.int -> !torch.vtensor<[2,1024,8,80],f16>
%4816 = torch.aten.view %4815, %1487 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%4817 = torch.aten.transpose.int %521, %int0, %int1 : !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,640],f16>
%4818 = torch.aten.view %4816, %1506 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
%4819 = torch.aten.mm %4818, %4817 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16>
%4820 = torch.aten.mul.Scalar %522, %int1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640],f16>
%4821 = torch.aten.add.Tensor %4820, %4819, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2048,640],f16>, !torch.int -> !torch.vtensor<[2048,640],f16>
%4822 = torch.aten.view %4821, %1487 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%4823 = torch.aten.add.Tensor %4822, %4753, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16>
%4824 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%4825 = torch.aten.sum.dim_IntList %4823, %4824, %true, %none : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,1024,1],f16>
%4826 = torch.aten.div.Scalar %4825, %int640 : !torch.vtensor<[2,1024,1],f16>, !torch.int -> !torch.vtensor<[2,1024,1],f16>
%4827 = torch.prim.ListConstruct %int2, %int1024, %int640 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4828 = torch.aten.broadcast_to %4826, %4827 : !torch.vtensor<[2,1024,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%4829 = torch.aten.sub.Tensor %4823, %4828, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16>
%4830 = torch.aten.mul.Tensor %4829, %4829 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16>
%4831 = torch.aten.sum.dim_IntList %4830, %4824, %true, %none : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,1024,1],f16>
%4832 = torch.aten.div.Scalar %4831, %int640 : !torch.vtensor<[2,1024,1],f16>, !torch.int -> !torch.vtensor<[2,1024,1],f16>
%4833 = torch.aten.add.Scalar %4832, %float1.000000e-05, %int1 : !torch.vtensor<[2,1024,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,1024,1],f16>
%4834 = torch.aten.rsqrt %4833 : !torch.vtensor<[2,1024,1],f16> -> !torch.vtensor<[2,1024,1],f16>
%4835 = torch.prim.ListConstruct %int2, %int1024, %int640 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4836 = torch.aten.broadcast_to %4834, %4835 : !torch.vtensor<[2,1024,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%4837 = torch.aten.mul.Tensor %4829, %4836 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16>
%4838 = torch.aten.mul.Tensor %4837, %523 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[640],f16> -> !torch.vtensor<[2,1024,640],f16>
%4839 = torch.aten.add.Tensor %4838, %524, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16>
%4840 = torch.aten.transpose.int %525, %int0, %int1 : !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,640],f16>
%4841 = torch.aten.view %4839, %1506 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
%4842 = torch.aten.mm %4841, %4840 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16>
%4843 = torch.aten.view %4842, %1487 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%4844 = torch.aten.transpose.int %526, %int0, %int1 : !torch.vtensor<[640,768],f16>, !torch.int, !torch.int -> !torch.vtensor<[768,640],f16>
%4845 = torch.aten.view %arg2, %968 : !torch.vtensor<[2,77,768],f16>, !torch.list<int> -> !torch.vtensor<[154,768],f16>
%4846 = torch.aten.mm %4845, %4844 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,640],f16> -> !torch.vtensor<[154,640],f16>
%4847 = torch.aten.view %4846, %1588 : !torch.vtensor<[154,640],f16>, !torch.list<int> -> !torch.vtensor<[2,77,640],f16>
%4848 = torch.aten.transpose.int %527, %int0, %int1 : !torch.vtensor<[640,768],f16>, !torch.int, !torch.int -> !torch.vtensor<[768,640],f16>
%4849 = torch.aten.view %arg2, %968 : !torch.vtensor<[2,77,768],f16>, !torch.list<int> -> !torch.vtensor<[154,768],f16>
%4850 = torch.aten.mm %4849, %4848 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,640],f16> -> !torch.vtensor<[154,640],f16>
%4851 = torch.aten.view %4850, %1588 : !torch.vtensor<[154,640],f16>, !torch.list<int> -> !torch.vtensor<[2,77,640],f16>
%4852 = torch.aten.view %4843, %1518 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16>
%4853 = torch.aten.permute %4852, %901 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16>
%4854 = torch.aten.clone %4853, %int0 : !torch.vtensor<[2,8,1024,80],f16>, !torch.int -> !torch.vtensor<[2,8,1024,80],f16>
%4855 = torch.aten.view %4854, %1522 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%4856 = torch.aten.view %4847, %1598 : !torch.vtensor<[2,77,640],f16>, !torch.list<int> -> !torch.vtensor<[2,77,8,80],f16>
%4857 = torch.aten.permute %4856, %901 : !torch.vtensor<[2,77,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,80],f16>
%4858 = torch.aten.clone %4857, %int0 : !torch.vtensor<[2,8,77,80],f16>, !torch.int -> !torch.vtensor<[2,8,77,80],f16>
%4859 = torch.aten.view %4858, %1602 : !torch.vtensor<[2,8,77,80],f16>, !torch.list<int> -> !torch.vtensor<[16,77,80],f16>
%4860 = torch.aten.view %4851, %1598 : !torch.vtensor<[2,77,640],f16>, !torch.list<int> -> !torch.vtensor<[2,77,8,80],f16>
%4861 = torch.aten.permute %4860, %901 : !torch.vtensor<[2,77,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,80],f16>
%4862 = torch.aten.clone %4861, %int0 : !torch.vtensor<[2,8,77,80],f16>, !torch.int -> !torch.vtensor<[2,8,77,80],f16>
%4863 = torch.aten.view %4862, %1602 : !torch.vtensor<[2,8,77,80],f16>, !torch.list<int> -> !torch.vtensor<[16,77,80],f16>
%4864 = torch.aten.transpose.int %4859, %int-1, %int-2 : !torch.vtensor<[16,77,80],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,80,77],f16>
%4865 = torch.aten.broadcast_to %4855, %1522 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%4866 = torch.aten.view %4865, %1522 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%4867 = torch.aten.broadcast_to %4864, %1611 : !torch.vtensor<[16,80,77],f16>, !torch.list<int> -> !torch.vtensor<[16,80,77],f16>
%4868 = torch.aten.view %4867, %1611 : !torch.vtensor<[16,80,77],f16>, !torch.list<int> -> !torch.vtensor<[16,80,77],f16>
%4869 = torch.aten.bmm %4866, %4868 : !torch.vtensor<[16,1024,80],f16>, !torch.vtensor<[16,80,77],f16> -> !torch.vtensor<[16,1024,77],f16>
%4870 = torch.aten.view %4869, %1615 : !torch.vtensor<[16,1024,77],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,77],f16>
%4871 = torch.aten.mul.Tensor %4870, %2 : !torch.vtensor<[16,1024,77],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,1024,77],f16>
%values_44, %indices_45 = torch.aten.max.dim %4871, %int-1, %true : !torch.vtensor<[16,1024,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,1024,1],f16>, !torch.vtensor<[16,1024,1],si64>
%4872 = torch.aten.sub.Tensor %4871, %values_44, %float1.000000e00 : !torch.vtensor<[16,1024,77],f16>, !torch.vtensor<[16,1024,1],f16>, !torch.float -> !torch.vtensor<[16,1024,77],f16>
%4873 = torch.aten.exp %4872 : !torch.vtensor<[16,1024,77],f16> -> !torch.vtensor<[16,1024,77],f16>
%4874 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%4875 = torch.aten.sum.dim_IntList %4873, %4874, %true, %none : !torch.vtensor<[16,1024,77],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[16,1024,1],f16>
%4876 = torch.aten.div.Tensor %4873, %4875 : !torch.vtensor<[16,1024,77],f16>, !torch.vtensor<[16,1024,1],f16> -> !torch.vtensor<[16,1024,77],f16>
%4877 = torch.aten.broadcast_to %4876, %1615 : !torch.vtensor<[16,1024,77],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,77],f16>
%4878 = torch.aten.view %4877, %1615 : !torch.vtensor<[16,1024,77],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,77],f16>
%4879 = torch.aten.broadcast_to %4863, %1602 : !torch.vtensor<[16,77,80],f16>, !torch.list<int> -> !torch.vtensor<[16,77,80],f16>
%4880 = torch.aten.view %4879, %1602 : !torch.vtensor<[16,77,80],f16>, !torch.list<int> -> !torch.vtensor<[16,77,80],f16>
%4881 = torch.aten.bmm %4878, %4880 : !torch.vtensor<[16,1024,77],f16>, !torch.vtensor<[16,77,80],f16> -> !torch.vtensor<[16,1024,80],f16>
%4882 = torch.aten.view %4881, %1522 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%4883 = torch.aten.view %4882, %1553 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16>
%4884 = torch.aten.permute %4883, %901 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16>
%4885 = torch.aten.clone %4884, %int0 : !torch.vtensor<[2,1024,8,80],f16>, !torch.int -> !torch.vtensor<[2,1024,8,80],f16>
%4886 = torch.aten.view %4885, %1487 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%4887 = torch.aten.transpose.int %528, %int0, %int1 : !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,640],f16>
%4888 = torch.aten.view %4886, %1506 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
%4889 = torch.aten.mm %4888, %4887 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16>
%4890 = torch.aten.mul.Scalar %529, %int1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640],f16>
%4891 = torch.aten.add.Tensor %4890, %4889, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2048,640],f16>, !torch.int -> !torch.vtensor<[2048,640],f16>
%4892 = torch.aten.view %4891, %1487 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%4893 = torch.aten.add.Tensor %4892, %4823, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16>
%4894 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%4895 = torch.aten.sum.dim_IntList %4893, %4894, %true, %none : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,1024,1],f16>
%4896 = torch.aten.div.Scalar %4895, %int640 : !torch.vtensor<[2,1024,1],f16>, !torch.int -> !torch.vtensor<[2,1024,1],f16>
%4897 = torch.prim.ListConstruct %int2, %int1024, %int640 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4898 = torch.aten.broadcast_to %4896, %4897 : !torch.vtensor<[2,1024,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%4899 = torch.aten.sub.Tensor %4893, %4898, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16>
%4900 = torch.aten.mul.Tensor %4899, %4899 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16>
%4901 = torch.aten.sum.dim_IntList %4900, %4894, %true, %none : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,1024,1],f16>
%4902 = torch.aten.div.Scalar %4901, %int640 : !torch.vtensor<[2,1024,1],f16>, !torch.int -> !torch.vtensor<[2,1024,1],f16>
%4903 = torch.aten.add.Scalar %4902, %float1.000000e-05, %int1 : !torch.vtensor<[2,1024,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,1024,1],f16>
%4904 = torch.aten.rsqrt %4903 : !torch.vtensor<[2,1024,1],f16> -> !torch.vtensor<[2,1024,1],f16>
%4905 = torch.prim.ListConstruct %int2, %int1024, %int640 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4906 = torch.aten.broadcast_to %4904, %4905 : !torch.vtensor<[2,1024,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%4907 = torch.aten.mul.Tensor %4899, %4906 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16>
%4908 = torch.aten.mul.Tensor %4907, %530 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[640],f16> -> !torch.vtensor<[2,1024,640],f16>
%4909 = torch.aten.add.Tensor %4908, %531, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16>
%4910 = torch.aten.transpose.int %532, %int0, %int1 : !torch.vtensor<[5120,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,5120],f16>
%4911 = torch.aten.view %4909, %1506 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
%4912 = torch.aten.mm %4911, %4910 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,5120],f16> -> !torch.vtensor<[2048,5120],f16>
%4913 = torch.aten.mul.Scalar %533, %int1 : !torch.vtensor<[5120],f16>, !torch.int -> !torch.vtensor<[5120],f16>
%4914 = torch.aten.add.Tensor %4913, %4912, %int1 : !torch.vtensor<[5120],f16>, !torch.vtensor<[2048,5120],f16>, !torch.int -> !torch.vtensor<[2048,5120],f16>
%4915 = torch.aten.view %4914, %1661 : !torch.vtensor<[2048,5120],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,5120],f16>
%4916 = torch.aten.slice.Tensor %4915, %int-1, %int0, %int2560, %int1 : !torch.vtensor<[2,1024,5120],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1024,2560],f16>
%4917 = torch.aten.slice.Tensor %4915, %int-1, %int2560, %int5120, %int1 : !torch.vtensor<[2,1024,5120],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1024,2560],f16>
%4918 = torch.aten.gelu %4917, %str : !torch.vtensor<[2,1024,2560],f16>, !torch.str -> !torch.vtensor<[2,1024,2560],f16>
%4919 = torch.aten.mul.Tensor %4916, %4918 : !torch.vtensor<[2,1024,2560],f16>, !torch.vtensor<[2,1024,2560],f16> -> !torch.vtensor<[2,1024,2560],f16>
%4920 = torch.aten.transpose.int %534, %int0, %int1 : !torch.vtensor<[640,2560],f16>, !torch.int, !torch.int -> !torch.vtensor<[2560,640],f16>
%4921 = torch.aten.view %4919, %1668 : !torch.vtensor<[2,1024,2560],f16>, !torch.list<int> -> !torch.vtensor<[2048,2560],f16>
%4922 = torch.aten.mm %4921, %4920 : !torch.vtensor<[2048,2560],f16>, !torch.vtensor<[2560,640],f16> -> !torch.vtensor<[2048,640],f16>
%4923 = torch.aten.mul.Scalar %535, %int1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640],f16>
%4924 = torch.aten.add.Tensor %4923, %4922, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2048,640],f16>, !torch.int -> !torch.vtensor<[2048,640],f16>
%4925 = torch.aten.view %4924, %1487 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%4926 = torch.aten.add.Tensor %4925, %4893, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16>
%4927 = torch.aten.view %4926, %1675 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2,32,32,640],f16>
%4928 = torch.aten.permute %4927, %1060 : !torch.vtensor<[2,32,32,640],f16>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f16>
%4929 = torch.aten.convolution %4928, %536, %537, %744, %745, %744, %false, %745, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[640,640,1,1],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
%4930 = torch.aten.add.Tensor %4929, %4717, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
%4931 = torch.prim.ListConstruct %4930, %1362 : (!torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,320,32,32],f16>) -> !torch.list<vtensor>
%4932 = torch.aten.cat %4931, %int1 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[2,960,32,32],f16>
%4933 = torch.aten.clone %4932, %int0 : !torch.vtensor<[2,960,32,32],f16>, !torch.int -> !torch.vtensor<[2,960,32,32],f16>
%4934 = torch.prim.ListConstruct %int2, %int32, %int30, %int1024 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4935 = torch.aten.view %4933, %4934 : !torch.vtensor<[2,960,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,32,30,1024],f16>
%4936 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%4937 = torch.aten.to.dtype %4936, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%4938 = torch.prim.ListConstruct %int2, %int32, %int30, %int1024 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4939 = torch.aten.broadcast_to %4937, %4938 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,30,1024],f32>
%4940 = torch.valsem.aten.copy %4939, %4935, %false : !torch.vtensor<[2,32,30,1024],f32>, !torch.vtensor<[2,32,30,1024],f16>, !torch.bool -> !torch.vtensor<[2,32,30,1024],f32>
%4941 = torch.aten.to.dtype %4940, %int7, %false, %false, %none : !torch.vtensor<[2,32,30,1024],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,30,1024],f64>
%4942 = torch.aten.sum.dim_IntList %4941, %754, %true, %none : !torch.vtensor<[2,32,30,1024],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%4943 = torch.aten.div.Scalar %4942, %int30720 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%4944 = torch.aten.sub.Tensor %4941, %4943, %float1.000000e00 : !torch.vtensor<[2,32,30,1024],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,30,1024],f64>
%4945 = torch.aten.mul.Tensor %4944, %4944 : !torch.vtensor<[2,32,30,1024],f64>, !torch.vtensor<[2,32,30,1024],f64> -> !torch.vtensor<[2,32,30,1024],f64>
%4946 = torch.aten.sum.dim_IntList %4945, %754, %true, %none : !torch.vtensor<[2,32,30,1024],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%4947 = torch.aten.div.Scalar %4946, %int30720 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%4948 = torch.aten.to.dtype %4947, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%4949 = torch.aten.sum.dim_IntList %4940, %754, %true, %none : !torch.vtensor<[2,32,30,1024],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%4950 = torch.aten.div.Scalar %4949, %int30720 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%4951 = torch.aten.add.Tensor %4948, %6, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%4952 = torch.aten.rsqrt %4951 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%4953 = torch.aten.sub.Tensor %4935, %4950, %int1 : !torch.vtensor<[2,32,30,1024],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,30,1024],f32>
%4954 = torch.aten.mul.Tensor %4953, %4952 : !torch.vtensor<[2,32,30,1024],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,30,1024],f32>
%4955 = torch.prim.ListConstruct %int2, %int960, %int32, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4956 = torch.aten.view %4954, %4955 : !torch.vtensor<[2,32,30,1024],f32>, !torch.list<int> -> !torch.vtensor<[2,960,32,32],f32>
%4957 = torch.aten.unsqueeze %538, %int-1 : !torch.vtensor<[960],f16>, !torch.int -> !torch.vtensor<[960,1],f16>
%4958 = torch.aten.unsqueeze %4957, %int-1 : !torch.vtensor<[960,1],f16>, !torch.int -> !torch.vtensor<[960,1,1],f16>
%4959 = torch.aten.mul.Tensor %4956, %4958 : !torch.vtensor<[2,960,32,32],f32>, !torch.vtensor<[960,1,1],f16> -> !torch.vtensor<[2,960,32,32],f32>
%4960 = torch.aten.unsqueeze %539, %int-1 : !torch.vtensor<[960],f16>, !torch.int -> !torch.vtensor<[960,1],f16>
%4961 = torch.aten.unsqueeze %4960, %int-1 : !torch.vtensor<[960,1],f16>, !torch.int -> !torch.vtensor<[960,1,1],f16>
%4962 = torch.aten.add.Tensor %4959, %4961, %int1 : !torch.vtensor<[2,960,32,32],f32>, !torch.vtensor<[960,1,1],f16>, !torch.int -> !torch.vtensor<[2,960,32,32],f32>
%4963 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%4964 = torch.aten.to.dtype %4963, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%4965 = torch.prim.ListConstruct %int2, %int960, %int32, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4966 = torch.aten.broadcast_to %4964, %4965 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,960,32,32],f16>
%4967 = torch.valsem.aten.copy %4966, %4962, %false : !torch.vtensor<[2,960,32,32],f16>, !torch.vtensor<[2,960,32,32],f32>, !torch.bool -> !torch.vtensor<[2,960,32,32],f16>
%4968 = torch.aten.sigmoid %4967 : !torch.vtensor<[2,960,32,32],f16> -> !torch.vtensor<[2,960,32,32],f16>
%4969 = torch.aten.mul.Tensor %4968, %4967 : !torch.vtensor<[2,960,32,32],f16>, !torch.vtensor<[2,960,32,32],f16> -> !torch.vtensor<[2,960,32,32],f16>
%4970 = torch.aten.convolution %4969, %540, %541, %744, %744, %744, %false, %745, %int1 : !torch.vtensor<[2,960,32,32],f16>, !torch.vtensor<[640,960,3,3],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
%4971 = torch.aten.sigmoid %743 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%4972 = torch.aten.mul.Tensor %4971, %743 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%4973 = torch.aten.transpose.int %542, %int0, %int1 : !torch.vtensor<[640,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,640],f16>
%4974 = torch.aten.mm %4972, %4973 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,640],f16> -> !torch.vtensor<[2,640],f16>
%4975 = torch.aten.mul.Scalar %543, %int1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640],f16>
%4976 = torch.aten.add.Tensor %4975, %4974, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2,640],f16>, !torch.int -> !torch.vtensor<[2,640],f16>
%4977 = torch.aten.slice.Tensor %4976, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,640],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,640],f16>
%4978 = torch.aten.slice.Tensor %4977, %int1, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,640],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,640],f16>
%4979 = torch.aten.unsqueeze %4978, %int2 : !torch.vtensor<[2,640],f16>, !torch.int -> !torch.vtensor<[2,640,1],f16>
%4980 = torch.aten.unsqueeze %4979, %int3 : !torch.vtensor<[2,640,1],f16>, !torch.int -> !torch.vtensor<[2,640,1,1],f16>
%4981 = torch.aten.add.Tensor %4970, %4980, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,1,1],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
%4982 = torch.aten.view %4981, %1412 : !torch.vtensor<[2,640,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,32,20,1024],f16>
%4983 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%4984 = torch.aten.to.dtype %4983, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%4985 = torch.prim.ListConstruct %int2, %int32, %int20, %int1024 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4986 = torch.aten.broadcast_to %4984, %4985 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,20,1024],f32>
%4987 = torch.valsem.aten.copy %4986, %4982, %false : !torch.vtensor<[2,32,20,1024],f32>, !torch.vtensor<[2,32,20,1024],f16>, !torch.bool -> !torch.vtensor<[2,32,20,1024],f32>
%4988 = torch.aten.to.dtype %4987, %int7, %false, %false, %none : !torch.vtensor<[2,32,20,1024],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,20,1024],f64>
%4989 = torch.aten.sum.dim_IntList %4988, %754, %true, %none : !torch.vtensor<[2,32,20,1024],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%4990 = torch.aten.div.Scalar %4989, %int20480 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%4991 = torch.aten.sub.Tensor %4988, %4990, %float1.000000e00 : !torch.vtensor<[2,32,20,1024],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,20,1024],f64>
%4992 = torch.aten.mul.Tensor %4991, %4991 : !torch.vtensor<[2,32,20,1024],f64>, !torch.vtensor<[2,32,20,1024],f64> -> !torch.vtensor<[2,32,20,1024],f64>
%4993 = torch.aten.sum.dim_IntList %4992, %754, %true, %none : !torch.vtensor<[2,32,20,1024],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%4994 = torch.aten.div.Scalar %4993, %int20480 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%4995 = torch.aten.to.dtype %4994, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%4996 = torch.aten.sum.dim_IntList %4987, %754, %true, %none : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%4997 = torch.aten.div.Scalar %4996, %int20480 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%4998 = torch.aten.add.Tensor %4995, %6, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%4999 = torch.aten.rsqrt %4998 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%5000 = torch.aten.sub.Tensor %4982, %4997, %int1 : !torch.vtensor<[2,32,20,1024],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,20,1024],f32>
%5001 = torch.aten.mul.Tensor %5000, %4999 : !torch.vtensor<[2,32,20,1024],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,20,1024],f32>
%5002 = torch.aten.view %5001, %1433 : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f32>
%5003 = torch.aten.unsqueeze %544, %int-1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640,1],f16>
%5004 = torch.aten.unsqueeze %5003, %int-1 : !torch.vtensor<[640,1],f16>, !torch.int -> !torch.vtensor<[640,1,1],f16>
%5005 = torch.aten.mul.Tensor %5002, %5004 : !torch.vtensor<[2,640,32,32],f32>, !torch.vtensor<[640,1,1],f16> -> !torch.vtensor<[2,640,32,32],f32>
%5006 = torch.aten.unsqueeze %545, %int-1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640,1],f16>
%5007 = torch.aten.unsqueeze %5006, %int-1 : !torch.vtensor<[640,1],f16>, !torch.int -> !torch.vtensor<[640,1,1],f16>
%5008 = torch.aten.add.Tensor %5005, %5007, %int1 : !torch.vtensor<[2,640,32,32],f32>, !torch.vtensor<[640,1,1],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f32>
%5009 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%5010 = torch.aten.to.dtype %5009, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%5011 = torch.prim.ListConstruct %int2, %int640, %int32, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5012 = torch.aten.broadcast_to %5010, %5011 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f16>
%5013 = torch.valsem.aten.copy %5012, %5008, %false : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f32>, !torch.bool -> !torch.vtensor<[2,640,32,32],f16>
%5014 = torch.aten.sigmoid %5013 : !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16>
%5015 = torch.aten.mul.Tensor %5014, %5013 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16>
%5016 = torch.aten.convolution %5015, %546, %547, %744, %744, %744, %false, %745, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[640,640,3,3],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
%5017 = torch.aten.convolution %4932, %548, %549, %744, %745, %744, %false, %745, %int1 : !torch.vtensor<[2,960,32,32],f16>, !torch.vtensor<[640,960,1,1],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
%5018 = torch.aten.add.Tensor %5017, %5016, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
%5019 = torch.aten.div.Tensor %5018, %5 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[2,640,32,32],f16>
%5020 = torch.aten.clone %5019, %int0 : !torch.vtensor<[2,640,32,32],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
%5021 = torch.aten.view %5020, %1412 : !torch.vtensor<[2,640,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,32,20,1024],f16>
%5022 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%5023 = torch.aten.to.dtype %5022, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%5024 = torch.prim.ListConstruct %int2, %int32, %int20, %int1024 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5025 = torch.aten.broadcast_to %5023, %5024 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,20,1024],f32>
%5026 = torch.valsem.aten.copy %5025, %5021, %false : !torch.vtensor<[2,32,20,1024],f32>, !torch.vtensor<[2,32,20,1024],f16>, !torch.bool -> !torch.vtensor<[2,32,20,1024],f32>
%5027 = torch.aten.to.dtype %5026, %int7, %false, %false, %none : !torch.vtensor<[2,32,20,1024],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,20,1024],f64>
%5028 = torch.aten.sum.dim_IntList %5027, %754, %true, %none : !torch.vtensor<[2,32,20,1024],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%5029 = torch.aten.div.Scalar %5028, %int20480 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%5030 = torch.aten.sub.Tensor %5027, %5029, %float1.000000e00 : !torch.vtensor<[2,32,20,1024],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,20,1024],f64>
%5031 = torch.aten.mul.Tensor %5030, %5030 : !torch.vtensor<[2,32,20,1024],f64>, !torch.vtensor<[2,32,20,1024],f64> -> !torch.vtensor<[2,32,20,1024],f64>
%5032 = torch.aten.sum.dim_IntList %5031, %754, %true, %none : !torch.vtensor<[2,32,20,1024],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%5033 = torch.aten.div.Scalar %5032, %int20480 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%5034 = torch.aten.to.dtype %5033, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%5035 = torch.aten.sum.dim_IntList %5026, %754, %true, %none : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%5036 = torch.aten.div.Scalar %5035, %int20480 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%5037 = torch.aten.add.Tensor %5034, %4, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%5038 = torch.aten.rsqrt %5037 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%5039 = torch.aten.sub.Tensor %5021, %5036, %int1 : !torch.vtensor<[2,32,20,1024],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,20,1024],f32>
%5040 = torch.aten.mul.Tensor %5039, %5038 : !torch.vtensor<[2,32,20,1024],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,20,1024],f32>
%5041 = torch.aten.view %5040, %1433 : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f32>
%5042 = torch.aten.unsqueeze %550, %int-1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640,1],f16>
%5043 = torch.aten.unsqueeze %5042, %int-1 : !torch.vtensor<[640,1],f16>, !torch.int -> !torch.vtensor<[640,1,1],f16>
%5044 = torch.aten.mul.Tensor %5041, %5043 : !torch.vtensor<[2,640,32,32],f32>, !torch.vtensor<[640,1,1],f16> -> !torch.vtensor<[2,640,32,32],f32>
%5045 = torch.aten.unsqueeze %551, %int-1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640,1],f16>
%5046 = torch.aten.unsqueeze %5045, %int-1 : !torch.vtensor<[640,1],f16>, !torch.int -> !torch.vtensor<[640,1,1],f16>
%5047 = torch.aten.add.Tensor %5044, %5046, %int1 : !torch.vtensor<[2,640,32,32],f32>, !torch.vtensor<[640,1,1],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f32>
%5048 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%5049 = torch.aten.to.dtype %5048, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%5050 = torch.prim.ListConstruct %int2, %int640, %int32, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5051 = torch.aten.broadcast_to %5049, %5050 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f16>
%5052 = torch.valsem.aten.copy %5051, %5047, %false : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f32>, !torch.bool -> !torch.vtensor<[2,640,32,32],f16>
%5053 = torch.aten.convolution %5052, %552, %553, %744, %745, %744, %false, %745, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[640,640,1,1],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
%5054 = torch.aten.permute %5053, %866 : !torch.vtensor<[2,640,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,32,32,640],f16>
%5055 = torch.aten.view %5054, %1487 : !torch.vtensor<[2,32,32,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%5056 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%5057 = torch.aten.sum.dim_IntList %5055, %5056, %true, %none : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,1024,1],f16>
%5058 = torch.aten.div.Scalar %5057, %int640 : !torch.vtensor<[2,1024,1],f16>, !torch.int -> !torch.vtensor<[2,1024,1],f16>
%5059 = torch.prim.ListConstruct %int2, %int1024, %int640 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5060 = torch.aten.broadcast_to %5058, %5059 : !torch.vtensor<[2,1024,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%5061 = torch.aten.sub.Tensor %5055, %5060, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16>
%5062 = torch.aten.mul.Tensor %5061, %5061 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16>
%5063 = torch.aten.sum.dim_IntList %5062, %5056, %true, %none : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,1024,1],f16>
%5064 = torch.aten.div.Scalar %5063, %int640 : !torch.vtensor<[2,1024,1],f16>, !torch.int -> !torch.vtensor<[2,1024,1],f16>
%5065 = torch.aten.add.Scalar %5064, %float1.000000e-05, %int1 : !torch.vtensor<[2,1024,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,1024,1],f16>
%5066 = torch.aten.rsqrt %5065 : !torch.vtensor<[2,1024,1],f16> -> !torch.vtensor<[2,1024,1],f16>
%5067 = torch.prim.ListConstruct %int2, %int1024, %int640 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5068 = torch.aten.broadcast_to %5066, %5067 : !torch.vtensor<[2,1024,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%5069 = torch.aten.mul.Tensor %5061, %5068 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16>
%5070 = torch.aten.mul.Tensor %5069, %554 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[640],f16> -> !torch.vtensor<[2,1024,640],f16>
%5071 = torch.aten.add.Tensor %5070, %555, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16>
%5072 = torch.aten.transpose.int %556, %int0, %int1 : !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,640],f16>
%5073 = torch.aten.view %5071, %1506 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
%5074 = torch.aten.mm %5073, %5072 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16>
%5075 = torch.aten.view %5074, %1487 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%5076 = torch.aten.transpose.int %557, %int0, %int1 : !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,640],f16>
%5077 = torch.aten.view %5071, %1506 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
%5078 = torch.aten.mm %5077, %5076 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16>
%5079 = torch.aten.view %5078, %1487 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%5080 = torch.aten.transpose.int %558, %int0, %int1 : !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,640],f16>
%5081 = torch.aten.view %5071, %1506 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
%5082 = torch.aten.mm %5081, %5080 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16>
%5083 = torch.aten.view %5082, %1487 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%5084 = torch.aten.view %5075, %1518 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16>
%5085 = torch.aten.permute %5084, %901 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16>
%5086 = torch.aten.clone %5085, %int0 : !torch.vtensor<[2,8,1024,80],f16>, !torch.int -> !torch.vtensor<[2,8,1024,80],f16>
%5087 = torch.aten.view %5086, %1522 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%5088 = torch.aten.view %5079, %1518 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16>
%5089 = torch.aten.permute %5088, %901 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16>
%5090 = torch.aten.clone %5089, %int0 : !torch.vtensor<[2,8,1024,80],f16>, !torch.int -> !torch.vtensor<[2,8,1024,80],f16>
%5091 = torch.aten.view %5090, %1522 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%5092 = torch.aten.view %5083, %1518 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16>
%5093 = torch.aten.permute %5092, %901 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16>
%5094 = torch.aten.clone %5093, %int0 : !torch.vtensor<[2,8,1024,80],f16>, !torch.int -> !torch.vtensor<[2,8,1024,80],f16>
%5095 = torch.aten.view %5094, %1522 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%5096 = torch.aten.transpose.int %5091, %int-1, %int-2 : !torch.vtensor<[16,1024,80],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,80,1024],f16>
%5097 = torch.aten.broadcast_to %5087, %1522 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%5098 = torch.aten.view %5097, %1522 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%5099 = torch.aten.broadcast_to %5096, %1535 : !torch.vtensor<[16,80,1024],f16>, !torch.list<int> -> !torch.vtensor<[16,80,1024],f16>
%5100 = torch.aten.view %5099, %1535 : !torch.vtensor<[16,80,1024],f16>, !torch.list<int> -> !torch.vtensor<[16,80,1024],f16>
%5101 = torch.aten.bmm %5098, %5100 : !torch.vtensor<[16,1024,80],f16>, !torch.vtensor<[16,80,1024],f16> -> !torch.vtensor<[16,1024,1024],f16>
%5102 = torch.aten.view %5101, %1539 : !torch.vtensor<[16,1024,1024],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,1024],f16>
%5103 = torch.aten.mul.Tensor %5102, %2 : !torch.vtensor<[16,1024,1024],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,1024,1024],f16>
%values_46, %indices_47 = torch.aten.max.dim %5103, %int-1, %true : !torch.vtensor<[16,1024,1024],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,1024,1],f16>, !torch.vtensor<[16,1024,1],si64>
%5104 = torch.aten.sub.Tensor %5103, %values_46, %float1.000000e00 : !torch.vtensor<[16,1024,1024],f16>, !torch.vtensor<[16,1024,1],f16>, !torch.float -> !torch.vtensor<[16,1024,1024],f16>
%5105 = torch.aten.exp %5104 : !torch.vtensor<[16,1024,1024],f16> -> !torch.vtensor<[16,1024,1024],f16>
%5106 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%5107 = torch.aten.sum.dim_IntList %5105, %5106, %true, %none : !torch.vtensor<[16,1024,1024],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[16,1024,1],f16>
%5108 = torch.aten.div.Tensor %5105, %5107 : !torch.vtensor<[16,1024,1024],f16>, !torch.vtensor<[16,1024,1],f16> -> !torch.vtensor<[16,1024,1024],f16>
%5109 = torch.aten.broadcast_to %5108, %1539 : !torch.vtensor<[16,1024,1024],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,1024],f16>
%5110 = torch.aten.view %5109, %1539 : !torch.vtensor<[16,1024,1024],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,1024],f16>
%5111 = torch.aten.broadcast_to %5095, %1522 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%5112 = torch.aten.view %5111, %1522 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%5113 = torch.aten.bmm %5110, %5112 : !torch.vtensor<[16,1024,1024],f16>, !torch.vtensor<[16,1024,80],f16> -> !torch.vtensor<[16,1024,80],f16>
%5114 = torch.aten.view %5113, %1522 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%5115 = torch.aten.view %5114, %1553 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16>
%5116 = torch.aten.permute %5115, %901 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16>
%5117 = torch.aten.clone %5116, %int0 : !torch.vtensor<[2,1024,8,80],f16>, !torch.int -> !torch.vtensor<[2,1024,8,80],f16>
%5118 = torch.aten.view %5117, %1487 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%5119 = torch.aten.transpose.int %559, %int0, %int1 : !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,640],f16>
%5120 = torch.aten.view %5118, %1506 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
%5121 = torch.aten.mm %5120, %5119 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16>
%5122 = torch.aten.mul.Scalar %560, %int1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640],f16>
%5123 = torch.aten.add.Tensor %5122, %5121, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2048,640],f16>, !torch.int -> !torch.vtensor<[2048,640],f16>
%5124 = torch.aten.view %5123, %1487 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%5125 = torch.aten.add.Tensor %5124, %5055, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16>
%5126 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%5127 = torch.aten.sum.dim_IntList %5125, %5126, %true, %none : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,1024,1],f16>
%5128 = torch.aten.div.Scalar %5127, %int640 : !torch.vtensor<[2,1024,1],f16>, !torch.int -> !torch.vtensor<[2,1024,1],f16>
%5129 = torch.prim.ListConstruct %int2, %int1024, %int640 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5130 = torch.aten.broadcast_to %5128, %5129 : !torch.vtensor<[2,1024,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%5131 = torch.aten.sub.Tensor %5125, %5130, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16>
%5132 = torch.aten.mul.Tensor %5131, %5131 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16>
%5133 = torch.aten.sum.dim_IntList %5132, %5126, %true, %none : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,1024,1],f16>
%5134 = torch.aten.div.Scalar %5133, %int640 : !torch.vtensor<[2,1024,1],f16>, !torch.int -> !torch.vtensor<[2,1024,1],f16>
%5135 = torch.aten.add.Scalar %5134, %float1.000000e-05, %int1 : !torch.vtensor<[2,1024,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,1024,1],f16>
%5136 = torch.aten.rsqrt %5135 : !torch.vtensor<[2,1024,1],f16> -> !torch.vtensor<[2,1024,1],f16>
%5137 = torch.prim.ListConstruct %int2, %int1024, %int640 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5138 = torch.aten.broadcast_to %5136, %5137 : !torch.vtensor<[2,1024,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%5139 = torch.aten.mul.Tensor %5131, %5138 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16>
%5140 = torch.aten.mul.Tensor %5139, %561 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[640],f16> -> !torch.vtensor<[2,1024,640],f16>
%5141 = torch.aten.add.Tensor %5140, %562, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16>
%5142 = torch.aten.transpose.int %563, %int0, %int1 : !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,640],f16>
%5143 = torch.aten.view %5141, %1506 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
%5144 = torch.aten.mm %5143, %5142 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16>
%5145 = torch.aten.view %5144, %1487 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%5146 = torch.aten.transpose.int %564, %int0, %int1 : !torch.vtensor<[640,768],f16>, !torch.int, !torch.int -> !torch.vtensor<[768,640],f16>
%5147 = torch.aten.view %arg2, %968 : !torch.vtensor<[2,77,768],f16>, !torch.list<int> -> !torch.vtensor<[154,768],f16>
%5148 = torch.aten.mm %5147, %5146 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,640],f16> -> !torch.vtensor<[154,640],f16>
%5149 = torch.aten.view %5148, %1588 : !torch.vtensor<[154,640],f16>, !torch.list<int> -> !torch.vtensor<[2,77,640],f16>
%5150 = torch.aten.transpose.int %565, %int0, %int1 : !torch.vtensor<[640,768],f16>, !torch.int, !torch.int -> !torch.vtensor<[768,640],f16>
%5151 = torch.aten.view %arg2, %968 : !torch.vtensor<[2,77,768],f16>, !torch.list<int> -> !torch.vtensor<[154,768],f16>
%5152 = torch.aten.mm %5151, %5150 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,640],f16> -> !torch.vtensor<[154,640],f16>
%5153 = torch.aten.view %5152, %1588 : !torch.vtensor<[154,640],f16>, !torch.list<int> -> !torch.vtensor<[2,77,640],f16>
%5154 = torch.aten.view %5145, %1518 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16>
%5155 = torch.aten.permute %5154, %901 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16>
%5156 = torch.aten.clone %5155, %int0 : !torch.vtensor<[2,8,1024,80],f16>, !torch.int -> !torch.vtensor<[2,8,1024,80],f16>
%5157 = torch.aten.view %5156, %1522 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%5158 = torch.aten.view %5149, %1598 : !torch.vtensor<[2,77,640],f16>, !torch.list<int> -> !torch.vtensor<[2,77,8,80],f16>
%5159 = torch.aten.permute %5158, %901 : !torch.vtensor<[2,77,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,80],f16>
%5160 = torch.aten.clone %5159, %int0 : !torch.vtensor<[2,8,77,80],f16>, !torch.int -> !torch.vtensor<[2,8,77,80],f16>
%5161 = torch.aten.view %5160, %1602 : !torch.vtensor<[2,8,77,80],f16>, !torch.list<int> -> !torch.vtensor<[16,77,80],f16>
%5162 = torch.aten.view %5153, %1598 : !torch.vtensor<[2,77,640],f16>, !torch.list<int> -> !torch.vtensor<[2,77,8,80],f16>
%5163 = torch.aten.permute %5162, %901 : !torch.vtensor<[2,77,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,80],f16>
%5164 = torch.aten.clone %5163, %int0 : !torch.vtensor<[2,8,77,80],f16>, !torch.int -> !torch.vtensor<[2,8,77,80],f16>
%5165 = torch.aten.view %5164, %1602 : !torch.vtensor<[2,8,77,80],f16>, !torch.list<int> -> !torch.vtensor<[16,77,80],f16>
%5166 = torch.aten.transpose.int %5161, %int-1, %int-2 : !torch.vtensor<[16,77,80],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,80,77],f16>
%5167 = torch.aten.broadcast_to %5157, %1522 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%5168 = torch.aten.view %5167, %1522 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%5169 = torch.aten.broadcast_to %5166, %1611 : !torch.vtensor<[16,80,77],f16>, !torch.list<int> -> !torch.vtensor<[16,80,77],f16>
%5170 = torch.aten.view %5169, %1611 : !torch.vtensor<[16,80,77],f16>, !torch.list<int> -> !torch.vtensor<[16,80,77],f16>
%5171 = torch.aten.bmm %5168, %5170 : !torch.vtensor<[16,1024,80],f16>, !torch.vtensor<[16,80,77],f16> -> !torch.vtensor<[16,1024,77],f16>
%5172 = torch.aten.view %5171, %1615 : !torch.vtensor<[16,1024,77],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,77],f16>
%5173 = torch.aten.mul.Tensor %5172, %2 : !torch.vtensor<[16,1024,77],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,1024,77],f16>
%values_48, %indices_49 = torch.aten.max.dim %5173, %int-1, %true : !torch.vtensor<[16,1024,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,1024,1],f16>, !torch.vtensor<[16,1024,1],si64>
%5174 = torch.aten.sub.Tensor %5173, %values_48, %float1.000000e00 : !torch.vtensor<[16,1024,77],f16>, !torch.vtensor<[16,1024,1],f16>, !torch.float -> !torch.vtensor<[16,1024,77],f16>
%5175 = torch.aten.exp %5174 : !torch.vtensor<[16,1024,77],f16> -> !torch.vtensor<[16,1024,77],f16>
%5176 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%5177 = torch.aten.sum.dim_IntList %5175, %5176, %true, %none : !torch.vtensor<[16,1024,77],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[16,1024,1],f16>
%5178 = torch.aten.div.Tensor %5175, %5177 : !torch.vtensor<[16,1024,77],f16>, !torch.vtensor<[16,1024,1],f16> -> !torch.vtensor<[16,1024,77],f16>
%5179 = torch.aten.broadcast_to %5178, %1615 : !torch.vtensor<[16,1024,77],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,77],f16>
%5180 = torch.aten.view %5179, %1615 : !torch.vtensor<[16,1024,77],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,77],f16>
%5181 = torch.aten.broadcast_to %5165, %1602 : !torch.vtensor<[16,77,80],f16>, !torch.list<int> -> !torch.vtensor<[16,77,80],f16>
%5182 = torch.aten.view %5181, %1602 : !torch.vtensor<[16,77,80],f16>, !torch.list<int> -> !torch.vtensor<[16,77,80],f16>
%5183 = torch.aten.bmm %5180, %5182 : !torch.vtensor<[16,1024,77],f16>, !torch.vtensor<[16,77,80],f16> -> !torch.vtensor<[16,1024,80],f16>
%5184 = torch.aten.view %5183, %1522 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%5185 = torch.aten.view %5184, %1553 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16>
%5186 = torch.aten.permute %5185, %901 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16>
%5187 = torch.aten.clone %5186, %int0 : !torch.vtensor<[2,1024,8,80],f16>, !torch.int -> !torch.vtensor<[2,1024,8,80],f16>
%5188 = torch.aten.view %5187, %1487 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%5189 = torch.aten.transpose.int %566, %int0, %int1 : !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,640],f16>
%5190 = torch.aten.view %5188, %1506 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
%5191 = torch.aten.mm %5190, %5189 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16>
%5192 = torch.aten.mul.Scalar %567, %int1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640],f16>
%5193 = torch.aten.add.Tensor %5192, %5191, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2048,640],f16>, !torch.int -> !torch.vtensor<[2048,640],f16>
%5194 = torch.aten.view %5193, %1487 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%5195 = torch.aten.add.Tensor %5194, %5125, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16>
%5196 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%5197 = torch.aten.sum.dim_IntList %5195, %5196, %true, %none : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,1024,1],f16>
%5198 = torch.aten.div.Scalar %5197, %int640 : !torch.vtensor<[2,1024,1],f16>, !torch.int -> !torch.vtensor<[2,1024,1],f16>
%5199 = torch.prim.ListConstruct %int2, %int1024, %int640 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5200 = torch.aten.broadcast_to %5198, %5199 : !torch.vtensor<[2,1024,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%5201 = torch.aten.sub.Tensor %5195, %5200, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16>
%5202 = torch.aten.mul.Tensor %5201, %5201 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16>
%5203 = torch.aten.sum.dim_IntList %5202, %5196, %true, %none : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,1024,1],f16>
%5204 = torch.aten.div.Scalar %5203, %int640 : !torch.vtensor<[2,1024,1],f16>, !torch.int -> !torch.vtensor<[2,1024,1],f16>
%5205 = torch.aten.add.Scalar %5204, %float1.000000e-05, %int1 : !torch.vtensor<[2,1024,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,1024,1],f16>
%5206 = torch.aten.rsqrt %5205 : !torch.vtensor<[2,1024,1],f16> -> !torch.vtensor<[2,1024,1],f16>
%5207 = torch.prim.ListConstruct %int2, %int1024, %int640 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5208 = torch.aten.broadcast_to %5206, %5207 : !torch.vtensor<[2,1024,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%5209 = torch.aten.mul.Tensor %5201, %5208 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16>
%5210 = torch.aten.mul.Tensor %5209, %568 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[640],f16> -> !torch.vtensor<[2,1024,640],f16>
%5211 = torch.aten.add.Tensor %5210, %569, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16>
%5212 = torch.aten.transpose.int %570, %int0, %int1 : !torch.vtensor<[5120,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,5120],f16>
%5213 = torch.aten.view %5211, %1506 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
%5214 = torch.aten.mm %5213, %5212 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,5120],f16> -> !torch.vtensor<[2048,5120],f16>
%5215 = torch.aten.mul.Scalar %571, %int1 : !torch.vtensor<[5120],f16>, !torch.int -> !torch.vtensor<[5120],f16>
%5216 = torch.aten.add.Tensor %5215, %5214, %int1 : !torch.vtensor<[5120],f16>, !torch.vtensor<[2048,5120],f16>, !torch.int -> !torch.vtensor<[2048,5120],f16>
%5217 = torch.aten.view %5216, %1661 : !torch.vtensor<[2048,5120],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,5120],f16>
%5218 = torch.aten.slice.Tensor %5217, %int-1, %int0, %int2560, %int1 : !torch.vtensor<[2,1024,5120],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1024,2560],f16>
%5219 = torch.aten.slice.Tensor %5217, %int-1, %int2560, %int5120, %int1 : !torch.vtensor<[2,1024,5120],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1024,2560],f16>
%5220 = torch.aten.gelu %5219, %str : !torch.vtensor<[2,1024,2560],f16>, !torch.str -> !torch.vtensor<[2,1024,2560],f16>
%5221 = torch.aten.mul.Tensor %5218, %5220 : !torch.vtensor<[2,1024,2560],f16>, !torch.vtensor<[2,1024,2560],f16> -> !torch.vtensor<[2,1024,2560],f16>
%5222 = torch.aten.transpose.int %572, %int0, %int1 : !torch.vtensor<[640,2560],f16>, !torch.int, !torch.int -> !torch.vtensor<[2560,640],f16>
%5223 = torch.aten.view %5221, %1668 : !torch.vtensor<[2,1024,2560],f16>, !torch.list<int> -> !torch.vtensor<[2048,2560],f16>
%5224 = torch.aten.mm %5223, %5222 : !torch.vtensor<[2048,2560],f16>, !torch.vtensor<[2560,640],f16> -> !torch.vtensor<[2048,640],f16>
%5225 = torch.aten.mul.Scalar %573, %int1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640],f16>
%5226 = torch.aten.add.Tensor %5225, %5224, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2048,640],f16>, !torch.int -> !torch.vtensor<[2048,640],f16>
%5227 = torch.aten.view %5226, %1487 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%5228 = torch.aten.add.Tensor %5227, %5195, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16>
%5229 = torch.aten.view %5228, %1675 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2,32,32,640],f16>
%5230 = torch.aten.permute %5229, %1060 : !torch.vtensor<[2,32,32,640],f16>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f16>
%5231 = torch.aten.convolution %5230, %574, %575, %744, %745, %744, %false, %745, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[640,640,1,1],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
%5232 = torch.aten.add.Tensor %5231, %5019, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
%5233 = torch.aten.upsample_nearest2d.vec %5232, %none, %3418 : !torch.vtensor<[2,640,32,32],f16>, !torch.none, !torch.list<float> -> !torch.vtensor<[2,640,64,64],f16>
%5234 = torch.aten.convolution %5233, %576, %577, %744, %744, %744, %false, %745, %int1 : !torch.vtensor<[2,640,64,64],f16>, !torch.vtensor<[640,640,3,3],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,640,64,64],f16>
%5235 = torch.prim.ListConstruct %5234, %1360 : (!torch.vtensor<[2,640,64,64],f16>, !torch.vtensor<[2,320,64,64],f16>) -> !torch.list<vtensor>
%5236 = torch.aten.cat %5235, %int1 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[2,960,64,64],f16>
%5237 = torch.aten.clone %5236, %int0 : !torch.vtensor<[2,960,64,64],f16>, !torch.int -> !torch.vtensor<[2,960,64,64],f16>
%5238 = torch.prim.ListConstruct %int2, %int32, %int30, %int4096 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5239 = torch.aten.view %5237, %5238 : !torch.vtensor<[2,960,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,32,30,4096],f16>
%5240 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%5241 = torch.aten.to.dtype %5240, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%5242 = torch.prim.ListConstruct %int2, %int32, %int30, %int4096 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5243 = torch.aten.broadcast_to %5241, %5242 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,30,4096],f32>
%5244 = torch.valsem.aten.copy %5243, %5239, %false : !torch.vtensor<[2,32,30,4096],f32>, !torch.vtensor<[2,32,30,4096],f16>, !torch.bool -> !torch.vtensor<[2,32,30,4096],f32>
%5245 = torch.aten.to.dtype %5244, %int7, %false, %false, %none : !torch.vtensor<[2,32,30,4096],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,30,4096],f64>
%5246 = torch.aten.sum.dim_IntList %5245, %754, %true, %none : !torch.vtensor<[2,32,30,4096],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%5247 = torch.aten.div.Scalar %5246, %int122880 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%5248 = torch.aten.sub.Tensor %5245, %5247, %float1.000000e00 : !torch.vtensor<[2,32,30,4096],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,30,4096],f64>
%5249 = torch.aten.mul.Tensor %5248, %5248 : !torch.vtensor<[2,32,30,4096],f64>, !torch.vtensor<[2,32,30,4096],f64> -> !torch.vtensor<[2,32,30,4096],f64>
%5250 = torch.aten.sum.dim_IntList %5249, %754, %true, %none : !torch.vtensor<[2,32,30,4096],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%5251 = torch.aten.div.Scalar %5250, %int122880 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%5252 = torch.aten.to.dtype %5251, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%5253 = torch.aten.sum.dim_IntList %5244, %754, %true, %none : !torch.vtensor<[2,32,30,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%5254 = torch.aten.div.Scalar %5253, %int122880 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%5255 = torch.aten.add.Tensor %5252, %6, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%5256 = torch.aten.rsqrt %5255 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%5257 = torch.aten.sub.Tensor %5239, %5254, %int1 : !torch.vtensor<[2,32,30,4096],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,30,4096],f32>
%5258 = torch.aten.mul.Tensor %5257, %5256 : !torch.vtensor<[2,32,30,4096],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,30,4096],f32>
%5259 = torch.prim.ListConstruct %int2, %int960, %int64, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5260 = torch.aten.view %5258, %5259 : !torch.vtensor<[2,32,30,4096],f32>, !torch.list<int> -> !torch.vtensor<[2,960,64,64],f32>
%5261 = torch.aten.unsqueeze %578, %int-1 : !torch.vtensor<[960],f16>, !torch.int -> !torch.vtensor<[960,1],f16>
%5262 = torch.aten.unsqueeze %5261, %int-1 : !torch.vtensor<[960,1],f16>, !torch.int -> !torch.vtensor<[960,1,1],f16>
%5263 = torch.aten.mul.Tensor %5260, %5262 : !torch.vtensor<[2,960,64,64],f32>, !torch.vtensor<[960,1,1],f16> -> !torch.vtensor<[2,960,64,64],f32>
%5264 = torch.aten.unsqueeze %579, %int-1 : !torch.vtensor<[960],f16>, !torch.int -> !torch.vtensor<[960,1],f16>
%5265 = torch.aten.unsqueeze %5264, %int-1 : !torch.vtensor<[960,1],f16>, !torch.int -> !torch.vtensor<[960,1,1],f16>
%5266 = torch.aten.add.Tensor %5263, %5265, %int1 : !torch.vtensor<[2,960,64,64],f32>, !torch.vtensor<[960,1,1],f16>, !torch.int -> !torch.vtensor<[2,960,64,64],f32>
%5267 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%5268 = torch.aten.to.dtype %5267, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%5269 = torch.prim.ListConstruct %int2, %int960, %int64, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5270 = torch.aten.broadcast_to %5268, %5269 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,960,64,64],f16>
%5271 = torch.valsem.aten.copy %5270, %5266, %false : !torch.vtensor<[2,960,64,64],f16>, !torch.vtensor<[2,960,64,64],f32>, !torch.bool -> !torch.vtensor<[2,960,64,64],f16>
%5272 = torch.aten.sigmoid %5271 : !torch.vtensor<[2,960,64,64],f16> -> !torch.vtensor<[2,960,64,64],f16>
%5273 = torch.aten.mul.Tensor %5272, %5271 : !torch.vtensor<[2,960,64,64],f16>, !torch.vtensor<[2,960,64,64],f16> -> !torch.vtensor<[2,960,64,64],f16>
%5274 = torch.aten.convolution %5273, %580, %581, %744, %744, %744, %false, %745, %int1 : !torch.vtensor<[2,960,64,64],f16>, !torch.vtensor<[320,960,3,3],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,320,64,64],f16>
%5275 = torch.aten.sigmoid %743 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%5276 = torch.aten.mul.Tensor %5275, %743 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%5277 = torch.aten.transpose.int %582, %int0, %int1 : !torch.vtensor<[320,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,320],f16>
%5278 = torch.aten.mm %5276, %5277 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,320],f16> -> !torch.vtensor<[2,320],f16>
%5279 = torch.aten.mul.Scalar %583, %int1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320],f16>
%5280 = torch.aten.add.Tensor %5279, %5278, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[2,320],f16>, !torch.int -> !torch.vtensor<[2,320],f16>
%5281 = torch.aten.slice.Tensor %5280, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,320],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,320],f16>
%5282 = torch.aten.slice.Tensor %5281, %int1, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,320],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,320],f16>
%5283 = torch.aten.unsqueeze %5282, %int2 : !torch.vtensor<[2,320],f16>, !torch.int -> !torch.vtensor<[2,320,1],f16>
%5284 = torch.aten.unsqueeze %5283, %int3 : !torch.vtensor<[2,320,1],f16>, !torch.int -> !torch.vtensor<[2,320,1,1],f16>
%5285 = torch.aten.add.Tensor %5274, %5284, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,1,1],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f16>
%5286 = torch.aten.view %5285, %747 : !torch.vtensor<[2,320,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,32,10,4096],f16>
%5287 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%5288 = torch.aten.to.dtype %5287, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%5289 = torch.prim.ListConstruct %int2, %int32, %int10, %int4096 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5290 = torch.aten.broadcast_to %5288, %5289 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,10,4096],f32>
%5291 = torch.valsem.aten.copy %5290, %5286, %false : !torch.vtensor<[2,32,10,4096],f32>, !torch.vtensor<[2,32,10,4096],f16>, !torch.bool -> !torch.vtensor<[2,32,10,4096],f32>
%5292 = torch.aten.to.dtype %5291, %int7, %false, %false, %none : !torch.vtensor<[2,32,10,4096],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,10,4096],f64>
%5293 = torch.aten.sum.dim_IntList %5292, %754, %true, %none : !torch.vtensor<[2,32,10,4096],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%5294 = torch.aten.div.Scalar %5293, %int40960 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%5295 = torch.aten.sub.Tensor %5292, %5294, %float1.000000e00 : !torch.vtensor<[2,32,10,4096],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,10,4096],f64>
%5296 = torch.aten.mul.Tensor %5295, %5295 : !torch.vtensor<[2,32,10,4096],f64>, !torch.vtensor<[2,32,10,4096],f64> -> !torch.vtensor<[2,32,10,4096],f64>
%5297 = torch.aten.sum.dim_IntList %5296, %754, %true, %none : !torch.vtensor<[2,32,10,4096],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%5298 = torch.aten.div.Scalar %5297, %int40960 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%5299 = torch.aten.to.dtype %5298, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%5300 = torch.aten.sum.dim_IntList %5291, %754, %true, %none : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%5301 = torch.aten.div.Scalar %5300, %int40960 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%5302 = torch.aten.add.Tensor %5299, %6, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%5303 = torch.aten.rsqrt %5302 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%5304 = torch.aten.sub.Tensor %5286, %5301, %int1 : !torch.vtensor<[2,32,10,4096],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,10,4096],f32>
%5305 = torch.aten.mul.Tensor %5304, %5303 : !torch.vtensor<[2,32,10,4096],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,10,4096],f32>
%5306 = torch.aten.view %5305, %769 : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f32>
%5307 = torch.aten.unsqueeze %584, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16>
%5308 = torch.aten.unsqueeze %5307, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16>
%5309 = torch.aten.mul.Tensor %5306, %5308 : !torch.vtensor<[2,320,64,64],f32>, !torch.vtensor<[320,1,1],f16> -> !torch.vtensor<[2,320,64,64],f32>
%5310 = torch.aten.unsqueeze %585, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16>
%5311 = torch.aten.unsqueeze %5310, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16>
%5312 = torch.aten.add.Tensor %5309, %5311, %int1 : !torch.vtensor<[2,320,64,64],f32>, !torch.vtensor<[320,1,1],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f32>
%5313 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%5314 = torch.aten.to.dtype %5313, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%5315 = torch.prim.ListConstruct %int2, %int320, %int64, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5316 = torch.aten.broadcast_to %5314, %5315 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f16>
%5317 = torch.valsem.aten.copy %5316, %5312, %false : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f32>, !torch.bool -> !torch.vtensor<[2,320,64,64],f16>
%5318 = torch.aten.sigmoid %5317 : !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16>
%5319 = torch.aten.mul.Tensor %5318, %5317 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16>
%5320 = torch.aten.convolution %5319, %586, %587, %744, %744, %744, %false, %745, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[320,320,3,3],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,320,64,64],f16>
%5321 = torch.aten.convolution %5236, %588, %589, %744, %745, %744, %false, %745, %int1 : !torch.vtensor<[2,960,64,64],f16>, !torch.vtensor<[320,960,1,1],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,320,64,64],f16>
%5322 = torch.aten.add.Tensor %5321, %5320, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f16>
%5323 = torch.aten.div.Tensor %5322, %5 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[2,320,64,64],f16>
%5324 = torch.aten.clone %5323, %int0 : !torch.vtensor<[2,320,64,64],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f16>
%5325 = torch.aten.view %5324, %747 : !torch.vtensor<[2,320,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,32,10,4096],f16>
%5326 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%5327 = torch.aten.to.dtype %5326, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%5328 = torch.prim.ListConstruct %int2, %int32, %int10, %int4096 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5329 = torch.aten.broadcast_to %5327, %5328 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,10,4096],f32>
%5330 = torch.valsem.aten.copy %5329, %5325, %false : !torch.vtensor<[2,32,10,4096],f32>, !torch.vtensor<[2,32,10,4096],f16>, !torch.bool -> !torch.vtensor<[2,32,10,4096],f32>
%5331 = torch.aten.to.dtype %5330, %int7, %false, %false, %none : !torch.vtensor<[2,32,10,4096],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,10,4096],f64>
%5332 = torch.aten.sum.dim_IntList %5331, %754, %true, %none : !torch.vtensor<[2,32,10,4096],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%5333 = torch.aten.div.Scalar %5332, %int40960 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%5334 = torch.aten.sub.Tensor %5331, %5333, %float1.000000e00 : !torch.vtensor<[2,32,10,4096],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,10,4096],f64>
%5335 = torch.aten.mul.Tensor %5334, %5334 : !torch.vtensor<[2,32,10,4096],f64>, !torch.vtensor<[2,32,10,4096],f64> -> !torch.vtensor<[2,32,10,4096],f64>
%5336 = torch.aten.sum.dim_IntList %5335, %754, %true, %none : !torch.vtensor<[2,32,10,4096],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%5337 = torch.aten.div.Scalar %5336, %int40960 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%5338 = torch.aten.to.dtype %5337, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%5339 = torch.aten.sum.dim_IntList %5330, %754, %true, %none : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%5340 = torch.aten.div.Scalar %5339, %int40960 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%5341 = torch.aten.add.Tensor %5338, %4, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%5342 = torch.aten.rsqrt %5341 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%5343 = torch.aten.sub.Tensor %5325, %5340, %int1 : !torch.vtensor<[2,32,10,4096],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,10,4096],f32>
%5344 = torch.aten.mul.Tensor %5343, %5342 : !torch.vtensor<[2,32,10,4096],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,10,4096],f32>
%5345 = torch.aten.view %5344, %769 : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f32>
%5346 = torch.aten.unsqueeze %590, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16>
%5347 = torch.aten.unsqueeze %5346, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16>
%5348 = torch.aten.mul.Tensor %5345, %5347 : !torch.vtensor<[2,320,64,64],f32>, !torch.vtensor<[320,1,1],f16> -> !torch.vtensor<[2,320,64,64],f32>
%5349 = torch.aten.unsqueeze %591, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16>
%5350 = torch.aten.unsqueeze %5349, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16>
%5351 = torch.aten.add.Tensor %5348, %5350, %int1 : !torch.vtensor<[2,320,64,64],f32>, !torch.vtensor<[320,1,1],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f32>
%5352 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%5353 = torch.aten.to.dtype %5352, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%5354 = torch.prim.ListConstruct %int2, %int320, %int64, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5355 = torch.aten.broadcast_to %5353, %5354 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f16>
%5356 = torch.valsem.aten.copy %5355, %5351, %false : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f32>, !torch.bool -> !torch.vtensor<[2,320,64,64],f16>
%5357 = torch.aten.convolution %5356, %592, %593, %744, %745, %744, %false, %745, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[320,320,1,1],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,320,64,64],f16>
%5358 = torch.aten.permute %5357, %866 : !torch.vtensor<[2,320,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,64,64,320],f16>
%5359 = torch.aten.view %5358, %868 : !torch.vtensor<[2,64,64,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%5360 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%5361 = torch.aten.sum.dim_IntList %5359, %5360, %true, %none : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,4096,1],f16>
%5362 = torch.aten.div.Scalar %5361, %int320 : !torch.vtensor<[2,4096,1],f16>, !torch.int -> !torch.vtensor<[2,4096,1],f16>
%5363 = torch.prim.ListConstruct %int2, %int4096, %int320 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5364 = torch.aten.broadcast_to %5362, %5363 : !torch.vtensor<[2,4096,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%5365 = torch.aten.sub.Tensor %5359, %5364, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16>
%5366 = torch.aten.mul.Tensor %5365, %5365 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16>
%5367 = torch.aten.sum.dim_IntList %5366, %5360, %true, %none : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,4096,1],f16>
%5368 = torch.aten.div.Scalar %5367, %int320 : !torch.vtensor<[2,4096,1],f16>, !torch.int -> !torch.vtensor<[2,4096,1],f16>
%5369 = torch.aten.add.Scalar %5368, %float1.000000e-05, %int1 : !torch.vtensor<[2,4096,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,4096,1],f16>
%5370 = torch.aten.rsqrt %5369 : !torch.vtensor<[2,4096,1],f16> -> !torch.vtensor<[2,4096,1],f16>
%5371 = torch.prim.ListConstruct %int2, %int4096, %int320 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5372 = torch.aten.broadcast_to %5370, %5371 : !torch.vtensor<[2,4096,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%5373 = torch.aten.mul.Tensor %5365, %5372 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16>
%5374 = torch.aten.mul.Tensor %5373, %594 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[320],f16> -> !torch.vtensor<[2,4096,320],f16>
%5375 = torch.aten.add.Tensor %5374, %595, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16>
%5376 = torch.aten.transpose.int %596, %int0, %int1 : !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,320],f16>
%5377 = torch.aten.view %5375, %887 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
%5378 = torch.aten.mm %5377, %5376 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16>
%5379 = torch.aten.view %5378, %868 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%5380 = torch.aten.transpose.int %597, %int0, %int1 : !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,320],f16>
%5381 = torch.aten.view %5375, %887 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
%5382 = torch.aten.mm %5381, %5380 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16>
%5383 = torch.aten.view %5382, %868 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%5384 = torch.aten.transpose.int %598, %int0, %int1 : !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,320],f16>
%5385 = torch.aten.view %5375, %887 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
%5386 = torch.aten.mm %5385, %5384 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16>
%5387 = torch.aten.view %5386, %868 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%5388 = torch.aten.view %5379, %899 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16>
%5389 = torch.aten.permute %5388, %901 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16>
%5390 = torch.aten.clone %5389, %int0 : !torch.vtensor<[2,8,4096,40],f16>, !torch.int -> !torch.vtensor<[2,8,4096,40],f16>
%5391 = torch.aten.view %5390, %904 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%5392 = torch.aten.view %5383, %899 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16>
%5393 = torch.aten.permute %5392, %901 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16>
%5394 = torch.aten.clone %5393, %int0 : !torch.vtensor<[2,8,4096,40],f16>, !torch.int -> !torch.vtensor<[2,8,4096,40],f16>
%5395 = torch.aten.view %5394, %904 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%5396 = torch.aten.view %5387, %899 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16>
%5397 = torch.aten.permute %5396, %901 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16>
%5398 = torch.aten.clone %5397, %int0 : !torch.vtensor<[2,8,4096,40],f16>, !torch.int -> !torch.vtensor<[2,8,4096,40],f16>
%5399 = torch.aten.view %5398, %904 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%5400 = torch.aten.transpose.int %5395, %int-1, %int-2 : !torch.vtensor<[16,4096,40],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,40,4096],f16>
%5401 = torch.aten.broadcast_to %5391, %904 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%5402 = torch.aten.view %5401, %904 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%5403 = torch.aten.broadcast_to %5400, %917 : !torch.vtensor<[16,40,4096],f16>, !torch.list<int> -> !torch.vtensor<[16,40,4096],f16>
%5404 = torch.aten.view %5403, %917 : !torch.vtensor<[16,40,4096],f16>, !torch.list<int> -> !torch.vtensor<[16,40,4096],f16>
%5405 = torch.aten.bmm %5402, %5404 : !torch.vtensor<[16,4096,40],f16>, !torch.vtensor<[16,40,4096],f16> -> !torch.vtensor<[16,4096,4096],f16>
%5406 = torch.aten.view %5405, %921 : !torch.vtensor<[16,4096,4096],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,4096],f16>
%5407 = torch.aten.mul.Tensor %5406, %3 : !torch.vtensor<[16,4096,4096],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,4096,4096],f16>
%values_50, %indices_51 = torch.aten.max.dim %5407, %int-1, %true : !torch.vtensor<[16,4096,4096],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,4096,1],f16>, !torch.vtensor<[16,4096,1],si64>
%5408 = torch.aten.sub.Tensor %5407, %values_50, %float1.000000e00 : !torch.vtensor<[16,4096,4096],f16>, !torch.vtensor<[16,4096,1],f16>, !torch.float -> !torch.vtensor<[16,4096,4096],f16>
%5409 = torch.aten.exp %5408 : !torch.vtensor<[16,4096,4096],f16> -> !torch.vtensor<[16,4096,4096],f16>
%5410 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%5411 = torch.aten.sum.dim_IntList %5409, %5410, %true, %none : !torch.vtensor<[16,4096,4096],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[16,4096,1],f16>
%5412 = torch.aten.div.Tensor %5409, %5411 : !torch.vtensor<[16,4096,4096],f16>, !torch.vtensor<[16,4096,1],f16> -> !torch.vtensor<[16,4096,4096],f16>
%5413 = torch.aten.broadcast_to %5412, %921 : !torch.vtensor<[16,4096,4096],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,4096],f16>
%5414 = torch.aten.view %5413, %921 : !torch.vtensor<[16,4096,4096],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,4096],f16>
%5415 = torch.aten.broadcast_to %5399, %904 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%5416 = torch.aten.view %5415, %904 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%5417 = torch.aten.bmm %5414, %5416 : !torch.vtensor<[16,4096,4096],f16>, !torch.vtensor<[16,4096,40],f16> -> !torch.vtensor<[16,4096,40],f16>
%5418 = torch.aten.view %5417, %904 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%5419 = torch.aten.view %5418, %935 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16>
%5420 = torch.aten.permute %5419, %901 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16>
%5421 = torch.aten.clone %5420, %int0 : !torch.vtensor<[2,4096,8,40],f16>, !torch.int -> !torch.vtensor<[2,4096,8,40],f16>
%5422 = torch.aten.view %5421, %868 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%5423 = torch.aten.transpose.int %599, %int0, %int1 : !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,320],f16>
%5424 = torch.aten.view %5422, %887 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
%5425 = torch.aten.mm %5424, %5423 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16>
%5426 = torch.aten.mul.Scalar %600, %int1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320],f16>
%5427 = torch.aten.add.Tensor %5426, %5425, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[8192,320],f16>, !torch.int -> !torch.vtensor<[8192,320],f16>
%5428 = torch.aten.view %5427, %868 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%5429 = torch.aten.add.Tensor %5428, %5359, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16>
%5430 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%5431 = torch.aten.sum.dim_IntList %5429, %5430, %true, %none : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,4096,1],f16>
%5432 = torch.aten.div.Scalar %5431, %int320 : !torch.vtensor<[2,4096,1],f16>, !torch.int -> !torch.vtensor<[2,4096,1],f16>
%5433 = torch.prim.ListConstruct %int2, %int4096, %int320 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5434 = torch.aten.broadcast_to %5432, %5433 : !torch.vtensor<[2,4096,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%5435 = torch.aten.sub.Tensor %5429, %5434, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16>
%5436 = torch.aten.mul.Tensor %5435, %5435 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16>
%5437 = torch.aten.sum.dim_IntList %5436, %5430, %true, %none : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,4096,1],f16>
%5438 = torch.aten.div.Scalar %5437, %int320 : !torch.vtensor<[2,4096,1],f16>, !torch.int -> !torch.vtensor<[2,4096,1],f16>
%5439 = torch.aten.add.Scalar %5438, %float1.000000e-05, %int1 : !torch.vtensor<[2,4096,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,4096,1],f16>
%5440 = torch.aten.rsqrt %5439 : !torch.vtensor<[2,4096,1],f16> -> !torch.vtensor<[2,4096,1],f16>
%5441 = torch.prim.ListConstruct %int2, %int4096, %int320 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5442 = torch.aten.broadcast_to %5440, %5441 : !torch.vtensor<[2,4096,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%5443 = torch.aten.mul.Tensor %5435, %5442 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16>
%5444 = torch.aten.mul.Tensor %5443, %601 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[320],f16> -> !torch.vtensor<[2,4096,320],f16>
%5445 = torch.aten.add.Tensor %5444, %602, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16>
%5446 = torch.aten.transpose.int %603, %int0, %int1 : !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,320],f16>
%5447 = torch.aten.view %5445, %887 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
%5448 = torch.aten.mm %5447, %5446 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16>
%5449 = torch.aten.view %5448, %868 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%5450 = torch.aten.transpose.int %604, %int0, %int1 : !torch.vtensor<[320,768],f16>, !torch.int, !torch.int -> !torch.vtensor<[768,320],f16>
%5451 = torch.aten.view %arg2, %968 : !torch.vtensor<[2,77,768],f16>, !torch.list<int> -> !torch.vtensor<[154,768],f16>
%5452 = torch.aten.mm %5451, %5450 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,320],f16> -> !torch.vtensor<[154,320],f16>
%5453 = torch.aten.view %5452, %971 : !torch.vtensor<[154,320],f16>, !torch.list<int> -> !torch.vtensor<[2,77,320],f16>
%5454 = torch.aten.transpose.int %605, %int0, %int1 : !torch.vtensor<[320,768],f16>, !torch.int, !torch.int -> !torch.vtensor<[768,320],f16>
%5455 = torch.aten.view %arg2, %968 : !torch.vtensor<[2,77,768],f16>, !torch.list<int> -> !torch.vtensor<[154,768],f16>
%5456 = torch.aten.mm %5455, %5454 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,320],f16> -> !torch.vtensor<[154,320],f16>
%5457 = torch.aten.view %5456, %971 : !torch.vtensor<[154,320],f16>, !torch.list<int> -> !torch.vtensor<[2,77,320],f16>
%5458 = torch.aten.view %5449, %899 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16>
%5459 = torch.aten.permute %5458, %901 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16>
%5460 = torch.aten.clone %5459, %int0 : !torch.vtensor<[2,8,4096,40],f16>, !torch.int -> !torch.vtensor<[2,8,4096,40],f16>
%5461 = torch.aten.view %5460, %904 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%5462 = torch.aten.view %5453, %981 : !torch.vtensor<[2,77,320],f16>, !torch.list<int> -> !torch.vtensor<[2,77,8,40],f16>
%5463 = torch.aten.permute %5462, %901 : !torch.vtensor<[2,77,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,40],f16>
%5464 = torch.aten.clone %5463, %int0 : !torch.vtensor<[2,8,77,40],f16>, !torch.int -> !torch.vtensor<[2,8,77,40],f16>
%5465 = torch.aten.view %5464, %985 : !torch.vtensor<[2,8,77,40],f16>, !torch.list<int> -> !torch.vtensor<[16,77,40],f16>
%5466 = torch.aten.view %5457, %981 : !torch.vtensor<[2,77,320],f16>, !torch.list<int> -> !torch.vtensor<[2,77,8,40],f16>
%5467 = torch.aten.permute %5466, %901 : !torch.vtensor<[2,77,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,40],f16>
%5468 = torch.aten.clone %5467, %int0 : !torch.vtensor<[2,8,77,40],f16>, !torch.int -> !torch.vtensor<[2,8,77,40],f16>
%5469 = torch.aten.view %5468, %985 : !torch.vtensor<[2,8,77,40],f16>, !torch.list<int> -> !torch.vtensor<[16,77,40],f16>
%5470 = torch.aten.transpose.int %5465, %int-1, %int-2 : !torch.vtensor<[16,77,40],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,40,77],f16>
%5471 = torch.aten.broadcast_to %5461, %904 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%5472 = torch.aten.view %5471, %904 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%5473 = torch.aten.broadcast_to %5470, %994 : !torch.vtensor<[16,40,77],f16>, !torch.list<int> -> !torch.vtensor<[16,40,77],f16>
%5474 = torch.aten.view %5473, %994 : !torch.vtensor<[16,40,77],f16>, !torch.list<int> -> !torch.vtensor<[16,40,77],f16>
%5475 = torch.aten.bmm %5472, %5474 : !torch.vtensor<[16,4096,40],f16>, !torch.vtensor<[16,40,77],f16> -> !torch.vtensor<[16,4096,77],f16>
%5476 = torch.aten.view %5475, %998 : !torch.vtensor<[16,4096,77],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,77],f16>
%5477 = torch.aten.mul.Tensor %5476, %3 : !torch.vtensor<[16,4096,77],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,4096,77],f16>
%values_52, %indices_53 = torch.aten.max.dim %5477, %int-1, %true : !torch.vtensor<[16,4096,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,4096,1],f16>, !torch.vtensor<[16,4096,1],si64>
%5478 = torch.aten.sub.Tensor %5477, %values_52, %float1.000000e00 : !torch.vtensor<[16,4096,77],f16>, !torch.vtensor<[16,4096,1],f16>, !torch.float -> !torch.vtensor<[16,4096,77],f16>
%5479 = torch.aten.exp %5478 : !torch.vtensor<[16,4096,77],f16> -> !torch.vtensor<[16,4096,77],f16>
%5480 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%5481 = torch.aten.sum.dim_IntList %5479, %5480, %true, %none : !torch.vtensor<[16,4096,77],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[16,4096,1],f16>
%5482 = torch.aten.div.Tensor %5479, %5481 : !torch.vtensor<[16,4096,77],f16>, !torch.vtensor<[16,4096,1],f16> -> !torch.vtensor<[16,4096,77],f16>
%5483 = torch.aten.broadcast_to %5482, %998 : !torch.vtensor<[16,4096,77],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,77],f16>
%5484 = torch.aten.view %5483, %998 : !torch.vtensor<[16,4096,77],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,77],f16>
%5485 = torch.aten.broadcast_to %5469, %985 : !torch.vtensor<[16,77,40],f16>, !torch.list<int> -> !torch.vtensor<[16,77,40],f16>
%5486 = torch.aten.view %5485, %985 : !torch.vtensor<[16,77,40],f16>, !torch.list<int> -> !torch.vtensor<[16,77,40],f16>
%5487 = torch.aten.bmm %5484, %5486 : !torch.vtensor<[16,4096,77],f16>, !torch.vtensor<[16,77,40],f16> -> !torch.vtensor<[16,4096,40],f16>
%5488 = torch.aten.view %5487, %904 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%5489 = torch.aten.view %5488, %935 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16>
%5490 = torch.aten.permute %5489, %901 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16>
%5491 = torch.aten.clone %5490, %int0 : !torch.vtensor<[2,4096,8,40],f16>, !torch.int -> !torch.vtensor<[2,4096,8,40],f16>
%5492 = torch.aten.view %5491, %868 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%5493 = torch.aten.transpose.int %606, %int0, %int1 : !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,320],f16>
%5494 = torch.aten.view %5492, %887 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
%5495 = torch.aten.mm %5494, %5493 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16>
%5496 = torch.aten.mul.Scalar %607, %int1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320],f16>
%5497 = torch.aten.add.Tensor %5496, %5495, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[8192,320],f16>, !torch.int -> !torch.vtensor<[8192,320],f16>
%5498 = torch.aten.view %5497, %868 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%5499 = torch.aten.add.Tensor %5498, %5429, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16>
%5500 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%5501 = torch.aten.sum.dim_IntList %5499, %5500, %true, %none : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,4096,1],f16>
%5502 = torch.aten.div.Scalar %5501, %int320 : !torch.vtensor<[2,4096,1],f16>, !torch.int -> !torch.vtensor<[2,4096,1],f16>
%5503 = torch.prim.ListConstruct %int2, %int4096, %int320 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5504 = torch.aten.broadcast_to %5502, %5503 : !torch.vtensor<[2,4096,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%5505 = torch.aten.sub.Tensor %5499, %5504, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16>
%5506 = torch.aten.mul.Tensor %5505, %5505 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16>
%5507 = torch.aten.sum.dim_IntList %5506, %5500, %true, %none : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,4096,1],f16>
%5508 = torch.aten.div.Scalar %5507, %int320 : !torch.vtensor<[2,4096,1],f16>, !torch.int -> !torch.vtensor<[2,4096,1],f16>
%5509 = torch.aten.add.Scalar %5508, %float1.000000e-05, %int1 : !torch.vtensor<[2,4096,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,4096,1],f16>
%5510 = torch.aten.rsqrt %5509 : !torch.vtensor<[2,4096,1],f16> -> !torch.vtensor<[2,4096,1],f16>
%5511 = torch.prim.ListConstruct %int2, %int4096, %int320 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5512 = torch.aten.broadcast_to %5510, %5511 : !torch.vtensor<[2,4096,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%5513 = torch.aten.mul.Tensor %5505, %5512 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16>
%5514 = torch.aten.mul.Tensor %5513, %608 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[320],f16> -> !torch.vtensor<[2,4096,320],f16>
%5515 = torch.aten.add.Tensor %5514, %609, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16>
%5516 = torch.aten.transpose.int %610, %int0, %int1 : !torch.vtensor<[2560,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,2560],f16>
%5517 = torch.aten.view %5515, %887 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
%5518 = torch.aten.mm %5517, %5516 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,2560],f16> -> !torch.vtensor<[8192,2560],f16>
%5519 = torch.aten.mul.Scalar %611, %int1 : !torch.vtensor<[2560],f16>, !torch.int -> !torch.vtensor<[2560],f16>
%5520 = torch.aten.add.Tensor %5519, %5518, %int1 : !torch.vtensor<[2560],f16>, !torch.vtensor<[8192,2560],f16>, !torch.int -> !torch.vtensor<[8192,2560],f16>
%5521 = torch.aten.view %5520, %1044 : !torch.vtensor<[8192,2560],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,2560],f16>
%5522 = torch.aten.slice.Tensor %5521, %int-1, %int0, %int1280, %int1 : !torch.vtensor<[2,4096,2560],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,4096,1280],f16>
%5523 = torch.aten.slice.Tensor %5521, %int-1, %int1280, %int2560, %int1 : !torch.vtensor<[2,4096,2560],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,4096,1280],f16>
%5524 = torch.aten.gelu %5523, %str : !torch.vtensor<[2,4096,1280],f16>, !torch.str -> !torch.vtensor<[2,4096,1280],f16>
%5525 = torch.aten.mul.Tensor %5522, %5524 : !torch.vtensor<[2,4096,1280],f16>, !torch.vtensor<[2,4096,1280],f16> -> !torch.vtensor<[2,4096,1280],f16>
%5526 = torch.aten.transpose.int %612, %int0, %int1 : !torch.vtensor<[320,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,320],f16>
%5527 = torch.aten.view %5525, %1051 : !torch.vtensor<[2,4096,1280],f16>, !torch.list<int> -> !torch.vtensor<[8192,1280],f16>
%5528 = torch.aten.mm %5527, %5526 : !torch.vtensor<[8192,1280],f16>, !torch.vtensor<[1280,320],f16> -> !torch.vtensor<[8192,320],f16>
%5529 = torch.aten.mul.Scalar %613, %int1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320],f16>
%5530 = torch.aten.add.Tensor %5529, %5528, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[8192,320],f16>, !torch.int -> !torch.vtensor<[8192,320],f16>
%5531 = torch.aten.view %5530, %868 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%5532 = torch.aten.add.Tensor %5531, %5499, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16>
%5533 = torch.aten.view %5532, %1058 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[2,64,64,320],f16>
%5534 = torch.aten.permute %5533, %1060 : !torch.vtensor<[2,64,64,320],f16>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f16>
%5535 = torch.aten.convolution %5534, %614, %615, %744, %745, %744, %false, %745, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[320,320,1,1],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,320,64,64],f16>
%5536 = torch.aten.add.Tensor %5535, %5323, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f16>
%5537 = torch.prim.ListConstruct %5536, %1063 : (!torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f16>) -> !torch.list<vtensor>
%5538 = torch.aten.cat %5537, %int1 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[2,640,64,64],f16>
%5539 = torch.aten.clone %5538, %int0 : !torch.vtensor<[2,640,64,64],f16>, !torch.int -> !torch.vtensor<[2,640,64,64],f16>
%5540 = torch.prim.ListConstruct %int2, %int32, %int20, %int4096 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5541 = torch.aten.view %5539, %5540 : !torch.vtensor<[2,640,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,32,20,4096],f16>
%5542 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%5543 = torch.aten.to.dtype %5542, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%5544 = torch.prim.ListConstruct %int2, %int32, %int20, %int4096 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5545 = torch.aten.broadcast_to %5543, %5544 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,20,4096],f32>
%5546 = torch.valsem.aten.copy %5545, %5541, %false : !torch.vtensor<[2,32,20,4096],f32>, !torch.vtensor<[2,32,20,4096],f16>, !torch.bool -> !torch.vtensor<[2,32,20,4096],f32>
%5547 = torch.aten.to.dtype %5546, %int7, %false, %false, %none : !torch.vtensor<[2,32,20,4096],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,20,4096],f64>
%5548 = torch.aten.sum.dim_IntList %5547, %754, %true, %none : !torch.vtensor<[2,32,20,4096],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%5549 = torch.aten.div.Scalar %5548, %int81920 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%5550 = torch.aten.sub.Tensor %5547, %5549, %float1.000000e00 : !torch.vtensor<[2,32,20,4096],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,20,4096],f64>
%5551 = torch.aten.mul.Tensor %5550, %5550 : !torch.vtensor<[2,32,20,4096],f64>, !torch.vtensor<[2,32,20,4096],f64> -> !torch.vtensor<[2,32,20,4096],f64>
%5552 = torch.aten.sum.dim_IntList %5551, %754, %true, %none : !torch.vtensor<[2,32,20,4096],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%5553 = torch.aten.div.Scalar %5552, %int81920 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%5554 = torch.aten.to.dtype %5553, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%5555 = torch.aten.sum.dim_IntList %5546, %754, %true, %none : !torch.vtensor<[2,32,20,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%5556 = torch.aten.div.Scalar %5555, %int81920 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%5557 = torch.aten.add.Tensor %5554, %6, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%5558 = torch.aten.rsqrt %5557 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%5559 = torch.aten.sub.Tensor %5541, %5556, %int1 : !torch.vtensor<[2,32,20,4096],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,20,4096],f32>
%5560 = torch.aten.mul.Tensor %5559, %5558 : !torch.vtensor<[2,32,20,4096],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,20,4096],f32>
%5561 = torch.prim.ListConstruct %int2, %int640, %int64, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5562 = torch.aten.view %5560, %5561 : !torch.vtensor<[2,32,20,4096],f32>, !torch.list<int> -> !torch.vtensor<[2,640,64,64],f32>
%5563 = torch.aten.unsqueeze %616, %int-1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640,1],f16>
%5564 = torch.aten.unsqueeze %5563, %int-1 : !torch.vtensor<[640,1],f16>, !torch.int -> !torch.vtensor<[640,1,1],f16>
%5565 = torch.aten.mul.Tensor %5562, %5564 : !torch.vtensor<[2,640,64,64],f32>, !torch.vtensor<[640,1,1],f16> -> !torch.vtensor<[2,640,64,64],f32>
%5566 = torch.aten.unsqueeze %617, %int-1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640,1],f16>
%5567 = torch.aten.unsqueeze %5566, %int-1 : !torch.vtensor<[640,1],f16>, !torch.int -> !torch.vtensor<[640,1,1],f16>
%5568 = torch.aten.add.Tensor %5565, %5567, %int1 : !torch.vtensor<[2,640,64,64],f32>, !torch.vtensor<[640,1,1],f16>, !torch.int -> !torch.vtensor<[2,640,64,64],f32>
%5569 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%5570 = torch.aten.to.dtype %5569, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%5571 = torch.prim.ListConstruct %int2, %int640, %int64, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5572 = torch.aten.broadcast_to %5570, %5571 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,640,64,64],f16>
%5573 = torch.valsem.aten.copy %5572, %5568, %false : !torch.vtensor<[2,640,64,64],f16>, !torch.vtensor<[2,640,64,64],f32>, !torch.bool -> !torch.vtensor<[2,640,64,64],f16>
%5574 = torch.aten.sigmoid %5573 : !torch.vtensor<[2,640,64,64],f16> -> !torch.vtensor<[2,640,64,64],f16>
%5575 = torch.aten.mul.Tensor %5574, %5573 : !torch.vtensor<[2,640,64,64],f16>, !torch.vtensor<[2,640,64,64],f16> -> !torch.vtensor<[2,640,64,64],f16>
%5576 = torch.aten.convolution %5575, %618, %619, %744, %744, %744, %false, %745, %int1 : !torch.vtensor<[2,640,64,64],f16>, !torch.vtensor<[320,640,3,3],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,320,64,64],f16>
%5577 = torch.aten.sigmoid %743 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%5578 = torch.aten.mul.Tensor %5577, %743 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%5579 = torch.aten.transpose.int %620, %int0, %int1 : !torch.vtensor<[320,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,320],f16>
%5580 = torch.aten.mm %5578, %5579 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,320],f16> -> !torch.vtensor<[2,320],f16>
%5581 = torch.aten.mul.Scalar %621, %int1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320],f16>
%5582 = torch.aten.add.Tensor %5581, %5580, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[2,320],f16>, !torch.int -> !torch.vtensor<[2,320],f16>
%5583 = torch.aten.slice.Tensor %5582, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,320],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,320],f16>
%5584 = torch.aten.slice.Tensor %5583, %int1, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,320],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,320],f16>
%5585 = torch.aten.unsqueeze %5584, %int2 : !torch.vtensor<[2,320],f16>, !torch.int -> !torch.vtensor<[2,320,1],f16>
%5586 = torch.aten.unsqueeze %5585, %int3 : !torch.vtensor<[2,320,1],f16>, !torch.int -> !torch.vtensor<[2,320,1,1],f16>
%5587 = torch.aten.add.Tensor %5576, %5586, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,1,1],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f16>
%5588 = torch.aten.view %5587, %747 : !torch.vtensor<[2,320,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,32,10,4096],f16>
%5589 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%5590 = torch.aten.to.dtype %5589, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%5591 = torch.prim.ListConstruct %int2, %int32, %int10, %int4096 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5592 = torch.aten.broadcast_to %5590, %5591 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,10,4096],f32>
%5593 = torch.valsem.aten.copy %5592, %5588, %false : !torch.vtensor<[2,32,10,4096],f32>, !torch.vtensor<[2,32,10,4096],f16>, !torch.bool -> !torch.vtensor<[2,32,10,4096],f32>
%5594 = torch.aten.to.dtype %5593, %int7, %false, %false, %none : !torch.vtensor<[2,32,10,4096],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,10,4096],f64>
%5595 = torch.aten.sum.dim_IntList %5594, %754, %true, %none : !torch.vtensor<[2,32,10,4096],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%5596 = torch.aten.div.Scalar %5595, %int40960 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%5597 = torch.aten.sub.Tensor %5594, %5596, %float1.000000e00 : !torch.vtensor<[2,32,10,4096],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,10,4096],f64>
%5598 = torch.aten.mul.Tensor %5597, %5597 : !torch.vtensor<[2,32,10,4096],f64>, !torch.vtensor<[2,32,10,4096],f64> -> !torch.vtensor<[2,32,10,4096],f64>
%5599 = torch.aten.sum.dim_IntList %5598, %754, %true, %none : !torch.vtensor<[2,32,10,4096],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%5600 = torch.aten.div.Scalar %5599, %int40960 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%5601 = torch.aten.to.dtype %5600, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%5602 = torch.aten.sum.dim_IntList %5593, %754, %true, %none : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%5603 = torch.aten.div.Scalar %5602, %int40960 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%5604 = torch.aten.add.Tensor %5601, %6, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%5605 = torch.aten.rsqrt %5604 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%5606 = torch.aten.sub.Tensor %5588, %5603, %int1 : !torch.vtensor<[2,32,10,4096],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,10,4096],f32>
%5607 = torch.aten.mul.Tensor %5606, %5605 : !torch.vtensor<[2,32,10,4096],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,10,4096],f32>
%5608 = torch.aten.view %5607, %769 : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f32>
%5609 = torch.aten.unsqueeze %622, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16>
%5610 = torch.aten.unsqueeze %5609, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16>
%5611 = torch.aten.mul.Tensor %5608, %5610 : !torch.vtensor<[2,320,64,64],f32>, !torch.vtensor<[320,1,1],f16> -> !torch.vtensor<[2,320,64,64],f32>
%5612 = torch.aten.unsqueeze %623, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16>
%5613 = torch.aten.unsqueeze %5612, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16>
%5614 = torch.aten.add.Tensor %5611, %5613, %int1 : !torch.vtensor<[2,320,64,64],f32>, !torch.vtensor<[320,1,1],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f32>
%5615 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%5616 = torch.aten.to.dtype %5615, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%5617 = torch.prim.ListConstruct %int2, %int320, %int64, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5618 = torch.aten.broadcast_to %5616, %5617 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f16>
%5619 = torch.valsem.aten.copy %5618, %5614, %false : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f32>, !torch.bool -> !torch.vtensor<[2,320,64,64],f16>
%5620 = torch.aten.sigmoid %5619 : !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16>
%5621 = torch.aten.mul.Tensor %5620, %5619 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16>
%5622 = torch.aten.convolution %5621, %624, %625, %744, %744, %744, %false, %745, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[320,320,3,3],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,320,64,64],f16>
%5623 = torch.aten.convolution %5538, %626, %627, %744, %745, %744, %false, %745, %int1 : !torch.vtensor<[2,640,64,64],f16>, !torch.vtensor<[320,640,1,1],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,320,64,64],f16>
%5624 = torch.aten.add.Tensor %5623, %5622, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f16>
%5625 = torch.aten.div.Tensor %5624, %5 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[2,320,64,64],f16>
%5626 = torch.aten.clone %5625, %int0 : !torch.vtensor<[2,320,64,64],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f16>
%5627 = torch.aten.view %5626, %747 : !torch.vtensor<[2,320,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,32,10,4096],f16>
%5628 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%5629 = torch.aten.to.dtype %5628, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%5630 = torch.prim.ListConstruct %int2, %int32, %int10, %int4096 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5631 = torch.aten.broadcast_to %5629, %5630 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,10,4096],f32>
%5632 = torch.valsem.aten.copy %5631, %5627, %false : !torch.vtensor<[2,32,10,4096],f32>, !torch.vtensor<[2,32,10,4096],f16>, !torch.bool -> !torch.vtensor<[2,32,10,4096],f32>
%5633 = torch.aten.to.dtype %5632, %int7, %false, %false, %none : !torch.vtensor<[2,32,10,4096],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,10,4096],f64>
%5634 = torch.aten.sum.dim_IntList %5633, %754, %true, %none : !torch.vtensor<[2,32,10,4096],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%5635 = torch.aten.div.Scalar %5634, %int40960 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%5636 = torch.aten.sub.Tensor %5633, %5635, %float1.000000e00 : !torch.vtensor<[2,32,10,4096],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,10,4096],f64>
%5637 = torch.aten.mul.Tensor %5636, %5636 : !torch.vtensor<[2,32,10,4096],f64>, !torch.vtensor<[2,32,10,4096],f64> -> !torch.vtensor<[2,32,10,4096],f64>
%5638 = torch.aten.sum.dim_IntList %5637, %754, %true, %none : !torch.vtensor<[2,32,10,4096],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%5639 = torch.aten.div.Scalar %5638, %int40960 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%5640 = torch.aten.to.dtype %5639, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%5641 = torch.aten.sum.dim_IntList %5632, %754, %true, %none : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%5642 = torch.aten.div.Scalar %5641, %int40960 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%5643 = torch.aten.add.Tensor %5640, %4, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%5644 = torch.aten.rsqrt %5643 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%5645 = torch.aten.sub.Tensor %5627, %5642, %int1 : !torch.vtensor<[2,32,10,4096],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,10,4096],f32>
%5646 = torch.aten.mul.Tensor %5645, %5644 : !torch.vtensor<[2,32,10,4096],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,10,4096],f32>
%5647 = torch.aten.view %5646, %769 : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f32>
%5648 = torch.aten.unsqueeze %628, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16>
%5649 = torch.aten.unsqueeze %5648, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16>
%5650 = torch.aten.mul.Tensor %5647, %5649 : !torch.vtensor<[2,320,64,64],f32>, !torch.vtensor<[320,1,1],f16> -> !torch.vtensor<[2,320,64,64],f32>
%5651 = torch.aten.unsqueeze %629, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16>
%5652 = torch.aten.unsqueeze %5651, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16>
%5653 = torch.aten.add.Tensor %5650, %5652, %int1 : !torch.vtensor<[2,320,64,64],f32>, !torch.vtensor<[320,1,1],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f32>
%5654 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%5655 = torch.aten.to.dtype %5654, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%5656 = torch.prim.ListConstruct %int2, %int320, %int64, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5657 = torch.aten.broadcast_to %5655, %5656 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f16>
%5658 = torch.valsem.aten.copy %5657, %5653, %false : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f32>, !torch.bool -> !torch.vtensor<[2,320,64,64],f16>
%5659 = torch.aten.convolution %5658, %630, %631, %744, %745, %744, %false, %745, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[320,320,1,1],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,320,64,64],f16>
%5660 = torch.aten.permute %5659, %866 : !torch.vtensor<[2,320,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,64,64,320],f16>
%5661 = torch.aten.view %5660, %868 : !torch.vtensor<[2,64,64,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%5662 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%5663 = torch.aten.sum.dim_IntList %5661, %5662, %true, %none : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,4096,1],f16>
%5664 = torch.aten.div.Scalar %5663, %int320 : !torch.vtensor<[2,4096,1],f16>, !torch.int -> !torch.vtensor<[2,4096,1],f16>
%5665 = torch.prim.ListConstruct %int2, %int4096, %int320 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5666 = torch.aten.broadcast_to %5664, %5665 : !torch.vtensor<[2,4096,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%5667 = torch.aten.sub.Tensor %5661, %5666, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16>
%5668 = torch.aten.mul.Tensor %5667, %5667 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16>
%5669 = torch.aten.sum.dim_IntList %5668, %5662, %true, %none : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,4096,1],f16>
%5670 = torch.aten.div.Scalar %5669, %int320 : !torch.vtensor<[2,4096,1],f16>, !torch.int -> !torch.vtensor<[2,4096,1],f16>
%5671 = torch.aten.add.Scalar %5670, %float1.000000e-05, %int1 : !torch.vtensor<[2,4096,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,4096,1],f16>
%5672 = torch.aten.rsqrt %5671 : !torch.vtensor<[2,4096,1],f16> -> !torch.vtensor<[2,4096,1],f16>
%5673 = torch.prim.ListConstruct %int2, %int4096, %int320 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5674 = torch.aten.broadcast_to %5672, %5673 : !torch.vtensor<[2,4096,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%5675 = torch.aten.mul.Tensor %5667, %5674 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16>
%5676 = torch.aten.mul.Tensor %5675, %632 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[320],f16> -> !torch.vtensor<[2,4096,320],f16>
%5677 = torch.aten.add.Tensor %5676, %633, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16>
%5678 = torch.aten.transpose.int %634, %int0, %int1 : !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,320],f16>
%5679 = torch.aten.view %5677, %887 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
%5680 = torch.aten.mm %5679, %5678 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16>
%5681 = torch.aten.view %5680, %868 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%5682 = torch.aten.transpose.int %635, %int0, %int1 : !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,320],f16>
%5683 = torch.aten.view %5677, %887 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
%5684 = torch.aten.mm %5683, %5682 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16>
%5685 = torch.aten.view %5684, %868 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%5686 = torch.aten.transpose.int %636, %int0, %int1 : !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,320],f16>
%5687 = torch.aten.view %5677, %887 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
%5688 = torch.aten.mm %5687, %5686 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16>
%5689 = torch.aten.view %5688, %868 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%5690 = torch.aten.view %5681, %899 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16>
%5691 = torch.aten.permute %5690, %901 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16>
%5692 = torch.aten.clone %5691, %int0 : !torch.vtensor<[2,8,4096,40],f16>, !torch.int -> !torch.vtensor<[2,8,4096,40],f16>
%5693 = torch.aten.view %5692, %904 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%5694 = torch.aten.view %5685, %899 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16>
%5695 = torch.aten.permute %5694, %901 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16>
%5696 = torch.aten.clone %5695, %int0 : !torch.vtensor<[2,8,4096,40],f16>, !torch.int -> !torch.vtensor<[2,8,4096,40],f16>
%5697 = torch.aten.view %5696, %904 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%5698 = torch.aten.view %5689, %899 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16>
%5699 = torch.aten.permute %5698, %901 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16>
%5700 = torch.aten.clone %5699, %int0 : !torch.vtensor<[2,8,4096,40],f16>, !torch.int -> !torch.vtensor<[2,8,4096,40],f16>
%5701 = torch.aten.view %5700, %904 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%5702 = torch.aten.transpose.int %5697, %int-1, %int-2 : !torch.vtensor<[16,4096,40],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,40,4096],f16>
%5703 = torch.aten.broadcast_to %5693, %904 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%5704 = torch.aten.view %5703, %904 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%5705 = torch.aten.broadcast_to %5702, %917 : !torch.vtensor<[16,40,4096],f16>, !torch.list<int> -> !torch.vtensor<[16,40,4096],f16>
%5706 = torch.aten.view %5705, %917 : !torch.vtensor<[16,40,4096],f16>, !torch.list<int> -> !torch.vtensor<[16,40,4096],f16>
%5707 = torch.aten.bmm %5704, %5706 : !torch.vtensor<[16,4096,40],f16>, !torch.vtensor<[16,40,4096],f16> -> !torch.vtensor<[16,4096,4096],f16>
%5708 = torch.aten.view %5707, %921 : !torch.vtensor<[16,4096,4096],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,4096],f16>
%5709 = torch.aten.mul.Tensor %5708, %3 : !torch.vtensor<[16,4096,4096],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,4096,4096],f16>
%values_54, %indices_55 = torch.aten.max.dim %5709, %int-1, %true : !torch.vtensor<[16,4096,4096],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,4096,1],f16>, !torch.vtensor<[16,4096,1],si64>
%5710 = torch.aten.sub.Tensor %5709, %values_54, %float1.000000e00 : !torch.vtensor<[16,4096,4096],f16>, !torch.vtensor<[16,4096,1],f16>, !torch.float -> !torch.vtensor<[16,4096,4096],f16>
%5711 = torch.aten.exp %5710 : !torch.vtensor<[16,4096,4096],f16> -> !torch.vtensor<[16,4096,4096],f16>
%5712 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%5713 = torch.aten.sum.dim_IntList %5711, %5712, %true, %none : !torch.vtensor<[16,4096,4096],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[16,4096,1],f16>
%5714 = torch.aten.div.Tensor %5711, %5713 : !torch.vtensor<[16,4096,4096],f16>, !torch.vtensor<[16,4096,1],f16> -> !torch.vtensor<[16,4096,4096],f16>
%5715 = torch.aten.broadcast_to %5714, %921 : !torch.vtensor<[16,4096,4096],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,4096],f16>
%5716 = torch.aten.view %5715, %921 : !torch.vtensor<[16,4096,4096],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,4096],f16>
%5717 = torch.aten.broadcast_to %5701, %904 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%5718 = torch.aten.view %5717, %904 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%5719 = torch.aten.bmm %5716, %5718 : !torch.vtensor<[16,4096,4096],f16>, !torch.vtensor<[16,4096,40],f16> -> !torch.vtensor<[16,4096,40],f16>
%5720 = torch.aten.view %5719, %904 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%5721 = torch.aten.view %5720, %935 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16>
%5722 = torch.aten.permute %5721, %901 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16>
%5723 = torch.aten.clone %5722, %int0 : !torch.vtensor<[2,4096,8,40],f16>, !torch.int -> !torch.vtensor<[2,4096,8,40],f16>
%5724 = torch.aten.view %5723, %868 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%5725 = torch.aten.transpose.int %637, %int0, %int1 : !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,320],f16>
%5726 = torch.aten.view %5724, %887 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
%5727 = torch.aten.mm %5726, %5725 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16>
%5728 = torch.aten.mul.Scalar %638, %int1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320],f16>
%5729 = torch.aten.add.Tensor %5728, %5727, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[8192,320],f16>, !torch.int -> !torch.vtensor<[8192,320],f16>
%5730 = torch.aten.view %5729, %868 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%5731 = torch.aten.add.Tensor %5730, %5661, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16>
%5732 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%5733 = torch.aten.sum.dim_IntList %5731, %5732, %true, %none : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,4096,1],f16>
%5734 = torch.aten.div.Scalar %5733, %int320 : !torch.vtensor<[2,4096,1],f16>, !torch.int -> !torch.vtensor<[2,4096,1],f16>
%5735 = torch.prim.ListConstruct %int2, %int4096, %int320 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5736 = torch.aten.broadcast_to %5734, %5735 : !torch.vtensor<[2,4096,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%5737 = torch.aten.sub.Tensor %5731, %5736, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16>
%5738 = torch.aten.mul.Tensor %5737, %5737 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16>
%5739 = torch.aten.sum.dim_IntList %5738, %5732, %true, %none : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,4096,1],f16>
%5740 = torch.aten.div.Scalar %5739, %int320 : !torch.vtensor<[2,4096,1],f16>, !torch.int -> !torch.vtensor<[2,4096,1],f16>
%5741 = torch.aten.add.Scalar %5740, %float1.000000e-05, %int1 : !torch.vtensor<[2,4096,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,4096,1],f16>
%5742 = torch.aten.rsqrt %5741 : !torch.vtensor<[2,4096,1],f16> -> !torch.vtensor<[2,4096,1],f16>
%5743 = torch.prim.ListConstruct %int2, %int4096, %int320 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5744 = torch.aten.broadcast_to %5742, %5743 : !torch.vtensor<[2,4096,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%5745 = torch.aten.mul.Tensor %5737, %5744 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16>
%5746 = torch.aten.mul.Tensor %5745, %639 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[320],f16> -> !torch.vtensor<[2,4096,320],f16>
%5747 = torch.aten.add.Tensor %5746, %640, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16>
%5748 = torch.aten.transpose.int %641, %int0, %int1 : !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,320],f16>
%5749 = torch.aten.view %5747, %887 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
%5750 = torch.aten.mm %5749, %5748 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16>
%5751 = torch.aten.view %5750, %868 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%5752 = torch.aten.transpose.int %642, %int0, %int1 : !torch.vtensor<[320,768],f16>, !torch.int, !torch.int -> !torch.vtensor<[768,320],f16>
%5753 = torch.aten.view %arg2, %968 : !torch.vtensor<[2,77,768],f16>, !torch.list<int> -> !torch.vtensor<[154,768],f16>
%5754 = torch.aten.mm %5753, %5752 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,320],f16> -> !torch.vtensor<[154,320],f16>
%5755 = torch.aten.view %5754, %971 : !torch.vtensor<[154,320],f16>, !torch.list<int> -> !torch.vtensor<[2,77,320],f16>
%5756 = torch.aten.transpose.int %643, %int0, %int1 : !torch.vtensor<[320,768],f16>, !torch.int, !torch.int -> !torch.vtensor<[768,320],f16>
%5757 = torch.aten.view %arg2, %968 : !torch.vtensor<[2,77,768],f16>, !torch.list<int> -> !torch.vtensor<[154,768],f16>
%5758 = torch.aten.mm %5757, %5756 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,320],f16> -> !torch.vtensor<[154,320],f16>
%5759 = torch.aten.view %5758, %971 : !torch.vtensor<[154,320],f16>, !torch.list<int> -> !torch.vtensor<[2,77,320],f16>
%5760 = torch.aten.view %5751, %899 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16>
%5761 = torch.aten.permute %5760, %901 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16>
%5762 = torch.aten.clone %5761, %int0 : !torch.vtensor<[2,8,4096,40],f16>, !torch.int -> !torch.vtensor<[2,8,4096,40],f16>
%5763 = torch.aten.view %5762, %904 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%5764 = torch.aten.view %5755, %981 : !torch.vtensor<[2,77,320],f16>, !torch.list<int> -> !torch.vtensor<[2,77,8,40],f16>
%5765 = torch.aten.permute %5764, %901 : !torch.vtensor<[2,77,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,40],f16>
%5766 = torch.aten.clone %5765, %int0 : !torch.vtensor<[2,8,77,40],f16>, !torch.int -> !torch.vtensor<[2,8,77,40],f16>
%5767 = torch.aten.view %5766, %985 : !torch.vtensor<[2,8,77,40],f16>, !torch.list<int> -> !torch.vtensor<[16,77,40],f16>
%5768 = torch.aten.view %5759, %981 : !torch.vtensor<[2,77,320],f16>, !torch.list<int> -> !torch.vtensor<[2,77,8,40],f16>
%5769 = torch.aten.permute %5768, %901 : !torch.vtensor<[2,77,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,40],f16>
%5770 = torch.aten.clone %5769, %int0 : !torch.vtensor<[2,8,77,40],f16>, !torch.int -> !torch.vtensor<[2,8,77,40],f16>
%5771 = torch.aten.view %5770, %985 : !torch.vtensor<[2,8,77,40],f16>, !torch.list<int> -> !torch.vtensor<[16,77,40],f16>
%5772 = torch.aten.transpose.int %5767, %int-1, %int-2 : !torch.vtensor<[16,77,40],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,40,77],f16>
%5773 = torch.aten.broadcast_to %5763, %904 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%5774 = torch.aten.view %5773, %904 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%5775 = torch.aten.broadcast_to %5772, %994 : !torch.vtensor<[16,40,77],f16>, !torch.list<int> -> !torch.vtensor<[16,40,77],f16>
%5776 = torch.aten.view %5775, %994 : !torch.vtensor<[16,40,77],f16>, !torch.list<int> -> !torch.vtensor<[16,40,77],f16>
%5777 = torch.aten.bmm %5774, %5776 : !torch.vtensor<[16,4096,40],f16>, !torch.vtensor<[16,40,77],f16> -> !torch.vtensor<[16,4096,77],f16>
%5778 = torch.aten.view %5777, %998 : !torch.vtensor<[16,4096,77],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,77],f16>
%5779 = torch.aten.mul.Tensor %5778, %3 : !torch.vtensor<[16,4096,77],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,4096,77],f16>
%values_56, %indices_57 = torch.aten.max.dim %5779, %int-1, %true : !torch.vtensor<[16,4096,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,4096,1],f16>, !torch.vtensor<[16,4096,1],si64>
%5780 = torch.aten.sub.Tensor %5779, %values_56, %float1.000000e00 : !torch.vtensor<[16,4096,77],f16>, !torch.vtensor<[16,4096,1],f16>, !torch.float -> !torch.vtensor<[16,4096,77],f16>
%5781 = torch.aten.exp %5780 : !torch.vtensor<[16,4096,77],f16> -> !torch.vtensor<[16,4096,77],f16>
%5782 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%5783 = torch.aten.sum.dim_IntList %5781, %5782, %true, %none : !torch.vtensor<[16,4096,77],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[16,4096,1],f16>
%5784 = torch.aten.div.Tensor %5781, %5783 : !torch.vtensor<[16,4096,77],f16>, !torch.vtensor<[16,4096,1],f16> -> !torch.vtensor<[16,4096,77],f16>
%5785 = torch.aten.broadcast_to %5784, %998 : !torch.vtensor<[16,4096,77],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,77],f16>
%5786 = torch.aten.view %5785, %998 : !torch.vtensor<[16,4096,77],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,77],f16>
%5787 = torch.aten.broadcast_to %5771, %985 : !torch.vtensor<[16,77,40],f16>, !torch.list<int> -> !torch.vtensor<[16,77,40],f16>
%5788 = torch.aten.view %5787, %985 : !torch.vtensor<[16,77,40],f16>, !torch.list<int> -> !torch.vtensor<[16,77,40],f16>
%5789 = torch.aten.bmm %5786, %5788 : !torch.vtensor<[16,4096,77],f16>, !torch.vtensor<[16,77,40],f16> -> !torch.vtensor<[16,4096,40],f16>
%5790 = torch.aten.view %5789, %904 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%5791 = torch.aten.view %5790, %935 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16>
%5792 = torch.aten.permute %5791, %901 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16>
%5793 = torch.aten.clone %5792, %int0 : !torch.vtensor<[2,4096,8,40],f16>, !torch.int -> !torch.vtensor<[2,4096,8,40],f16>
%5794 = torch.aten.view %5793, %868 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%5795 = torch.aten.transpose.int %644, %int0, %int1 : !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,320],f16>
%5796 = torch.aten.view %5794, %887 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
%5797 = torch.aten.mm %5796, %5795 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16>
%5798 = torch.aten.mul.Scalar %645, %int1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320],f16>
%5799 = torch.aten.add.Tensor %5798, %5797, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[8192,320],f16>, !torch.int -> !torch.vtensor<[8192,320],f16>
%5800 = torch.aten.view %5799, %868 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%5801 = torch.aten.add.Tensor %5800, %5731, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16>
%5802 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%5803 = torch.aten.sum.dim_IntList %5801, %5802, %true, %none : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,4096,1],f16>
%5804 = torch.aten.div.Scalar %5803, %int320 : !torch.vtensor<[2,4096,1],f16>, !torch.int -> !torch.vtensor<[2,4096,1],f16>
%5805 = torch.prim.ListConstruct %int2, %int4096, %int320 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5806 = torch.aten.broadcast_to %5804, %5805 : !torch.vtensor<[2,4096,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%5807 = torch.aten.sub.Tensor %5801, %5806, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16>
%5808 = torch.aten.mul.Tensor %5807, %5807 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16>
%5809 = torch.aten.sum.dim_IntList %5808, %5802, %true, %none : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,4096,1],f16>
%5810 = torch.aten.div.Scalar %5809, %int320 : !torch.vtensor<[2,4096,1],f16>, !torch.int -> !torch.vtensor<[2,4096,1],f16>
%5811 = torch.aten.add.Scalar %5810, %float1.000000e-05, %int1 : !torch.vtensor<[2,4096,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,4096,1],f16>
%5812 = torch.aten.rsqrt %5811 : !torch.vtensor<[2,4096,1],f16> -> !torch.vtensor<[2,4096,1],f16>
%5813 = torch.prim.ListConstruct %int2, %int4096, %int320 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5814 = torch.aten.broadcast_to %5812, %5813 : !torch.vtensor<[2,4096,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%5815 = torch.aten.mul.Tensor %5807, %5814 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16>
%5816 = torch.aten.mul.Tensor %5815, %646 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[320],f16> -> !torch.vtensor<[2,4096,320],f16>
%5817 = torch.aten.add.Tensor %5816, %647, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16>
%5818 = torch.aten.transpose.int %648, %int0, %int1 : !torch.vtensor<[2560,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,2560],f16>
%5819 = torch.aten.view %5817, %887 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
%5820 = torch.aten.mm %5819, %5818 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,2560],f16> -> !torch.vtensor<[8192,2560],f16>
%5821 = torch.aten.mul.Scalar %649, %int1 : !torch.vtensor<[2560],f16>, !torch.int -> !torch.vtensor<[2560],f16>
%5822 = torch.aten.add.Tensor %5821, %5820, %int1 : !torch.vtensor<[2560],f16>, !torch.vtensor<[8192,2560],f16>, !torch.int -> !torch.vtensor<[8192,2560],f16>
%5823 = torch.aten.view %5822, %1044 : !torch.vtensor<[8192,2560],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,2560],f16>
%5824 = torch.aten.slice.Tensor %5823, %int-1, %int0, %int1280, %int1 : !torch.vtensor<[2,4096,2560],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,4096,1280],f16>
%5825 = torch.aten.slice.Tensor %5823, %int-1, %int1280, %int2560, %int1 : !torch.vtensor<[2,4096,2560],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,4096,1280],f16>
%5826 = torch.aten.gelu %5825, %str : !torch.vtensor<[2,4096,1280],f16>, !torch.str -> !torch.vtensor<[2,4096,1280],f16>
%5827 = torch.aten.mul.Tensor %5824, %5826 : !torch.vtensor<[2,4096,1280],f16>, !torch.vtensor<[2,4096,1280],f16> -> !torch.vtensor<[2,4096,1280],f16>
%5828 = torch.aten.transpose.int %650, %int0, %int1 : !torch.vtensor<[320,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,320],f16>
%5829 = torch.aten.view %5827, %1051 : !torch.vtensor<[2,4096,1280],f16>, !torch.list<int> -> !torch.vtensor<[8192,1280],f16>
%5830 = torch.aten.mm %5829, %5828 : !torch.vtensor<[8192,1280],f16>, !torch.vtensor<[1280,320],f16> -> !torch.vtensor<[8192,320],f16>
%5831 = torch.aten.mul.Scalar %651, %int1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320],f16>
%5832 = torch.aten.add.Tensor %5831, %5830, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[8192,320],f16>, !torch.int -> !torch.vtensor<[8192,320],f16>
%5833 = torch.aten.view %5832, %868 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%5834 = torch.aten.add.Tensor %5833, %5801, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16>
%5835 = torch.aten.view %5834, %1058 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[2,64,64,320],f16>
%5836 = torch.aten.permute %5835, %1060 : !torch.vtensor<[2,64,64,320],f16>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f16>
%5837 = torch.aten.convolution %5836, %652, %653, %744, %745, %744, %false, %745, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[320,320,1,1],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,320,64,64],f16>
%5838 = torch.aten.add.Tensor %5837, %5625, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f16>
%5839 = torch.prim.ListConstruct %5838, %746 : (!torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f16>) -> !torch.list<vtensor>
%5840 = torch.aten.cat %5839, %int1 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[2,640,64,64],f16>
%5841 = torch.aten.view %5840, %5540 : !torch.vtensor<[2,640,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,32,20,4096],f16>
%5842 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%5843 = torch.aten.to.dtype %5842, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%5844 = torch.prim.ListConstruct %int2, %int32, %int20, %int4096 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5845 = torch.aten.broadcast_to %5843, %5844 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,20,4096],f32>
%5846 = torch.valsem.aten.copy %5845, %5841, %false : !torch.vtensor<[2,32,20,4096],f32>, !torch.vtensor<[2,32,20,4096],f16>, !torch.bool -> !torch.vtensor<[2,32,20,4096],f32>
%5847 = torch.aten.to.dtype %5846, %int7, %false, %false, %none : !torch.vtensor<[2,32,20,4096],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,20,4096],f64>
%5848 = torch.aten.sum.dim_IntList %5847, %754, %true, %none : !torch.vtensor<[2,32,20,4096],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%5849 = torch.aten.div.Scalar %5848, %int81920 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%5850 = torch.aten.sub.Tensor %5847, %5849, %float1.000000e00 : !torch.vtensor<[2,32,20,4096],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,20,4096],f64>
%5851 = torch.aten.mul.Tensor %5850, %5850 : !torch.vtensor<[2,32,20,4096],f64>, !torch.vtensor<[2,32,20,4096],f64> -> !torch.vtensor<[2,32,20,4096],f64>
%5852 = torch.aten.sum.dim_IntList %5851, %754, %true, %none : !torch.vtensor<[2,32,20,4096],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%5853 = torch.aten.div.Scalar %5852, %int81920 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%5854 = torch.aten.to.dtype %5853, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%5855 = torch.aten.sum.dim_IntList %5846, %754, %true, %none : !torch.vtensor<[2,32,20,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%5856 = torch.aten.div.Scalar %5855, %int81920 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%5857 = torch.aten.add.Tensor %5854, %6, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%5858 = torch.aten.rsqrt %5857 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%5859 = torch.aten.sub.Tensor %5841, %5856, %int1 : !torch.vtensor<[2,32,20,4096],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,20,4096],f32>
%5860 = torch.aten.mul.Tensor %5859, %5858 : !torch.vtensor<[2,32,20,4096],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,20,4096],f32>
%5861 = torch.aten.view %5860, %5561 : !torch.vtensor<[2,32,20,4096],f32>, !torch.list<int> -> !torch.vtensor<[2,640,64,64],f32>
%5862 = torch.aten.unsqueeze %654, %int-1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640,1],f16>
%5863 = torch.aten.unsqueeze %5862, %int-1 : !torch.vtensor<[640,1],f16>, !torch.int -> !torch.vtensor<[640,1,1],f16>
%5864 = torch.aten.mul.Tensor %5861, %5863 : !torch.vtensor<[2,640,64,64],f32>, !torch.vtensor<[640,1,1],f16> -> !torch.vtensor<[2,640,64,64],f32>
%5865 = torch.aten.unsqueeze %655, %int-1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640,1],f16>
%5866 = torch.aten.unsqueeze %5865, %int-1 : !torch.vtensor<[640,1],f16>, !torch.int -> !torch.vtensor<[640,1,1],f16>
%5867 = torch.aten.add.Tensor %5864, %5866, %int1 : !torch.vtensor<[2,640,64,64],f32>, !torch.vtensor<[640,1,1],f16>, !torch.int -> !torch.vtensor<[2,640,64,64],f32>
%5868 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%5869 = torch.aten.to.dtype %5868, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%5870 = torch.prim.ListConstruct %int2, %int640, %int64, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5871 = torch.aten.broadcast_to %5869, %5870 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,640,64,64],f16>
%5872 = torch.valsem.aten.copy %5871, %5867, %false : !torch.vtensor<[2,640,64,64],f16>, !torch.vtensor<[2,640,64,64],f32>, !torch.bool -> !torch.vtensor<[2,640,64,64],f16>
%5873 = torch.aten.sigmoid %5872 : !torch.vtensor<[2,640,64,64],f16> -> !torch.vtensor<[2,640,64,64],f16>
%5874 = torch.aten.mul.Tensor %5873, %5872 : !torch.vtensor<[2,640,64,64],f16>, !torch.vtensor<[2,640,64,64],f16> -> !torch.vtensor<[2,640,64,64],f16>
%5875 = torch.aten.convolution %5874, %656, %657, %744, %744, %744, %false, %745, %int1 : !torch.vtensor<[2,640,64,64],f16>, !torch.vtensor<[320,640,3,3],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,320,64,64],f16>
%5876 = torch.aten.sigmoid %743 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%5877 = torch.aten.mul.Tensor %5876, %743 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%5878 = torch.aten.transpose.int %658, %int0, %int1 : !torch.vtensor<[320,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,320],f16>
%5879 = torch.aten.mm %5877, %5878 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,320],f16> -> !torch.vtensor<[2,320],f16>
%5880 = torch.aten.mul.Scalar %659, %int1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320],f16>
%5881 = torch.aten.add.Tensor %5880, %5879, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[2,320],f16>, !torch.int -> !torch.vtensor<[2,320],f16>
%5882 = torch.aten.slice.Tensor %5881, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,320],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,320],f16>
%5883 = torch.aten.slice.Tensor %5882, %int1, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,320],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,320],f16>
%5884 = torch.aten.unsqueeze %5883, %int2 : !torch.vtensor<[2,320],f16>, !torch.int -> !torch.vtensor<[2,320,1],f16>
%5885 = torch.aten.unsqueeze %5884, %int3 : !torch.vtensor<[2,320,1],f16>, !torch.int -> !torch.vtensor<[2,320,1,1],f16>
%5886 = torch.aten.add.Tensor %5875, %5885, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,1,1],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f16>
%5887 = torch.aten.view %5886, %747 : !torch.vtensor<[2,320,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,32,10,4096],f16>
%5888 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%5889 = torch.aten.to.dtype %5888, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%5890 = torch.prim.ListConstruct %int2, %int32, %int10, %int4096 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5891 = torch.aten.broadcast_to %5889, %5890 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,10,4096],f32>
%5892 = torch.valsem.aten.copy %5891, %5887, %false : !torch.vtensor<[2,32,10,4096],f32>, !torch.vtensor<[2,32,10,4096],f16>, !torch.bool -> !torch.vtensor<[2,32,10,4096],f32>
%5893 = torch.aten.to.dtype %5892, %int7, %false, %false, %none : !torch.vtensor<[2,32,10,4096],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,10,4096],f64>
%5894 = torch.aten.sum.dim_IntList %5893, %754, %true, %none : !torch.vtensor<[2,32,10,4096],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%5895 = torch.aten.div.Scalar %5894, %int40960 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%5896 = torch.aten.sub.Tensor %5893, %5895, %float1.000000e00 : !torch.vtensor<[2,32,10,4096],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,10,4096],f64>
%5897 = torch.aten.mul.Tensor %5896, %5896 : !torch.vtensor<[2,32,10,4096],f64>, !torch.vtensor<[2,32,10,4096],f64> -> !torch.vtensor<[2,32,10,4096],f64>
%5898 = torch.aten.sum.dim_IntList %5897, %754, %true, %none : !torch.vtensor<[2,32,10,4096],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%5899 = torch.aten.div.Scalar %5898, %int40960 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%5900 = torch.aten.to.dtype %5899, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%5901 = torch.aten.sum.dim_IntList %5892, %754, %true, %none : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%5902 = torch.aten.div.Scalar %5901, %int40960 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%5903 = torch.aten.add.Tensor %5900, %6, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%5904 = torch.aten.rsqrt %5903 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%5905 = torch.aten.sub.Tensor %5887, %5902, %int1 : !torch.vtensor<[2,32,10,4096],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,10,4096],f32>
%5906 = torch.aten.mul.Tensor %5905, %5904 : !torch.vtensor<[2,32,10,4096],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,10,4096],f32>
%5907 = torch.aten.view %5906, %769 : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f32>
%5908 = torch.aten.unsqueeze %660, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16>
%5909 = torch.aten.unsqueeze %5908, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16>
%5910 = torch.aten.mul.Tensor %5907, %5909 : !torch.vtensor<[2,320,64,64],f32>, !torch.vtensor<[320,1,1],f16> -> !torch.vtensor<[2,320,64,64],f32>
%5911 = torch.aten.unsqueeze %661, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16>
%5912 = torch.aten.unsqueeze %5911, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16>
%5913 = torch.aten.add.Tensor %5910, %5912, %int1 : !torch.vtensor<[2,320,64,64],f32>, !torch.vtensor<[320,1,1],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f32>
%5914 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%5915 = torch.aten.to.dtype %5914, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%5916 = torch.prim.ListConstruct %int2, %int320, %int64, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5917 = torch.aten.broadcast_to %5915, %5916 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f16>
%5918 = torch.valsem.aten.copy %5917, %5913, %false : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f32>, !torch.bool -> !torch.vtensor<[2,320,64,64],f16>
%5919 = torch.aten.sigmoid %5918 : !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16>
%5920 = torch.aten.mul.Tensor %5919, %5918 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16>
%5921 = torch.aten.convolution %5920, %662, %663, %744, %744, %744, %false, %745, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[320,320,3,3],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,320,64,64],f16>
%5922 = torch.aten.convolution %5840, %664, %665, %744, %745, %744, %false, %745, %int1 : !torch.vtensor<[2,640,64,64],f16>, !torch.vtensor<[320,640,1,1],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,320,64,64],f16>
%5923 = torch.aten.add.Tensor %5922, %5921, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f16>
%5924 = torch.aten.div.Tensor %5923, %5 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[2,320,64,64],f16>
%5925 = torch.aten.view %5924, %747 : !torch.vtensor<[2,320,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,32,10,4096],f16>
%5926 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%5927 = torch.aten.to.dtype %5926, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%5928 = torch.prim.ListConstruct %int2, %int32, %int10, %int4096 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5929 = torch.aten.broadcast_to %5927, %5928 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,10,4096],f32>
%5930 = torch.valsem.aten.copy %5929, %5925, %false : !torch.vtensor<[2,32,10,4096],f32>, !torch.vtensor<[2,32,10,4096],f16>, !torch.bool -> !torch.vtensor<[2,32,10,4096],f32>
%5931 = torch.aten.to.dtype %5930, %int7, %false, %false, %none : !torch.vtensor<[2,32,10,4096],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,10,4096],f64>
%5932 = torch.aten.sum.dim_IntList %5931, %754, %true, %none : !torch.vtensor<[2,32,10,4096],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%5933 = torch.aten.div.Scalar %5932, %int40960 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%5934 = torch.aten.sub.Tensor %5931, %5933, %float1.000000e00 : !torch.vtensor<[2,32,10,4096],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,10,4096],f64>
%5935 = torch.aten.mul.Tensor %5934, %5934 : !torch.vtensor<[2,32,10,4096],f64>, !torch.vtensor<[2,32,10,4096],f64> -> !torch.vtensor<[2,32,10,4096],f64>
%5936 = torch.aten.sum.dim_IntList %5935, %754, %true, %none : !torch.vtensor<[2,32,10,4096],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%5937 = torch.aten.div.Scalar %5936, %int40960 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%5938 = torch.aten.to.dtype %5937, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%5939 = torch.aten.sum.dim_IntList %5930, %754, %true, %none : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%5940 = torch.aten.div.Scalar %5939, %int40960 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%5941 = torch.aten.add.Tensor %5938, %4, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%5942 = torch.aten.rsqrt %5941 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%5943 = torch.aten.sub.Tensor %5925, %5940, %int1 : !torch.vtensor<[2,32,10,4096],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,10,4096],f32>
%5944 = torch.aten.mul.Tensor %5943, %5942 : !torch.vtensor<[2,32,10,4096],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,10,4096],f32>
%5945 = torch.aten.view %5944, %769 : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f32>
%5946 = torch.aten.unsqueeze %666, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16>
%5947 = torch.aten.unsqueeze %5946, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16>
%5948 = torch.aten.mul.Tensor %5945, %5947 : !torch.vtensor<[2,320,64,64],f32>, !torch.vtensor<[320,1,1],f16> -> !torch.vtensor<[2,320,64,64],f32>
%5949 = torch.aten.unsqueeze %667, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16>
%5950 = torch.aten.unsqueeze %5949, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16>
%5951 = torch.aten.add.Tensor %5948, %5950, %int1 : !torch.vtensor<[2,320,64,64],f32>, !torch.vtensor<[320,1,1],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f32>
%5952 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%5953 = torch.aten.to.dtype %5952, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%5954 = torch.prim.ListConstruct %int2, %int320, %int64, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5955 = torch.aten.broadcast_to %5953, %5954 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f16>
%5956 = torch.valsem.aten.copy %5955, %5951, %false : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f32>, !torch.bool -> !torch.vtensor<[2,320,64,64],f16>
%5957 = torch.aten.convolution %5956, %668, %669, %744, %745, %744, %false, %745, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[320,320,1,1],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,320,64,64],f16>
%5958 = torch.aten.permute %5957, %866 : !torch.vtensor<[2,320,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,64,64,320],f16>
%5959 = torch.aten.view %5958, %868 : !torch.vtensor<[2,64,64,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%5960 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%5961 = torch.aten.sum.dim_IntList %5959, %5960, %true, %none : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,4096,1],f16>
%5962 = torch.aten.div.Scalar %5961, %int320 : !torch.vtensor<[2,4096,1],f16>, !torch.int -> !torch.vtensor<[2,4096,1],f16>
%5963 = torch.prim.ListConstruct %int2, %int4096, %int320 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5964 = torch.aten.broadcast_to %5962, %5963 : !torch.vtensor<[2,4096,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%5965 = torch.aten.sub.Tensor %5959, %5964, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16>
%5966 = torch.aten.mul.Tensor %5965, %5965 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16>
%5967 = torch.aten.sum.dim_IntList %5966, %5960, %true, %none : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,4096,1],f16>
%5968 = torch.aten.div.Scalar %5967, %int320 : !torch.vtensor<[2,4096,1],f16>, !torch.int -> !torch.vtensor<[2,4096,1],f16>
%5969 = torch.aten.add.Scalar %5968, %float1.000000e-05, %int1 : !torch.vtensor<[2,4096,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,4096,1],f16>
%5970 = torch.aten.rsqrt %5969 : !torch.vtensor<[2,4096,1],f16> -> !torch.vtensor<[2,4096,1],f16>
%5971 = torch.prim.ListConstruct %int2, %int4096, %int320 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5972 = torch.aten.broadcast_to %5970, %5971 : !torch.vtensor<[2,4096,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%5973 = torch.aten.mul.Tensor %5965, %5972 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16>
%5974 = torch.aten.mul.Tensor %5973, %670 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[320],f16> -> !torch.vtensor<[2,4096,320],f16>
%5975 = torch.aten.add.Tensor %5974, %671, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16>
%5976 = torch.aten.transpose.int %672, %int0, %int1 : !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,320],f16>
%5977 = torch.aten.view %5975, %887 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
%5978 = torch.aten.mm %5977, %5976 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16>
%5979 = torch.aten.view %5978, %868 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%5980 = torch.aten.transpose.int %673, %int0, %int1 : !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,320],f16>
%5981 = torch.aten.view %5975, %887 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
%5982 = torch.aten.mm %5981, %5980 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16>
%5983 = torch.aten.view %5982, %868 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%5984 = torch.aten.transpose.int %674, %int0, %int1 : !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,320],f16>
%5985 = torch.aten.view %5975, %887 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
%5986 = torch.aten.mm %5985, %5984 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16>
%5987 = torch.aten.view %5986, %868 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%5988 = torch.aten.view %5979, %899 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16>
%5989 = torch.aten.permute %5988, %901 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16>
%5990 = torch.aten.clone %5989, %int0 : !torch.vtensor<[2,8,4096,40],f16>, !torch.int -> !torch.vtensor<[2,8,4096,40],f16>
%5991 = torch.aten.view %5990, %904 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%5992 = torch.aten.view %5983, %899 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16>
%5993 = torch.aten.permute %5992, %901 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16>
%5994 = torch.aten.clone %5993, %int0 : !torch.vtensor<[2,8,4096,40],f16>, !torch.int -> !torch.vtensor<[2,8,4096,40],f16>
%5995 = torch.aten.view %5994, %904 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%5996 = torch.aten.view %5987, %899 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16>
%5997 = torch.aten.permute %5996, %901 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16>
%5998 = torch.aten.clone %5997, %int0 : !torch.vtensor<[2,8,4096,40],f16>, !torch.int -> !torch.vtensor<[2,8,4096,40],f16>
%5999 = torch.aten.view %5998, %904 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%6000 = torch.aten.transpose.int %5995, %int-1, %int-2 : !torch.vtensor<[16,4096,40],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,40,4096],f16>
%6001 = torch.aten.broadcast_to %5991, %904 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%6002 = torch.aten.view %6001, %904 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%6003 = torch.aten.broadcast_to %6000, %917 : !torch.vtensor<[16,40,4096],f16>, !torch.list<int> -> !torch.vtensor<[16,40,4096],f16>
%6004 = torch.aten.view %6003, %917 : !torch.vtensor<[16,40,4096],f16>, !torch.list<int> -> !torch.vtensor<[16,40,4096],f16>
%6005 = torch.aten.bmm %6002, %6004 : !torch.vtensor<[16,4096,40],f16>, !torch.vtensor<[16,40,4096],f16> -> !torch.vtensor<[16,4096,4096],f16>
%6006 = torch.aten.view %6005, %921 : !torch.vtensor<[16,4096,4096],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,4096],f16>
%6007 = torch.aten.mul.Tensor %6006, %3 : !torch.vtensor<[16,4096,4096],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,4096,4096],f16>
%values_58, %indices_59 = torch.aten.max.dim %6007, %int-1, %true : !torch.vtensor<[16,4096,4096],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,4096,1],f16>, !torch.vtensor<[16,4096,1],si64>
%6008 = torch.aten.sub.Tensor %6007, %values_58, %float1.000000e00 : !torch.vtensor<[16,4096,4096],f16>, !torch.vtensor<[16,4096,1],f16>, !torch.float -> !torch.vtensor<[16,4096,4096],f16>
%6009 = torch.aten.exp %6008 : !torch.vtensor<[16,4096,4096],f16> -> !torch.vtensor<[16,4096,4096],f16>
%6010 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%6011 = torch.aten.sum.dim_IntList %6009, %6010, %true, %none : !torch.vtensor<[16,4096,4096],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[16,4096,1],f16>
%6012 = torch.aten.div.Tensor %6009, %6011 : !torch.vtensor<[16,4096,4096],f16>, !torch.vtensor<[16,4096,1],f16> -> !torch.vtensor<[16,4096,4096],f16>
%6013 = torch.aten.broadcast_to %6012, %921 : !torch.vtensor<[16,4096,4096],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,4096],f16>
%6014 = torch.aten.view %6013, %921 : !torch.vtensor<[16,4096,4096],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,4096],f16>
%6015 = torch.aten.broadcast_to %5999, %904 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%6016 = torch.aten.view %6015, %904 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%6017 = torch.aten.bmm %6014, %6016 : !torch.vtensor<[16,4096,4096],f16>, !torch.vtensor<[16,4096,40],f16> -> !torch.vtensor<[16,4096,40],f16>
%6018 = torch.aten.view %6017, %904 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%6019 = torch.aten.view %6018, %935 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16>
%6020 = torch.aten.permute %6019, %901 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16>
%6021 = torch.aten.clone %6020, %int0 : !torch.vtensor<[2,4096,8,40],f16>, !torch.int -> !torch.vtensor<[2,4096,8,40],f16>
%6022 = torch.aten.view %6021, %868 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%6023 = torch.aten.transpose.int %675, %int0, %int1 : !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,320],f16>
%6024 = torch.aten.view %6022, %887 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
%6025 = torch.aten.mm %6024, %6023 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16>
%6026 = torch.aten.mul.Scalar %676, %int1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320],f16>
%6027 = torch.aten.add.Tensor %6026, %6025, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[8192,320],f16>, !torch.int -> !torch.vtensor<[8192,320],f16>
%6028 = torch.aten.view %6027, %868 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%6029 = torch.aten.add.Tensor %6028, %5959, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16>
%6030 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%6031 = torch.aten.sum.dim_IntList %6029, %6030, %true, %none : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,4096,1],f16>
%6032 = torch.aten.div.Scalar %6031, %int320 : !torch.vtensor<[2,4096,1],f16>, !torch.int -> !torch.vtensor<[2,4096,1],f16>
%6033 = torch.prim.ListConstruct %int2, %int4096, %int320 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%6034 = torch.aten.broadcast_to %6032, %6033 : !torch.vtensor<[2,4096,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%6035 = torch.aten.sub.Tensor %6029, %6034, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16>
%6036 = torch.aten.mul.Tensor %6035, %6035 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16>
%6037 = torch.aten.sum.dim_IntList %6036, %6030, %true, %none : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,4096,1],f16>
%6038 = torch.aten.div.Scalar %6037, %int320 : !torch.vtensor<[2,4096,1],f16>, !torch.int -> !torch.vtensor<[2,4096,1],f16>
%6039 = torch.aten.add.Scalar %6038, %float1.000000e-05, %int1 : !torch.vtensor<[2,4096,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,4096,1],f16>
%6040 = torch.aten.rsqrt %6039 : !torch.vtensor<[2,4096,1],f16> -> !torch.vtensor<[2,4096,1],f16>
%6041 = torch.prim.ListConstruct %int2, %int4096, %int320 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%6042 = torch.aten.broadcast_to %6040, %6041 : !torch.vtensor<[2,4096,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%6043 = torch.aten.mul.Tensor %6035, %6042 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16>
%6044 = torch.aten.mul.Tensor %6043, %677 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[320],f16> -> !torch.vtensor<[2,4096,320],f16>
%6045 = torch.aten.add.Tensor %6044, %678, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16>
%6046 = torch.aten.transpose.int %679, %int0, %int1 : !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,320],f16>
%6047 = torch.aten.view %6045, %887 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
%6048 = torch.aten.mm %6047, %6046 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16>
%6049 = torch.aten.view %6048, %868 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%6050 = torch.aten.transpose.int %680, %int0, %int1 : !torch.vtensor<[320,768],f16>, !torch.int, !torch.int -> !torch.vtensor<[768,320],f16>
%6051 = torch.aten.view %arg2, %968 : !torch.vtensor<[2,77,768],f16>, !torch.list<int> -> !torch.vtensor<[154,768],f16>
%6052 = torch.aten.mm %6051, %6050 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,320],f16> -> !torch.vtensor<[154,320],f16>
%6053 = torch.aten.view %6052, %971 : !torch.vtensor<[154,320],f16>, !torch.list<int> -> !torch.vtensor<[2,77,320],f16>
%6054 = torch.aten.transpose.int %681, %int0, %int1 : !torch.vtensor<[320,768],f16>, !torch.int, !torch.int -> !torch.vtensor<[768,320],f16>
%6055 = torch.aten.view %arg2, %968 : !torch.vtensor<[2,77,768],f16>, !torch.list<int> -> !torch.vtensor<[154,768],f16>
%6056 = torch.aten.mm %6055, %6054 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,320],f16> -> !torch.vtensor<[154,320],f16>
%6057 = torch.aten.view %6056, %971 : !torch.vtensor<[154,320],f16>, !torch.list<int> -> !torch.vtensor<[2,77,320],f16>
%6058 = torch.aten.view %6049, %899 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16>
%6059 = torch.aten.permute %6058, %901 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16>
%6060 = torch.aten.clone %6059, %int0 : !torch.vtensor<[2,8,4096,40],f16>, !torch.int -> !torch.vtensor<[2,8,4096,40],f16>
%6061 = torch.aten.view %6060, %904 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%6062 = torch.aten.view %6053, %981 : !torch.vtensor<[2,77,320],f16>, !torch.list<int> -> !torch.vtensor<[2,77,8,40],f16>
%6063 = torch.aten.permute %6062, %901 : !torch.vtensor<[2,77,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,40],f16>
%6064 = torch.aten.clone %6063, %int0 : !torch.vtensor<[2,8,77,40],f16>, !torch.int -> !torch.vtensor<[2,8,77,40],f16>
%6065 = torch.aten.view %6064, %985 : !torch.vtensor<[2,8,77,40],f16>, !torch.list<int> -> !torch.vtensor<[16,77,40],f16>
%6066 = torch.aten.view %6057, %981 : !torch.vtensor<[2,77,320],f16>, !torch.list<int> -> !torch.vtensor<[2,77,8,40],f16>
%6067 = torch.aten.permute %6066, %901 : !torch.vtensor<[2,77,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,40],f16>
%6068 = torch.aten.clone %6067, %int0 : !torch.vtensor<[2,8,77,40],f16>, !torch.int -> !torch.vtensor<[2,8,77,40],f16>
%6069 = torch.aten.view %6068, %985 : !torch.vtensor<[2,8,77,40],f16>, !torch.list<int> -> !torch.vtensor<[16,77,40],f16>
%6070 = torch.aten.transpose.int %6065, %int-1, %int-2 : !torch.vtensor<[16,77,40],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,40,77],f16>
%6071 = torch.aten.broadcast_to %6061, %904 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%6072 = torch.aten.view %6071, %904 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%6073 = torch.aten.broadcast_to %6070, %994 : !torch.vtensor<[16,40,77],f16>, !torch.list<int> -> !torch.vtensor<[16,40,77],f16>
%6074 = torch.aten.view %6073, %994 : !torch.vtensor<[16,40,77],f16>, !torch.list<int> -> !torch.vtensor<[16,40,77],f16>
%6075 = torch.aten.bmm %6072, %6074 : !torch.vtensor<[16,4096,40],f16>, !torch.vtensor<[16,40,77],f16> -> !torch.vtensor<[16,4096,77],f16>
%6076 = torch.aten.view %6075, %998 : !torch.vtensor<[16,4096,77],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,77],f16>
%6077 = torch.aten.mul.Tensor %6076, %3 : !torch.vtensor<[16,4096,77],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,4096,77],f16>
%values_60, %indices_61 = torch.aten.max.dim %6077, %int-1, %true : !torch.vtensor<[16,4096,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,4096,1],f16>, !torch.vtensor<[16,4096,1],si64>
%6078 = torch.aten.sub.Tensor %6077, %values_60, %float1.000000e00 : !torch.vtensor<[16,4096,77],f16>, !torch.vtensor<[16,4096,1],f16>, !torch.float -> !torch.vtensor<[16,4096,77],f16>
%6079 = torch.aten.exp %6078 : !torch.vtensor<[16,4096,77],f16> -> !torch.vtensor<[16,4096,77],f16>
%6080 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%6081 = torch.aten.sum.dim_IntList %6079, %6080, %true, %none : !torch.vtensor<[16,4096,77],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[16,4096,1],f16>
%6082 = torch.aten.div.Tensor %6079, %6081 : !torch.vtensor<[16,4096,77],f16>, !torch.vtensor<[16,4096,1],f16> -> !torch.vtensor<[16,4096,77],f16>
%6083 = torch.aten.broadcast_to %6082, %998 : !torch.vtensor<[16,4096,77],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,77],f16>
%6084 = torch.aten.view %6083, %998 : !torch.vtensor<[16,4096,77],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,77],f16>
%6085 = torch.aten.broadcast_to %6069, %985 : !torch.vtensor<[16,77,40],f16>, !torch.list<int> -> !torch.vtensor<[16,77,40],f16>
%6086 = torch.aten.view %6085, %985 : !torch.vtensor<[16,77,40],f16>, !torch.list<int> -> !torch.vtensor<[16,77,40],f16>
%6087 = torch.aten.bmm %6084, %6086 : !torch.vtensor<[16,4096,77],f16>, !torch.vtensor<[16,77,40],f16> -> !torch.vtensor<[16,4096,40],f16>
%6088 = torch.aten.view %6087, %904 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%6089 = torch.aten.view %6088, %935 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16>
%6090 = torch.aten.permute %6089, %901 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16>
%6091 = torch.aten.clone %6090, %int0 : !torch.vtensor<[2,4096,8,40],f16>, !torch.int -> !torch.vtensor<[2,4096,8,40],f16>
%6092 = torch.aten.view %6091, %868 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%6093 = torch.aten.transpose.int %682, %int0, %int1 : !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,320],f16>
%6094 = torch.aten.view %6092, %887 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
%6095 = torch.aten.mm %6094, %6093 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16>
%6096 = torch.aten.mul.Scalar %683, %int1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320],f16>
%6097 = torch.aten.add.Tensor %6096, %6095, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[8192,320],f16>, !torch.int -> !torch.vtensor<[8192,320],f16>
%6098 = torch.aten.view %6097, %868 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%6099 = torch.aten.add.Tensor %6098, %6029, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16>
%6100 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%6101 = torch.aten.sum.dim_IntList %6099, %6100, %true, %none : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,4096,1],f16>
%6102 = torch.aten.div.Scalar %6101, %int320 : !torch.vtensor<[2,4096,1],f16>, !torch.int -> !torch.vtensor<[2,4096,1],f16>
%6103 = torch.prim.ListConstruct %int2, %int4096, %int320 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%6104 = torch.aten.broadcast_to %6102, %6103 : !torch.vtensor<[2,4096,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%6105 = torch.aten.sub.Tensor %6099, %6104, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16>
%6106 = torch.aten.mul.Tensor %6105, %6105 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16>
%6107 = torch.aten.sum.dim_IntList %6106, %6100, %true, %none : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,4096,1],f16>
%6108 = torch.aten.div.Scalar %6107, %int320 : !torch.vtensor<[2,4096,1],f16>, !torch.int -> !torch.vtensor<[2,4096,1],f16>
%6109 = torch.aten.add.Scalar %6108, %float1.000000e-05, %int1 : !torch.vtensor<[2,4096,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,4096,1],f16>
%6110 = torch.aten.rsqrt %6109 : !torch.vtensor<[2,4096,1],f16> -> !torch.vtensor<[2,4096,1],f16>
%6111 = torch.prim.ListConstruct %int2, %int4096, %int320 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%6112 = torch.aten.broadcast_to %6110, %6111 : !torch.vtensor<[2,4096,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%6113 = torch.aten.mul.Tensor %6105, %6112 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16>
%6114 = torch.aten.mul.Tensor %6113, %684 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[320],f16> -> !torch.vtensor<[2,4096,320],f16>
%6115 = torch.aten.add.Tensor %6114, %685, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16>
%6116 = torch.aten.transpose.int %686, %int0, %int1 : !torch.vtensor<[2560,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,2560],f16>
%6117 = torch.aten.view %6115, %887 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
%6118 = torch.aten.mm %6117, %6116 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,2560],f16> -> !torch.vtensor<[8192,2560],f16>
%6119 = torch.aten.mul.Scalar %687, %int1 : !torch.vtensor<[2560],f16>, !torch.int -> !torch.vtensor<[2560],f16>
%6120 = torch.aten.add.Tensor %6119, %6118, %int1 : !torch.vtensor<[2560],f16>, !torch.vtensor<[8192,2560],f16>, !torch.int -> !torch.vtensor<[8192,2560],f16>
%6121 = torch.aten.view %6120, %1044 : !torch.vtensor<[8192,2560],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,2560],f16>
%6122 = torch.aten.slice.Tensor %6121, %int-1, %int0, %int1280, %int1 : !torch.vtensor<[2,4096,2560],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,4096,1280],f16>
%6123 = torch.aten.slice.Tensor %6121, %int-1, %int1280, %int2560, %int1 : !torch.vtensor<[2,4096,2560],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,4096,1280],f16>
%6124 = torch.aten.gelu %6123, %str : !torch.vtensor<[2,4096,1280],f16>, !torch.str -> !torch.vtensor<[2,4096,1280],f16>
%6125 = torch.aten.mul.Tensor %6122, %6124 : !torch.vtensor<[2,4096,1280],f16>, !torch.vtensor<[2,4096,1280],f16> -> !torch.vtensor<[2,4096,1280],f16>
%6126 = torch.aten.transpose.int %688, %int0, %int1 : !torch.vtensor<[320,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,320],f16>
%6127 = torch.aten.view %6125, %1051 : !torch.vtensor<[2,4096,1280],f16>, !torch.list<int> -> !torch.vtensor<[8192,1280],f16>
%6128 = torch.aten.mm %6127, %6126 : !torch.vtensor<[8192,1280],f16>, !torch.vtensor<[1280,320],f16> -> !torch.vtensor<[8192,320],f16>
%6129 = torch.aten.mul.Scalar %689, %int1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320],f16>
%6130 = torch.aten.add.Tensor %6129, %6128, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[8192,320],f16>, !torch.int -> !torch.vtensor<[8192,320],f16>
%6131 = torch.aten.view %6130, %868 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%6132 = torch.aten.add.Tensor %6131, %6099, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16>
%6133 = torch.aten.view %6132, %1058 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[2,64,64,320],f16>
%6134 = torch.aten.permute %6133, %1060 : !torch.vtensor<[2,64,64,320],f16>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f16>
%6135 = torch.aten.convolution %6134, %690, %691, %744, %745, %744, %false, %745, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[320,320,1,1],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,320,64,64],f16>
%6136 = torch.aten.add.Tensor %6135, %5924, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f16>
%6137 = torch.aten.clone %6136, %int0 : !torch.vtensor<[2,320,64,64],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f16>
%6138 = torch.aten.view %6137, %747 : !torch.vtensor<[2,320,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,32,10,4096],f16>
%6139 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%6140 = torch.aten.to.dtype %6139, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%6141 = torch.prim.ListConstruct %int2, %int32, %int10, %int4096 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%6142 = torch.aten.broadcast_to %6140, %6141 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,10,4096],f32>
%6143 = torch.valsem.aten.copy %6142, %6138, %false : !torch.vtensor<[2,32,10,4096],f32>, !torch.vtensor<[2,32,10,4096],f16>, !torch.bool -> !torch.vtensor<[2,32,10,4096],f32>
%6144 = torch.aten.to.dtype %6143, %int7, %false, %false, %none : !torch.vtensor<[2,32,10,4096],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,10,4096],f64>
%6145 = torch.aten.sum.dim_IntList %6144, %754, %true, %none : !torch.vtensor<[2,32,10,4096],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%6146 = torch.aten.div.Scalar %6145, %int40960 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%6147 = torch.aten.sub.Tensor %6144, %6146, %float1.000000e00 : !torch.vtensor<[2,32,10,4096],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,10,4096],f64>
%6148 = torch.aten.mul.Tensor %6147, %6147 : !torch.vtensor<[2,32,10,4096],f64>, !torch.vtensor<[2,32,10,4096],f64> -> !torch.vtensor<[2,32,10,4096],f64>
%6149 = torch.aten.sum.dim_IntList %6148, %754, %true, %none : !torch.vtensor<[2,32,10,4096],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%6150 = torch.aten.div.Scalar %6149, %int40960 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%6151 = torch.aten.to.dtype %6150, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%6152 = torch.aten.sum.dim_IntList %6143, %754, %true, %none : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%6153 = torch.aten.div.Scalar %6152, %int40960 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%6154 = torch.aten.add.Tensor %6151, %6, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%6155 = torch.aten.rsqrt %6154 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%6156 = torch.aten.sub.Tensor %6138, %6153, %int1 : !torch.vtensor<[2,32,10,4096],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,10,4096],f32>
%6157 = torch.aten.mul.Tensor %6156, %6155 : !torch.vtensor<[2,32,10,4096],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,10,4096],f32>
%6158 = torch.aten.view %6157, %769 : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f32>
%6159 = torch.aten.unsqueeze %692, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16>
%6160 = torch.aten.unsqueeze %6159, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16>
%6161 = torch.aten.mul.Tensor %6158, %6160 : !torch.vtensor<[2,320,64,64],f32>, !torch.vtensor<[320,1,1],f16> -> !torch.vtensor<[2,320,64,64],f32>
%6162 = torch.aten.unsqueeze %693, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16>
%6163 = torch.aten.unsqueeze %6162, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16>
%6164 = torch.aten.add.Tensor %6161, %6163, %int1 : !torch.vtensor<[2,320,64,64],f32>, !torch.vtensor<[320,1,1],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f32>
%6165 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%6166 = torch.aten.to.dtype %6165, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%6167 = torch.prim.ListConstruct %int2, %int320, %int64, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%6168 = torch.aten.broadcast_to %6166, %6167 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f16>
%6169 = torch.valsem.aten.copy %6168, %6164, %false : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f32>, !torch.bool -> !torch.vtensor<[2,320,64,64],f16>
%6170 = torch.aten.sigmoid %6169 : !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16>
%6171 = torch.aten.mul.Tensor %6170, %6169 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16>
%6172 = torch.aten.convolution %6171, %694, %695, %744, %744, %744, %false, %745, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[4,320,3,3],f16>, !torch.vtensor<[4],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,4,64,64],f16>
%6173 = torch.aten.slice.Tensor %6172, %int0, %int0, %int1, %int1 : !torch.vtensor<[2,4,64,64],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,4,64,64],f16>
%6174 = torch.aten.slice.Tensor %6172, %int0, %int1, %int2, %int1 : !torch.vtensor<[2,4,64,64],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,4,64,64],f16>
%6175 = torch.aten.sub.Tensor %6174, %6173, %int1 : !torch.vtensor<[1,4,64,64],f16>, !torch.vtensor<[1,4,64,64],f16>, !torch.int -> !torch.vtensor<[1,4,64,64],f16>
%6176 = torch.aten.mul.Tensor %6175, %0 : !torch.vtensor<[1,4,64,64],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[1,4,64,64],f16>
%6177 = torch.aten.add.Tensor %6173, %6176, %int1 : !torch.vtensor<[1,4,64,64],f16>, !torch.vtensor<[1,4,64,64],f16>, !torch.int -> !torch.vtensor<[1,4,64,64],f16>
return %6177 : !torch.vtensor<[1,4,64,64],f16>
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment