Skip to content

Instantly share code, notes, and snippets.

@pashu123
Created October 20, 2022 15:35
Show Gist options
  • Save pashu123/86694a8ac755b11f5a235ef515495c9a to your computer and use it in GitHub Desktop.
module attributes {torch.debug_module_name = "_lambda"} {
func.func @forward(%arg0: !torch.vtensor<[1,4,64,64],f16>, %arg1: !torch.vtensor<[1],f16>, %arg2: !torch.vtensor<[2,77,768],f16>, %arg3: !torch.vtensor<[],f32>) -> !torch.vtensor<[1,4,64,64],f16> {
%int64 = torch.constant.int 64
%int320 = torch.constant.int 320
%int2 = torch.constant.int 2
%int40960 = torch.constant.int 40960
%int4096 = torch.constant.int 4096
%int10 = torch.constant.int 10
%int32 = torch.constant.int 32
%int640 = torch.constant.int 640
%int81920 = torch.constant.int 81920
%int20 = torch.constant.int 20
%int960 = torch.constant.int 960
%int122880 = torch.constant.int 122880
%int30 = torch.constant.int 30
%int1024 = torch.constant.int 1024
%int20480 = torch.constant.int 20480
%int30720 = torch.constant.int 30720
%int1280 = torch.constant.int 1280
%int40 = torch.constant.int 40
%int1920 = torch.constant.int 1920
%int61440 = torch.constant.int 61440
%int60 = torch.constant.int 60
%int256 = torch.constant.int 256
%int16 = torch.constant.int 16
%int10240 = torch.constant.int 10240
%int15360 = torch.constant.int 15360
%int2560 = torch.constant.int 2560
%int80 = torch.constant.int 80
%int8 = torch.constant.int 8
%int5120 = torch.constant.int 5120
%int1 = torch.constant.int 1
%float1.000000e00 = torch.constant.float 1.000000e+00
%int7 = torch.constant.int 7
%float0.000000e00 = torch.constant.float 0.000000e+00
%int160 = torch.constant.int 160
%0 = torch.vtensor.literal(dense<7.500000e+00> : tensor<f64>) : !torch.vtensor<[],f64>
%1 = torch.vtensor.literal(dense<0.079056941504209485> : tensor<f64>) : !torch.vtensor<[],f64>
%2 = torch.vtensor.literal(dense<0.11180339887498948> : tensor<f64>) : !torch.vtensor<[],f64>
%3 = torch.vtensor.literal(dense<0.15811388300841897> : tensor<f64>) : !torch.vtensor<[],f64>
%4 = torch.vtensor.literal(dense<9.9999999999999995E-7> : tensor<f64>) : !torch.vtensor<[],f64>
%5 = torch.vtensor.literal(dense<1.000000e+00> : tensor<f64>) : !torch.vtensor<[],f64>
%6 = torch.vtensor.literal(dense<1.000000e-05> : tensor<f64>) : !torch.vtensor<[],f64>
%7 = torch.vtensor.literal(dense<160> : tensor<si64>) : !torch.vtensor<[],si64>
%8 = torch.vtensor.literal(dense<-9.2103403719761836> : tensor<f64>) : !torch.vtensor<[],f64>
%9 = torch.vtensor.literal(dense<1> : tensor<si64>) : !torch.vtensor<[],si64>
%10 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x320xf16>) : !torch.vtensor<[1280,320],f16>
%11 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%12 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16>
%13 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%14 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x4x3x3xf16>) : !torch.vtensor<[320,4,3,3],f16>
%15 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%16 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%17 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%18 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320x3x3xf16>) : !torch.vtensor<[320,320,3,3],f16>
%19 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%20 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x1280xf16>) : !torch.vtensor<[320,1280],f16>
%21 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%22 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%23 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%24 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320x3x3xf16>) : !torch.vtensor<[320,320,3,3],f16>
%25 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%26 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%27 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%28 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320x1x1xf16>) : !torch.vtensor<[320,320,1,1],f16>
%29 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%30 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%31 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%32 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xf16>) : !torch.vtensor<[320,320],f16>
%33 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xf16>) : !torch.vtensor<[320,320],f16>
%34 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xf16>) : !torch.vtensor<[320,320],f16>
%35 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xf16>) : !torch.vtensor<[320,320],f16>
%36 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%37 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%38 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%39 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xf16>) : !torch.vtensor<[320,320],f16>
%40 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x768xf16>) : !torch.vtensor<[320,768],f16>
%41 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x768xf16>) : !torch.vtensor<[320,768],f16>
%42 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xf16>) : !torch.vtensor<[320,320],f16>
%43 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%44 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%45 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%46 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560x320xf16>) : !torch.vtensor<[2560,320],f16>
%47 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560xf16>) : !torch.vtensor<[2560],f16>
%48 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x1280xf16>) : !torch.vtensor<[320,1280],f16>
%49 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%50 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320x1x1xf16>) : !torch.vtensor<[320,320,1,1],f16>
%51 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%52 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%53 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%54 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320x3x3xf16>) : !torch.vtensor<[320,320,3,3],f16>
%55 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%56 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x1280xf16>) : !torch.vtensor<[320,1280],f16>
%57 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%58 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%59 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%60 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320x3x3xf16>) : !torch.vtensor<[320,320,3,3],f16>
%61 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%62 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%63 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%64 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320x1x1xf16>) : !torch.vtensor<[320,320,1,1],f16>
%65 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%66 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%67 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%68 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xf16>) : !torch.vtensor<[320,320],f16>
%69 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xf16>) : !torch.vtensor<[320,320],f16>
%70 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xf16>) : !torch.vtensor<[320,320],f16>
%71 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xf16>) : !torch.vtensor<[320,320],f16>
%72 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%73 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%74 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%75 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xf16>) : !torch.vtensor<[320,320],f16>
%76 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x768xf16>) : !torch.vtensor<[320,768],f16>
%77 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x768xf16>) : !torch.vtensor<[320,768],f16>
%78 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xf16>) : !torch.vtensor<[320,320],f16>
%79 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%80 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%81 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%82 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560x320xf16>) : !torch.vtensor<[2560,320],f16>
%83 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560xf16>) : !torch.vtensor<[2560],f16>
%84 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x1280xf16>) : !torch.vtensor<[320,1280],f16>
%85 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%86 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320x1x1xf16>) : !torch.vtensor<[320,320,1,1],f16>
%87 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%88 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320x3x3xf16>) : !torch.vtensor<[320,320,3,3],f16>
%89 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%90 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%91 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%92 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x320x3x3xf16>) : !torch.vtensor<[640,320,3,3],f16>
%93 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%94 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x1280xf16>) : !torch.vtensor<[640,1280],f16>
%95 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%96 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%97 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%98 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640x3x3xf16>) : !torch.vtensor<[640,640,3,3],f16>
%99 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%100 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x320x1x1xf16>) : !torch.vtensor<[640,320,1,1],f16>
%101 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%102 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%103 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%104 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640x1x1xf16>) : !torch.vtensor<[640,640,1,1],f16>
%105 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%106 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%107 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%108 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xf16>) : !torch.vtensor<[640,640],f16>
%109 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xf16>) : !torch.vtensor<[640,640],f16>
%110 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xf16>) : !torch.vtensor<[640,640],f16>
%111 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xf16>) : !torch.vtensor<[640,640],f16>
%112 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%113 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%114 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%115 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xf16>) : !torch.vtensor<[640,640],f16>
%116 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x768xf16>) : !torch.vtensor<[640,768],f16>
%117 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x768xf16>) : !torch.vtensor<[640,768],f16>
%118 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xf16>) : !torch.vtensor<[640,640],f16>
%119 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%120 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%121 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%122 = torch.vtensor.literal(dense_resource<__elided__> : tensor<5120x640xf16>) : !torch.vtensor<[5120,640],f16>
%123 = torch.vtensor.literal(dense_resource<__elided__> : tensor<5120xf16>) : !torch.vtensor<[5120],f16>
%124 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x2560xf16>) : !torch.vtensor<[640,2560],f16>
%125 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%126 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640x1x1xf16>) : !torch.vtensor<[640,640,1,1],f16>
%127 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%128 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%129 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%130 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640x3x3xf16>) : !torch.vtensor<[640,640,3,3],f16>
%131 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%132 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x1280xf16>) : !torch.vtensor<[640,1280],f16>
%133 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%134 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%135 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%136 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640x3x3xf16>) : !torch.vtensor<[640,640,3,3],f16>
%137 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%138 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%139 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%140 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640x1x1xf16>) : !torch.vtensor<[640,640,1,1],f16>
%141 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%142 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%143 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%144 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xf16>) : !torch.vtensor<[640,640],f16>
%145 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xf16>) : !torch.vtensor<[640,640],f16>
%146 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xf16>) : !torch.vtensor<[640,640],f16>
%147 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xf16>) : !torch.vtensor<[640,640],f16>
%148 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%149 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%150 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%151 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xf16>) : !torch.vtensor<[640,640],f16>
%152 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x768xf16>) : !torch.vtensor<[640,768],f16>
%153 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x768xf16>) : !torch.vtensor<[640,768],f16>
%154 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xf16>) : !torch.vtensor<[640,640],f16>
%155 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%156 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%157 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%158 = torch.vtensor.literal(dense_resource<__elided__> : tensor<5120x640xf16>) : !torch.vtensor<[5120,640],f16>
%159 = torch.vtensor.literal(dense_resource<__elided__> : tensor<5120xf16>) : !torch.vtensor<[5120],f16>
%160 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x2560xf16>) : !torch.vtensor<[640,2560],f16>
%161 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%162 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640x1x1xf16>) : !torch.vtensor<[640,640,1,1],f16>
%163 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%164 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640x3x3xf16>) : !torch.vtensor<[640,640,3,3],f16>
%165 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%166 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%167 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%168 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x640x3x3xf16>) : !torch.vtensor<[1280,640,3,3],f16>
%169 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%170 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16>
%171 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%172 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%173 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%174 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x3x3xf16>) : !torch.vtensor<[1280,1280,3,3],f16>
%175 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%176 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x640x1x1xf16>) : !torch.vtensor<[1280,640,1,1],f16>
%177 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%178 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%179 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%180 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x1x1xf16>) : !torch.vtensor<[1280,1280,1,1],f16>
%181 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%182 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%183 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%184 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16>
%185 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16>
%186 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16>
%187 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16>
%188 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%189 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%190 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%191 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16>
%192 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x768xf16>) : !torch.vtensor<[1280,768],f16>
%193 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x768xf16>) : !torch.vtensor<[1280,768],f16>
%194 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16>
%195 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%196 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%197 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%198 = torch.vtensor.literal(dense_resource<__elided__> : tensor<10240x1280xf16>) : !torch.vtensor<[10240,1280],f16>
%199 = torch.vtensor.literal(dense_resource<__elided__> : tensor<10240xf16>) : !torch.vtensor<[10240],f16>
%200 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x5120xf16>) : !torch.vtensor<[1280,5120],f16>
%201 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%202 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x1x1xf16>) : !torch.vtensor<[1280,1280,1,1],f16>
%203 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%204 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%205 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%206 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x3x3xf16>) : !torch.vtensor<[1280,1280,3,3],f16>
%207 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%208 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16>
%209 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%210 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%211 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%212 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x3x3xf16>) : !torch.vtensor<[1280,1280,3,3],f16>
%213 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%214 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%215 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%216 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x1x1xf16>) : !torch.vtensor<[1280,1280,1,1],f16>
%217 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%218 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%219 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%220 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16>
%221 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16>
%222 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16>
%223 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16>
%224 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%225 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%226 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%227 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16>
%228 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x768xf16>) : !torch.vtensor<[1280,768],f16>
%229 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x768xf16>) : !torch.vtensor<[1280,768],f16>
%230 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16>
%231 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%232 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%233 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%234 = torch.vtensor.literal(dense_resource<__elided__> : tensor<10240x1280xf16>) : !torch.vtensor<[10240,1280],f16>
%235 = torch.vtensor.literal(dense_resource<__elided__> : tensor<10240xf16>) : !torch.vtensor<[10240],f16>
%236 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x5120xf16>) : !torch.vtensor<[1280,5120],f16>
%237 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%238 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x1x1xf16>) : !torch.vtensor<[1280,1280,1,1],f16>
%239 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%240 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x3x3xf16>) : !torch.vtensor<[1280,1280,3,3],f16>
%241 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%242 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%243 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%244 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x3x3xf16>) : !torch.vtensor<[1280,1280,3,3],f16>
%245 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%246 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16>
%247 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%248 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%249 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%250 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x3x3xf16>) : !torch.vtensor<[1280,1280,3,3],f16>
%251 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%252 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%253 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%254 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x3x3xf16>) : !torch.vtensor<[1280,1280,3,3],f16>
%255 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%256 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16>
%257 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%258 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%259 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%260 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x3x3xf16>) : !torch.vtensor<[1280,1280,3,3],f16>
%261 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%262 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%263 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%264 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x3x3xf16>) : !torch.vtensor<[1280,1280,3,3],f16>
%265 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%266 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16>
%267 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%268 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%269 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%270 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x3x3xf16>) : !torch.vtensor<[1280,1280,3,3],f16>
%271 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%272 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%273 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%274 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x1x1xf16>) : !torch.vtensor<[1280,1280,1,1],f16>
%275 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%276 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%277 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%278 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16>
%279 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16>
%280 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16>
%281 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16>
%282 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%283 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%284 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%285 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16>
%286 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x768xf16>) : !torch.vtensor<[1280,768],f16>
%287 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x768xf16>) : !torch.vtensor<[1280,768],f16>
%288 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16>
%289 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%290 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%291 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%292 = torch.vtensor.literal(dense_resource<__elided__> : tensor<10240x1280xf16>) : !torch.vtensor<[10240,1280],f16>
%293 = torch.vtensor.literal(dense_resource<__elided__> : tensor<10240xf16>) : !torch.vtensor<[10240],f16>
%294 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x5120xf16>) : !torch.vtensor<[1280,5120],f16>
%295 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%296 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x1x1xf16>) : !torch.vtensor<[1280,1280,1,1],f16>
%297 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%298 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%299 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%300 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x3x3xf16>) : !torch.vtensor<[1280,1280,3,3],f16>
%301 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%302 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16>
%303 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%304 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%305 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%306 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x3x3xf16>) : !torch.vtensor<[1280,1280,3,3],f16>
%307 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%308 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560xf16>) : !torch.vtensor<[2560],f16>
%309 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560xf16>) : !torch.vtensor<[2560],f16>
%310 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x2560x3x3xf16>) : !torch.vtensor<[1280,2560,3,3],f16>
%311 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%312 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16>
%313 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%314 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%315 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%316 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x3x3xf16>) : !torch.vtensor<[1280,1280,3,3],f16>
%317 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%318 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x2560x1x1xf16>) : !torch.vtensor<[1280,2560,1,1],f16>
%319 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%320 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560xf16>) : !torch.vtensor<[2560],f16>
%321 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560xf16>) : !torch.vtensor<[2560],f16>
%322 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x2560x3x3xf16>) : !torch.vtensor<[1280,2560,3,3],f16>
%323 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%324 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16>
%325 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%326 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%327 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%328 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x3x3xf16>) : !torch.vtensor<[1280,1280,3,3],f16>
%329 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%330 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x2560x1x1xf16>) : !torch.vtensor<[1280,2560,1,1],f16>
%331 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%332 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560xf16>) : !torch.vtensor<[2560],f16>
%333 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560xf16>) : !torch.vtensor<[2560],f16>
%334 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x2560x3x3xf16>) : !torch.vtensor<[1280,2560,3,3],f16>
%335 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%336 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16>
%337 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%338 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%339 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%340 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x3x3xf16>) : !torch.vtensor<[1280,1280,3,3],f16>
%341 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%342 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x2560x1x1xf16>) : !torch.vtensor<[1280,2560,1,1],f16>
%343 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%344 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x3x3xf16>) : !torch.vtensor<[1280,1280,3,3],f16>
%345 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%346 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560xf16>) : !torch.vtensor<[2560],f16>
%347 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560xf16>) : !torch.vtensor<[2560],f16>
%348 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x2560x3x3xf16>) : !torch.vtensor<[1280,2560,3,3],f16>
%349 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%350 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16>
%351 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%352 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%353 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%354 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x3x3xf16>) : !torch.vtensor<[1280,1280,3,3],f16>
%355 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%356 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x2560x1x1xf16>) : !torch.vtensor<[1280,2560,1,1],f16>
%357 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%358 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%359 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%360 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x1x1xf16>) : !torch.vtensor<[1280,1280,1,1],f16>
%361 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%362 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%363 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%364 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16>
%365 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16>
%366 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16>
%367 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16>
%368 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%369 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%370 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%371 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16>
%372 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x768xf16>) : !torch.vtensor<[1280,768],f16>
%373 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x768xf16>) : !torch.vtensor<[1280,768],f16>
%374 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16>
%375 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%376 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%377 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%378 = torch.vtensor.literal(dense_resource<__elided__> : tensor<10240x1280xf16>) : !torch.vtensor<[10240,1280],f16>
%379 = torch.vtensor.literal(dense_resource<__elided__> : tensor<10240xf16>) : !torch.vtensor<[10240],f16>
%380 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x5120xf16>) : !torch.vtensor<[1280,5120],f16>
%381 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%382 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x1x1xf16>) : !torch.vtensor<[1280,1280,1,1],f16>
%383 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%384 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560xf16>) : !torch.vtensor<[2560],f16>
%385 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560xf16>) : !torch.vtensor<[2560],f16>
%386 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x2560x3x3xf16>) : !torch.vtensor<[1280,2560,3,3],f16>
%387 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%388 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16>
%389 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%390 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%391 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%392 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x3x3xf16>) : !torch.vtensor<[1280,1280,3,3],f16>
%393 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%394 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x2560x1x1xf16>) : !torch.vtensor<[1280,2560,1,1],f16>
%395 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%396 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%397 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%398 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x1x1xf16>) : !torch.vtensor<[1280,1280,1,1],f16>
%399 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%400 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%401 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%402 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16>
%403 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16>
%404 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16>
%405 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16>
%406 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%407 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%408 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%409 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16>
%410 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x768xf16>) : !torch.vtensor<[1280,768],f16>
%411 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x768xf16>) : !torch.vtensor<[1280,768],f16>
%412 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16>
%413 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%414 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%415 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%416 = torch.vtensor.literal(dense_resource<__elided__> : tensor<10240x1280xf16>) : !torch.vtensor<[10240,1280],f16>
%417 = torch.vtensor.literal(dense_resource<__elided__> : tensor<10240xf16>) : !torch.vtensor<[10240],f16>
%418 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x5120xf16>) : !torch.vtensor<[1280,5120],f16>
%419 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%420 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x1x1xf16>) : !torch.vtensor<[1280,1280,1,1],f16>
%421 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%422 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1920xf16>) : !torch.vtensor<[1920],f16>
%423 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1920xf16>) : !torch.vtensor<[1920],f16>
%424 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1920x3x3xf16>) : !torch.vtensor<[1280,1920,3,3],f16>
%425 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%426 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16>
%427 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%428 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%429 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%430 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x3x3xf16>) : !torch.vtensor<[1280,1280,3,3],f16>
%431 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%432 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1920x1x1xf16>) : !torch.vtensor<[1280,1920,1,1],f16>
%433 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%434 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%435 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%436 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x1x1xf16>) : !torch.vtensor<[1280,1280,1,1],f16>
%437 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%438 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%439 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%440 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16>
%441 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16>
%442 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16>
%443 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16>
%444 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%445 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%446 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%447 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16>
%448 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x768xf16>) : !torch.vtensor<[1280,768],f16>
%449 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x768xf16>) : !torch.vtensor<[1280,768],f16>
%450 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16>
%451 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%452 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%453 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%454 = torch.vtensor.literal(dense_resource<__elided__> : tensor<10240x1280xf16>) : !torch.vtensor<[10240,1280],f16>
%455 = torch.vtensor.literal(dense_resource<__elided__> : tensor<10240xf16>) : !torch.vtensor<[10240],f16>
%456 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x5120xf16>) : !torch.vtensor<[1280,5120],f16>
%457 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%458 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x1x1xf16>) : !torch.vtensor<[1280,1280,1,1],f16>
%459 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%460 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x3x3xf16>) : !torch.vtensor<[1280,1280,3,3],f16>
%461 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%462 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1920xf16>) : !torch.vtensor<[1920],f16>
%463 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1920xf16>) : !torch.vtensor<[1920],f16>
%464 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x1920x3x3xf16>) : !torch.vtensor<[640,1920,3,3],f16>
%465 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%466 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x1280xf16>) : !torch.vtensor<[640,1280],f16>
%467 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%468 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%469 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%470 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640x3x3xf16>) : !torch.vtensor<[640,640,3,3],f16>
%471 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%472 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x1920x1x1xf16>) : !torch.vtensor<[640,1920,1,1],f16>
%473 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%474 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%475 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%476 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640x1x1xf16>) : !torch.vtensor<[640,640,1,1],f16>
%477 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%478 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%479 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%480 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xf16>) : !torch.vtensor<[640,640],f16>
%481 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xf16>) : !torch.vtensor<[640,640],f16>
%482 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xf16>) : !torch.vtensor<[640,640],f16>
%483 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xf16>) : !torch.vtensor<[640,640],f16>
%484 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%485 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%486 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%487 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xf16>) : !torch.vtensor<[640,640],f16>
%488 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x768xf16>) : !torch.vtensor<[640,768],f16>
%489 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x768xf16>) : !torch.vtensor<[640,768],f16>
%490 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xf16>) : !torch.vtensor<[640,640],f16>
%491 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%492 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%493 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%494 = torch.vtensor.literal(dense_resource<__elided__> : tensor<5120x640xf16>) : !torch.vtensor<[5120,640],f16>
%495 = torch.vtensor.literal(dense_resource<__elided__> : tensor<5120xf16>) : !torch.vtensor<[5120],f16>
%496 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x2560xf16>) : !torch.vtensor<[640,2560],f16>
%497 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%498 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640x1x1xf16>) : !torch.vtensor<[640,640,1,1],f16>
%499 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%500 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%501 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%502 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x1280x3x3xf16>) : !torch.vtensor<[640,1280,3,3],f16>
%503 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%504 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x1280xf16>) : !torch.vtensor<[640,1280],f16>
%505 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%506 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%507 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%508 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640x3x3xf16>) : !torch.vtensor<[640,640,3,3],f16>
%509 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%510 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x1280x1x1xf16>) : !torch.vtensor<[640,1280,1,1],f16>
%511 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%512 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%513 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%514 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640x1x1xf16>) : !torch.vtensor<[640,640,1,1],f16>
%515 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%516 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%517 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%518 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xf16>) : !torch.vtensor<[640,640],f16>
%519 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xf16>) : !torch.vtensor<[640,640],f16>
%520 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xf16>) : !torch.vtensor<[640,640],f16>
%521 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xf16>) : !torch.vtensor<[640,640],f16>
%522 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%523 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%524 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%525 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xf16>) : !torch.vtensor<[640,640],f16>
%526 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x768xf16>) : !torch.vtensor<[640,768],f16>
%527 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x768xf16>) : !torch.vtensor<[640,768],f16>
%528 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xf16>) : !torch.vtensor<[640,640],f16>
%529 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%530 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%531 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%532 = torch.vtensor.literal(dense_resource<__elided__> : tensor<5120x640xf16>) : !torch.vtensor<[5120,640],f16>
%533 = torch.vtensor.literal(dense_resource<__elided__> : tensor<5120xf16>) : !torch.vtensor<[5120],f16>
%534 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x2560xf16>) : !torch.vtensor<[640,2560],f16>
%535 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%536 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640x1x1xf16>) : !torch.vtensor<[640,640,1,1],f16>
%537 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%538 = torch.vtensor.literal(dense_resource<__elided__> : tensor<960xf16>) : !torch.vtensor<[960],f16>
%539 = torch.vtensor.literal(dense_resource<__elided__> : tensor<960xf16>) : !torch.vtensor<[960],f16>
%540 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x960x3x3xf16>) : !torch.vtensor<[640,960,3,3],f16>
%541 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%542 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x1280xf16>) : !torch.vtensor<[640,1280],f16>
%543 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%544 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%545 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%546 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640x3x3xf16>) : !torch.vtensor<[640,640,3,3],f16>
%547 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%548 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x960x1x1xf16>) : !torch.vtensor<[640,960,1,1],f16>
%549 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%550 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%551 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%552 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640x1x1xf16>) : !torch.vtensor<[640,640,1,1],f16>
%553 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%554 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%555 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%556 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xf16>) : !torch.vtensor<[640,640],f16>
%557 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xf16>) : !torch.vtensor<[640,640],f16>
%558 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xf16>) : !torch.vtensor<[640,640],f16>
%559 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xf16>) : !torch.vtensor<[640,640],f16>
%560 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%561 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%562 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%563 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xf16>) : !torch.vtensor<[640,640],f16>
%564 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x768xf16>) : !torch.vtensor<[640,768],f16>
%565 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x768xf16>) : !torch.vtensor<[640,768],f16>
%566 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xf16>) : !torch.vtensor<[640,640],f16>
%567 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%568 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%569 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%570 = torch.vtensor.literal(dense_resource<__elided__> : tensor<5120x640xf16>) : !torch.vtensor<[5120,640],f16>
%571 = torch.vtensor.literal(dense_resource<__elided__> : tensor<5120xf16>) : !torch.vtensor<[5120],f16>
%572 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x2560xf16>) : !torch.vtensor<[640,2560],f16>
%573 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%574 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640x1x1xf16>) : !torch.vtensor<[640,640,1,1],f16>
%575 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%576 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640x3x3xf16>) : !torch.vtensor<[640,640,3,3],f16>
%577 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%578 = torch.vtensor.literal(dense_resource<__elided__> : tensor<960xf16>) : !torch.vtensor<[960],f16>
%579 = torch.vtensor.literal(dense_resource<__elided__> : tensor<960xf16>) : !torch.vtensor<[960],f16>
%580 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x960x3x3xf16>) : !torch.vtensor<[320,960,3,3],f16>
%581 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%582 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x1280xf16>) : !torch.vtensor<[320,1280],f16>
%583 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%584 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%585 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%586 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320x3x3xf16>) : !torch.vtensor<[320,320,3,3],f16>
%587 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%588 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x960x1x1xf16>) : !torch.vtensor<[320,960,1,1],f16>
%589 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%590 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%591 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%592 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320x1x1xf16>) : !torch.vtensor<[320,320,1,1],f16>
%593 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%594 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%595 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%596 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xf16>) : !torch.vtensor<[320,320],f16>
%597 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xf16>) : !torch.vtensor<[320,320],f16>
%598 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xf16>) : !torch.vtensor<[320,320],f16>
%599 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xf16>) : !torch.vtensor<[320,320],f16>
%600 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%601 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%602 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%603 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xf16>) : !torch.vtensor<[320,320],f16>
%604 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x768xf16>) : !torch.vtensor<[320,768],f16>
%605 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x768xf16>) : !torch.vtensor<[320,768],f16>
%606 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xf16>) : !torch.vtensor<[320,320],f16>
%607 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%608 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%609 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%610 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560x320xf16>) : !torch.vtensor<[2560,320],f16>
%611 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560xf16>) : !torch.vtensor<[2560],f16>
%612 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x1280xf16>) : !torch.vtensor<[320,1280],f16>
%613 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%614 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320x1x1xf16>) : !torch.vtensor<[320,320,1,1],f16>
%615 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%616 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%617 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%618 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x640x3x3xf16>) : !torch.vtensor<[320,640,3,3],f16>
%619 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%620 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x1280xf16>) : !torch.vtensor<[320,1280],f16>
%621 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%622 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%623 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%624 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320x3x3xf16>) : !torch.vtensor<[320,320,3,3],f16>
%625 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%626 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x640x1x1xf16>) : !torch.vtensor<[320,640,1,1],f16>
%627 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%628 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%629 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%630 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320x1x1xf16>) : !torch.vtensor<[320,320,1,1],f16>
%631 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%632 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%633 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%634 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xf16>) : !torch.vtensor<[320,320],f16>
%635 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xf16>) : !torch.vtensor<[320,320],f16>
%636 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xf16>) : !torch.vtensor<[320,320],f16>
%637 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xf16>) : !torch.vtensor<[320,320],f16>
%638 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%639 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%640 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%641 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xf16>) : !torch.vtensor<[320,320],f16>
%642 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x768xf16>) : !torch.vtensor<[320,768],f16>
%643 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x768xf16>) : !torch.vtensor<[320,768],f16>
%644 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xf16>) : !torch.vtensor<[320,320],f16>
%645 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%646 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%647 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%648 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560x320xf16>) : !torch.vtensor<[2560,320],f16>
%649 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560xf16>) : !torch.vtensor<[2560],f16>
%650 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x1280xf16>) : !torch.vtensor<[320,1280],f16>
%651 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%652 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320x1x1xf16>) : !torch.vtensor<[320,320,1,1],f16>
%653 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%654 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%655 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%656 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x640x3x3xf16>) : !torch.vtensor<[320,640,3,3],f16>
%657 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%658 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x1280xf16>) : !torch.vtensor<[320,1280],f16>
%659 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%660 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%661 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%662 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320x3x3xf16>) : !torch.vtensor<[320,320,3,3],f16>
%663 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%664 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x640x1x1xf16>) : !torch.vtensor<[320,640,1,1],f16>
%665 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%666 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%667 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%668 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320x1x1xf16>) : !torch.vtensor<[320,320,1,1],f16>
%669 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%670 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%671 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%672 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xf16>) : !torch.vtensor<[320,320],f16>
%673 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xf16>) : !torch.vtensor<[320,320],f16>
%674 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xf16>) : !torch.vtensor<[320,320],f16>
%675 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xf16>) : !torch.vtensor<[320,320],f16>
%676 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%677 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%678 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%679 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xf16>) : !torch.vtensor<[320,320],f16>
%680 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x768xf16>) : !torch.vtensor<[320,768],f16>
%681 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x768xf16>) : !torch.vtensor<[320,768],f16>
%682 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xf16>) : !torch.vtensor<[320,320],f16>
%683 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%684 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%685 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%686 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560x320xf16>) : !torch.vtensor<[2560,320],f16>
%687 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560xf16>) : !torch.vtensor<[2560],f16>
%688 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x1280xf16>) : !torch.vtensor<[320,1280],f16>
%689 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%690 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320x1x1xf16>) : !torch.vtensor<[320,320,1,1],f16>
%691 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%692 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%693 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%694 = torch.vtensor.literal(dense_resource<__elided__> : tensor<4x320x3x3xf16>) : !torch.vtensor<[4,320,3,3],f16>
%695 = torch.vtensor.literal(dense<[-1.393320e-03, -1.588820e-03, -2.624990e-04, -2.531050e-03]> : tensor<4xf16>) : !torch.vtensor<[4],f16>
%int0 = torch.constant.int 0
%float5.000000e-01 = torch.constant.float 5.000000e-01
%false = torch.constant.bool false
%int6 = torch.constant.int 6
%none = torch.constant.none
%int9223372036854775807 = torch.constant.int 9223372036854775807
%int-1 = torch.constant.int -1
%int5 = torch.constant.int 5
%true = torch.constant.bool true
%int3 = torch.constant.int 3
%float1.000000e-05 = torch.constant.float 1.000000e-05
%int8192 = torch.constant.int 8192
%int-2 = torch.constant.int -2
%int154 = torch.constant.int 154
%int768 = torch.constant.int 768
%int77 = torch.constant.int 77
%str = torch.constant.str "none"
%int2048 = torch.constant.int 2048
%int512 = torch.constant.int 512
%int128 = torch.constant.int 128
%float2.000000e00 = torch.constant.float 2.000000e+00
%696 = torch.prim.ListConstruct %arg0, %arg0 : (!torch.vtensor<[1,4,64,64],f16>, !torch.vtensor<[1,4,64,64],f16>) -> !torch.list<vtensor>
%697 = torch.aten.cat %696, %int0 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[2,4,64,64],f16>
%698 = torch.aten.pow.Tensor_Scalar %arg3, %int2 : !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[],f32>
%699 = torch.aten.add.Tensor %698, %9, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32>
%700 = torch.aten.pow.Tensor_Scalar %699, %float5.000000e-01 : !torch.vtensor<[],f32>, !torch.float -> !torch.vtensor<[],f32>
%701 = torch.aten.div.Tensor %697, %700 : !torch.vtensor<[2,4,64,64],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,4,64,64],f16>
%702 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%703 = torch.aten.broadcast_to %arg1, %702 : !torch.vtensor<[1],f16>, !torch.list<int> -> !torch.vtensor<[2],f16>
%cuda3A0 = torch.constant.device "cuda:0"
%704 = torch.aten.arange.start_step %int0, %int160, %int1, %int6, %none, %cuda3A0, %false : !torch.int, !torch.int, !torch.int, !torch.int, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[160],f32>
%705 = torch.aten.mul.Tensor %704, %8 : !torch.vtensor<[160],f32>, !torch.vtensor<[],f64> -> !torch.vtensor<[160],f32>
%706 = torch.aten.div.Tensor %705, %7 : !torch.vtensor<[160],f32>, !torch.vtensor<[],si64> -> !torch.vtensor<[160],f32>
%707 = torch.aten.exp %706 : !torch.vtensor<[160],f32> -> !torch.vtensor<[160],f32>
%708 = torch.aten.slice.Tensor %703, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2],f16>
%709 = torch.aten.unsqueeze %708, %int1 : !torch.vtensor<[2],f16>, !torch.int -> !torch.vtensor<[2,1],f16>
%710 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%711 = torch.aten.to.dtype %710, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%712 = torch.prim.ListConstruct %int2, %int1 : (!torch.int, !torch.int) -> !torch.list<int>
%713 = torch.aten.broadcast_to %711, %712 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,1],f32>
%714 = torch.valsem.aten.copy %713, %709, %false : !torch.vtensor<[2,1],f32>, !torch.vtensor<[2,1],f16>, !torch.bool -> !torch.vtensor<[2,1],f32>
%715 = torch.aten.unsqueeze %707, %int0 : !torch.vtensor<[160],f32>, !torch.int -> !torch.vtensor<[1,160],f32>
%716 = torch.aten.slice.Tensor %715, %int1, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[1,160],f32>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,160],f32>
%717 = torch.aten.mul.Tensor %714, %716 : !torch.vtensor<[2,1],f32>, !torch.vtensor<[1,160],f32> -> !torch.vtensor<[2,160],f32>
%718 = torch.aten.mul.Tensor %717, %9 : !torch.vtensor<[2,160],f32>, !torch.vtensor<[],si64> -> !torch.vtensor<[2,160],f32>
%719 = torch.aten.sin %718 : !torch.vtensor<[2,160],f32> -> !torch.vtensor<[2,160],f32>
%720 = torch.aten.cos %718 : !torch.vtensor<[2,160],f32> -> !torch.vtensor<[2,160],f32>
%721 = torch.prim.ListConstruct %719, %720 : (!torch.vtensor<[2,160],f32>, !torch.vtensor<[2,160],f32>) -> !torch.list<vtensor>
%722 = torch.aten.cat %721, %int-1 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[2,320],f32>
%723 = torch.aten.slice.Tensor %722, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,320],f32>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,320],f32>
%724 = torch.aten.slice.Tensor %723, %int1, %int160, %int9223372036854775807, %int1 : !torch.vtensor<[2,320],f32>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,160],f32>
%725 = torch.aten.slice.Tensor %722, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,320],f32>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,320],f32>
%726 = torch.aten.slice.Tensor %725, %int1, %int0, %int160, %int1 : !torch.vtensor<[2,320],f32>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,160],f32>
%727 = torch.prim.ListConstruct %724, %726 : (!torch.vtensor<[2,160],f32>, !torch.vtensor<[2,160],f32>) -> !torch.list<vtensor>
%728 = torch.aten.cat %727, %int-1 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[2,320],f32>
%729 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%730 = torch.aten.to.dtype %729, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%731 = torch.prim.ListConstruct %int2, %int320 : (!torch.int, !torch.int) -> !torch.list<int>
%732 = torch.aten.broadcast_to %730, %731 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,320],f16>
%733 = torch.valsem.aten.copy %732, %728, %false : !torch.vtensor<[2,320],f16>, !torch.vtensor<[2,320],f32>, !torch.bool -> !torch.vtensor<[2,320],f16>
%734 = torch.aten.transpose.int %10, %int0, %int1 : !torch.vtensor<[1280,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,1280],f16>
%735 = torch.aten.mm %733, %734 : !torch.vtensor<[2,320],f16>, !torch.vtensor<[320,1280],f16> -> !torch.vtensor<[2,1280],f16>
%736 = torch.aten.mul.Scalar %11, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16>
%737 = torch.aten.add.Tensor %736, %735, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[2,1280],f16>, !torch.int -> !torch.vtensor<[2,1280],f16>
%738 = torch.aten.sigmoid %737 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%739 = torch.aten.mul.Tensor %738, %737 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%740 = torch.aten.transpose.int %12, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%741 = torch.aten.mm %739, %740 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[2,1280],f16>
%742 = torch.aten.mul.Scalar %13, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16>
%743 = torch.aten.add.Tensor %742, %741, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[2,1280],f16>, !torch.int -> !torch.vtensor<[2,1280],f16>
%744 = torch.prim.ListConstruct %int1, %int1 : (!torch.int, !torch.int) -> !torch.list<int>
%745 = torch.prim.ListConstruct %int0, %int0 : (!torch.int, !torch.int) -> !torch.list<int>
%746 = torch.aten.convolution %701, %14, %15, %744, %744, %744, %false, %745, %int1 : !torch.vtensor<[2,4,64,64],f16>, !torch.vtensor<[320,4,3,3],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,320,64,64],f16>
%747 = torch.prim.ListConstruct %int2, %int32, %int10, %int4096 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%748 = torch.aten.view %746, %747 : !torch.vtensor<[2,320,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,32,10,4096],f16>
%749 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%750 = torch.aten.to.dtype %749, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%751 = torch.prim.ListConstruct %int2, %int32, %int10, %int4096 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%752 = torch.aten.broadcast_to %750, %751 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,10,4096],f32>
%753 = torch.valsem.aten.copy %752, %748, %false : !torch.vtensor<[2,32,10,4096],f32>, !torch.vtensor<[2,32,10,4096],f16>, !torch.bool -> !torch.vtensor<[2,32,10,4096],f32>
%754 = torch.prim.ListConstruct %int2, %int3 : (!torch.int, !torch.int) -> !torch.list<int>
%755 = torch.aten.to.dtype %753, %int7, %false, %false, %none : !torch.vtensor<[2,32,10,4096],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,10,4096],f64>
%756 = torch.aten.sum.dim_IntList %755, %754, %true, %none : !torch.vtensor<[2,32,10,4096],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%757 = torch.aten.div.Scalar %756, %int40960 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%758 = torch.aten.sub.Tensor %755, %757, %float1.000000e00 : !torch.vtensor<[2,32,10,4096],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,10,4096],f64>
%759 = torch.aten.mul.Tensor %758, %758 : !torch.vtensor<[2,32,10,4096],f64>, !torch.vtensor<[2,32,10,4096],f64> -> !torch.vtensor<[2,32,10,4096],f64>
%760 = torch.aten.sum.dim_IntList %759, %754, %true, %none : !torch.vtensor<[2,32,10,4096],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%761 = torch.aten.div.Scalar %760, %int40960 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%762 = torch.aten.to.dtype %761, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%763 = torch.aten.sum.dim_IntList %753, %754, %true, %none : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%764 = torch.aten.div.Scalar %763, %int40960 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%765 = torch.aten.add.Tensor %762, %6, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%766 = torch.aten.rsqrt %765 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%767 = torch.aten.sub.Tensor %748, %764, %int1 : !torch.vtensor<[2,32,10,4096],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,10,4096],f32>
%768 = torch.aten.mul.Tensor %767, %766 : !torch.vtensor<[2,32,10,4096],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,10,4096],f32>
%769 = torch.prim.ListConstruct %int2, %int320, %int64, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%770 = torch.aten.view %768, %769 : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f32>
%771 = torch.aten.unsqueeze %16, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16>
%772 = torch.aten.unsqueeze %771, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16>
%773 = torch.aten.mul.Tensor %770, %772 : !torch.vtensor<[2,320,64,64],f32>, !torch.vtensor<[320,1,1],f16> -> !torch.vtensor<[2,320,64,64],f32>
%774 = torch.aten.unsqueeze %17, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16>
%775 = torch.aten.unsqueeze %774, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16>
%776 = torch.aten.add.Tensor %773, %775, %int1 : !torch.vtensor<[2,320,64,64],f32>, !torch.vtensor<[320,1,1],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f32>
%777 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%778 = torch.aten.to.dtype %777, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%779 = torch.prim.ListConstruct %int2, %int320, %int64, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%780 = torch.aten.broadcast_to %778, %779 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f16>
%781 = torch.valsem.aten.copy %780, %776, %false : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f32>, !torch.bool -> !torch.vtensor<[2,320,64,64],f16>
%782 = torch.aten.sigmoid %781 : !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16>
%783 = torch.aten.mul.Tensor %782, %781 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16>
%784 = torch.aten.convolution %783, %18, %19, %744, %744, %744, %false, %745, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[320,320,3,3],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,320,64,64],f16>
%785 = torch.aten.sigmoid %743 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%786 = torch.aten.mul.Tensor %785, %743 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%787 = torch.aten.transpose.int %20, %int0, %int1 : !torch.vtensor<[320,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,320],f16>
%788 = torch.aten.mm %786, %787 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,320],f16> -> !torch.vtensor<[2,320],f16>
%789 = torch.aten.mul.Scalar %21, %int1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320],f16>
%790 = torch.aten.add.Tensor %789, %788, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[2,320],f16>, !torch.int -> !torch.vtensor<[2,320],f16>
%791 = torch.aten.slice.Tensor %790, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,320],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,320],f16>
%792 = torch.aten.slice.Tensor %791, %int1, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,320],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,320],f16>
%793 = torch.aten.unsqueeze %792, %int2 : !torch.vtensor<[2,320],f16>, !torch.int -> !torch.vtensor<[2,320,1],f16>
%794 = torch.aten.unsqueeze %793, %int3 : !torch.vtensor<[2,320,1],f16>, !torch.int -> !torch.vtensor<[2,320,1,1],f16>
%795 = torch.aten.add.Tensor %784, %794, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,1,1],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f16>
%796 = torch.aten.view %795, %747 : !torch.vtensor<[2,320,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,32,10,4096],f16>
%797 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%798 = torch.aten.to.dtype %797, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%799 = torch.prim.ListConstruct %int2, %int32, %int10, %int4096 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%800 = torch.aten.broadcast_to %798, %799 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,10,4096],f32>
%801 = torch.valsem.aten.copy %800, %796, %false : !torch.vtensor<[2,32,10,4096],f32>, !torch.vtensor<[2,32,10,4096],f16>, !torch.bool -> !torch.vtensor<[2,32,10,4096],f32>
%802 = torch.aten.to.dtype %801, %int7, %false, %false, %none : !torch.vtensor<[2,32,10,4096],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,10,4096],f64>
%803 = torch.aten.sum.dim_IntList %802, %754, %true, %none : !torch.vtensor<[2,32,10,4096],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%804 = torch.aten.div.Scalar %803, %int40960 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%805 = torch.aten.sub.Tensor %802, %804, %float1.000000e00 : !torch.vtensor<[2,32,10,4096],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,10,4096],f64>
%806 = torch.aten.mul.Tensor %805, %805 : !torch.vtensor<[2,32,10,4096],f64>, !torch.vtensor<[2,32,10,4096],f64> -> !torch.vtensor<[2,32,10,4096],f64>
%807 = torch.aten.sum.dim_IntList %806, %754, %true, %none : !torch.vtensor<[2,32,10,4096],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%808 = torch.aten.div.Scalar %807, %int40960 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%809 = torch.aten.to.dtype %808, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%810 = torch.aten.sum.dim_IntList %801, %754, %true, %none : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%811 = torch.aten.div.Scalar %810, %int40960 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%812 = torch.aten.add.Tensor %809, %6, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%813 = torch.aten.rsqrt %812 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%814 = torch.aten.sub.Tensor %796, %811, %int1 : !torch.vtensor<[2,32,10,4096],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,10,4096],f32>
%815 = torch.aten.mul.Tensor %814, %813 : !torch.vtensor<[2,32,10,4096],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,10,4096],f32>
%816 = torch.aten.view %815, %769 : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f32>
%817 = torch.aten.unsqueeze %22, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16>
%818 = torch.aten.unsqueeze %817, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16>
%819 = torch.aten.mul.Tensor %816, %818 : !torch.vtensor<[2,320,64,64],f32>, !torch.vtensor<[320,1,1],f16> -> !torch.vtensor<[2,320,64,64],f32>
%820 = torch.aten.unsqueeze %23, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16>
%821 = torch.aten.unsqueeze %820, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16>
%822 = torch.aten.add.Tensor %819, %821, %int1 : !torch.vtensor<[2,320,64,64],f32>, !torch.vtensor<[320,1,1],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f32>
%823 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%824 = torch.aten.to.dtype %823, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%825 = torch.prim.ListConstruct %int2, %int320, %int64, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%826 = torch.aten.broadcast_to %824, %825 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f16>
%827 = torch.valsem.aten.copy %826, %822, %false : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f32>, !torch.bool -> !torch.vtensor<[2,320,64,64],f16>
%828 = torch.aten.sigmoid %827 : !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16>
%829 = torch.aten.mul.Tensor %828, %827 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16>
%830 = torch.aten.convolution %829, %24, %25, %744, %744, %744, %false, %745, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[320,320,3,3],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,320,64,64],f16>
%831 = torch.aten.add.Tensor %746, %830, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f16>
%832 = torch.aten.div.Tensor %831, %5 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[2,320,64,64],f16>
%833 = torch.aten.view %832, %747 : !torch.vtensor<[2,320,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,32,10,4096],f16>
%834 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%835 = torch.aten.to.dtype %834, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%836 = torch.prim.ListConstruct %int2, %int32, %int10, %int4096 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%837 = torch.aten.broadcast_to %835, %836 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,10,4096],f32>
%838 = torch.valsem.aten.copy %837, %833, %false : !torch.vtensor<[2,32,10,4096],f32>, !torch.vtensor<[2,32,10,4096],f16>, !torch.bool -> !torch.vtensor<[2,32,10,4096],f32>
%839 = torch.aten.to.dtype %838, %int7, %false, %false, %none : !torch.vtensor<[2,32,10,4096],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,10,4096],f64>
%840 = torch.aten.sum.dim_IntList %839, %754, %true, %none : !torch.vtensor<[2,32,10,4096],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%841 = torch.aten.div.Scalar %840, %int40960 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%842 = torch.aten.sub.Tensor %839, %841, %float1.000000e00 : !torch.vtensor<[2,32,10,4096],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,10,4096],f64>
%843 = torch.aten.mul.Tensor %842, %842 : !torch.vtensor<[2,32,10,4096],f64>, !torch.vtensor<[2,32,10,4096],f64> -> !torch.vtensor<[2,32,10,4096],f64>
%844 = torch.aten.sum.dim_IntList %843, %754, %true, %none : !torch.vtensor<[2,32,10,4096],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%845 = torch.aten.div.Scalar %844, %int40960 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%846 = torch.aten.to.dtype %845, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%847 = torch.aten.sum.dim_IntList %838, %754, %true, %none : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%848 = torch.aten.div.Scalar %847, %int40960 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%849 = torch.aten.add.Tensor %846, %4, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%850 = torch.aten.rsqrt %849 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%851 = torch.aten.sub.Tensor %833, %848, %int1 : !torch.vtensor<[2,32,10,4096],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,10,4096],f32>
%852 = torch.aten.mul.Tensor %851, %850 : !torch.vtensor<[2,32,10,4096],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,10,4096],f32>
%853 = torch.aten.view %852, %769 : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f32>
%854 = torch.aten.unsqueeze %26, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16>
%855 = torch.aten.unsqueeze %854, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16>
%856 = torch.aten.mul.Tensor %853, %855 : !torch.vtensor<[2,320,64,64],f32>, !torch.vtensor<[320,1,1],f16> -> !torch.vtensor<[2,320,64,64],f32>
%857 = torch.aten.unsqueeze %27, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16>
%858 = torch.aten.unsqueeze %857, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16>
%859 = torch.aten.add.Tensor %856, %858, %int1 : !torch.vtensor<[2,320,64,64],f32>, !torch.vtensor<[320,1,1],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f32>
%860 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%861 = torch.aten.to.dtype %860, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%862 = torch.prim.ListConstruct %int2, %int320, %int64, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%863 = torch.aten.broadcast_to %861, %862 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f16>
%864 = torch.valsem.aten.copy %863, %859, %false : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f32>, !torch.bool -> !torch.vtensor<[2,320,64,64],f16>
%865 = torch.aten.convolution %864, %28, %29, %744, %745, %744, %false, %745, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[320,320,1,1],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,320,64,64],f16>
%866 = torch.prim.ListConstruct %int0, %int2, %int3, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%867 = torch.aten.permute %865, %866 : !torch.vtensor<[2,320,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,64,64,320],f16>
%868 = torch.prim.ListConstruct %int2, %int4096, %int320 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%869 = torch.aten.view %867, %868 : !torch.vtensor<[2,64,64,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%870 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%871 = torch.aten.sum.dim_IntList %869, %870, %true, %none : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,4096,1],f16>
%872 = torch.aten.div.Scalar %871, %int320 : !torch.vtensor<[2,4096,1],f16>, !torch.int -> !torch.vtensor<[2,4096,1],f16>
%873 = torch.prim.ListConstruct %int2, %int4096, %int320 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%874 = torch.aten.broadcast_to %872, %873 : !torch.vtensor<[2,4096,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%875 = torch.aten.sub.Tensor %869, %874, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16>
%876 = torch.aten.mul.Tensor %875, %875 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16>
%877 = torch.aten.sum.dim_IntList %876, %870, %true, %none : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,4096,1],f16>
%878 = torch.aten.div.Scalar %877, %int320 : !torch.vtensor<[2,4096,1],f16>, !torch.int -> !torch.vtensor<[2,4096,1],f16>
%879 = torch.aten.add.Scalar %878, %float1.000000e-05, %int1 : !torch.vtensor<[2,4096,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,4096,1],f16>
%880 = torch.aten.rsqrt %879 : !torch.vtensor<[2,4096,1],f16> -> !torch.vtensor<[2,4096,1],f16>
%881 = torch.prim.ListConstruct %int2, %int4096, %int320 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%882 = torch.aten.broadcast_to %880, %881 : !torch.vtensor<[2,4096,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%883 = torch.aten.mul.Tensor %875, %882 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16>
%884 = torch.aten.mul.Tensor %883, %30 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[320],f16> -> !torch.vtensor<[2,4096,320],f16>
%885 = torch.aten.add.Tensor %884, %31, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16>
%886 = torch.aten.transpose.int %32, %int0, %int1 : !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,320],f16>
%887 = torch.prim.ListConstruct %int8192, %int320 : (!torch.int, !torch.int) -> !torch.list<int>
%888 = torch.aten.view %885, %887 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
%889 = torch.aten.mm %888, %886 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16>
%890 = torch.aten.view %889, %868 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%891 = torch.aten.transpose.int %33, %int0, %int1 : !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,320],f16>
%892 = torch.aten.view %885, %887 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
%893 = torch.aten.mm %892, %891 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16>
%894 = torch.aten.view %893, %868 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%895 = torch.aten.transpose.int %34, %int0, %int1 : !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,320],f16>
%896 = torch.aten.view %885, %887 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
%897 = torch.aten.mm %896, %895 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16>
%898 = torch.aten.view %897, %868 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%899 = torch.prim.ListConstruct %int2, %int4096, %int8, %int40 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%900 = torch.aten.view %890, %899 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16>
%901 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%902 = torch.aten.permute %900, %901 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16>
%903 = torch.aten.clone %902, %int0 : !torch.vtensor<[2,8,4096,40],f16>, !torch.int -> !torch.vtensor<[2,8,4096,40],f16>
%904 = torch.prim.ListConstruct %int16, %int4096, %int40 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%905 = torch.aten.view %903, %904 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%906 = torch.aten.view %894, %899 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16>
%907 = torch.aten.permute %906, %901 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16>
%908 = torch.aten.clone %907, %int0 : !torch.vtensor<[2,8,4096,40],f16>, !torch.int -> !torch.vtensor<[2,8,4096,40],f16>
%909 = torch.aten.view %908, %904 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%910 = torch.aten.view %898, %899 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16>
%911 = torch.aten.permute %910, %901 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16>
%912 = torch.aten.clone %911, %int0 : !torch.vtensor<[2,8,4096,40],f16>, !torch.int -> !torch.vtensor<[2,8,4096,40],f16>
%913 = torch.aten.view %912, %904 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%914 = torch.aten.transpose.int %909, %int-1, %int-2 : !torch.vtensor<[16,4096,40],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,40,4096],f16>
%915 = torch.aten.broadcast_to %905, %904 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%916 = torch.aten.view %915, %904 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%917 = torch.prim.ListConstruct %int16, %int40, %int4096 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%918 = torch.aten.broadcast_to %914, %917 : !torch.vtensor<[16,40,4096],f16>, !torch.list<int> -> !torch.vtensor<[16,40,4096],f16>
%919 = torch.aten.view %918, %917 : !torch.vtensor<[16,40,4096],f16>, !torch.list<int> -> !torch.vtensor<[16,40,4096],f16>
%920 = torch.aten.bmm %916, %919 : !torch.vtensor<[16,4096,40],f16>, !torch.vtensor<[16,40,4096],f16> -> !torch.vtensor<[16,4096,4096],f16>
%921 = torch.prim.ListConstruct %int16, %int4096, %int4096 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%922 = torch.aten.view %920, %921 : !torch.vtensor<[16,4096,4096],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,4096],f16>
%923 = torch.aten.mul.Tensor %922, %3 : !torch.vtensor<[16,4096,4096],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,4096,4096],f16>
%values, %indices = torch.aten.max.dim %923, %int-1, %true : !torch.vtensor<[16,4096,4096],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,4096,1],f16>, !torch.vtensor<[16,4096,1],si64>
%924 = torch.aten.sub.Tensor %923, %values, %float1.000000e00 : !torch.vtensor<[16,4096,4096],f16>, !torch.vtensor<[16,4096,1],f16>, !torch.float -> !torch.vtensor<[16,4096,4096],f16>
%925 = torch.aten.exp %924 : !torch.vtensor<[16,4096,4096],f16> -> !torch.vtensor<[16,4096,4096],f16>
%926 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%927 = torch.aten.sum.dim_IntList %925, %926, %true, %none : !torch.vtensor<[16,4096,4096],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[16,4096,1],f16>
%928 = torch.aten.div.Tensor %925, %927 : !torch.vtensor<[16,4096,4096],f16>, !torch.vtensor<[16,4096,1],f16> -> !torch.vtensor<[16,4096,4096],f16>
%929 = torch.aten.broadcast_to %928, %921 : !torch.vtensor<[16,4096,4096],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,4096],f16>
%930 = torch.aten.view %929, %921 : !torch.vtensor<[16,4096,4096],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,4096],f16>
%931 = torch.aten.broadcast_to %913, %904 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%932 = torch.aten.view %931, %904 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%933 = torch.aten.bmm %930, %932 : !torch.vtensor<[16,4096,4096],f16>, !torch.vtensor<[16,4096,40],f16> -> !torch.vtensor<[16,4096,40],f16>
%934 = torch.aten.view %933, %904 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%935 = torch.prim.ListConstruct %int2, %int8, %int4096, %int40 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%936 = torch.aten.view %934, %935 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16>
%937 = torch.aten.permute %936, %901 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16>
%938 = torch.aten.clone %937, %int0 : !torch.vtensor<[2,4096,8,40],f16>, !torch.int -> !torch.vtensor<[2,4096,8,40],f16>
%939 = torch.aten.view %938, %868 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%940 = torch.aten.transpose.int %35, %int0, %int1 : !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,320],f16>
%941 = torch.aten.view %939, %887 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
%942 = torch.aten.mm %941, %940 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16>
%943 = torch.aten.mul.Scalar %36, %int1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320],f16>
%944 = torch.aten.add.Tensor %943, %942, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[8192,320],f16>, !torch.int -> !torch.vtensor<[8192,320],f16>
%945 = torch.aten.view %944, %868 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%946 = torch.aten.add.Tensor %945, %869, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16>
%947 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%948 = torch.aten.sum.dim_IntList %946, %947, %true, %none : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,4096,1],f16>
%949 = torch.aten.div.Scalar %948, %int320 : !torch.vtensor<[2,4096,1],f16>, !torch.int -> !torch.vtensor<[2,4096,1],f16>
%950 = torch.prim.ListConstruct %int2, %int4096, %int320 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%951 = torch.aten.broadcast_to %949, %950 : !torch.vtensor<[2,4096,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%952 = torch.aten.sub.Tensor %946, %951, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16>
%953 = torch.aten.mul.Tensor %952, %952 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16>
%954 = torch.aten.sum.dim_IntList %953, %947, %true, %none : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,4096,1],f16>
%955 = torch.aten.div.Scalar %954, %int320 : !torch.vtensor<[2,4096,1],f16>, !torch.int -> !torch.vtensor<[2,4096,1],f16>
%956 = torch.aten.add.Scalar %955, %float1.000000e-05, %int1 : !torch.vtensor<[2,4096,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,4096,1],f16>
%957 = torch.aten.rsqrt %956 : !torch.vtensor<[2,4096,1],f16> -> !torch.vtensor<[2,4096,1],f16>
%958 = torch.prim.ListConstruct %int2, %int4096, %int320 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%959 = torch.aten.broadcast_to %957, %958 : !torch.vtensor<[2,4096,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%960 = torch.aten.mul.Tensor %952, %959 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16>
%961 = torch.aten.mul.Tensor %960, %37 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[320],f16> -> !torch.vtensor<[2,4096,320],f16>
%962 = torch.aten.add.Tensor %961, %38, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16>
%963 = torch.aten.transpose.int %39, %int0, %int1 : !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,320],f16>
%964 = torch.aten.view %962, %887 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
%965 = torch.aten.mm %964, %963 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16>
%966 = torch.aten.view %965, %868 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%967 = torch.aten.transpose.int %40, %int0, %int1 : !torch.vtensor<[320,768],f16>, !torch.int, !torch.int -> !torch.vtensor<[768,320],f16>
%968 = torch.prim.ListConstruct %int154, %int768 : (!torch.int, !torch.int) -> !torch.list<int>
%969 = torch.aten.view %arg2, %968 : !torch.vtensor<[2,77,768],f16>, !torch.list<int> -> !torch.vtensor<[154,768],f16>
%970 = torch.aten.mm %969, %967 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,320],f16> -> !torch.vtensor<[154,320],f16>
%971 = torch.prim.ListConstruct %int2, %int77, %int320 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%972 = torch.aten.view %970, %971 : !torch.vtensor<[154,320],f16>, !torch.list<int> -> !torch.vtensor<[2,77,320],f16>
%973 = torch.aten.transpose.int %41, %int0, %int1 : !torch.vtensor<[320,768],f16>, !torch.int, !torch.int -> !torch.vtensor<[768,320],f16>
%974 = torch.aten.view %arg2, %968 : !torch.vtensor<[2,77,768],f16>, !torch.list<int> -> !torch.vtensor<[154,768],f16>
%975 = torch.aten.mm %974, %973 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,320],f16> -> !torch.vtensor<[154,320],f16>
%976 = torch.aten.view %975, %971 : !torch.vtensor<[154,320],f16>, !torch.list<int> -> !torch.vtensor<[2,77,320],f16>
%977 = torch.aten.view %966, %899 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16>
%978 = torch.aten.permute %977, %901 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16>
%979 = torch.aten.clone %978, %int0 : !torch.vtensor<[2,8,4096,40],f16>, !torch.int -> !torch.vtensor<[2,8,4096,40],f16>
%980 = torch.aten.view %979, %904 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%981 = torch.prim.ListConstruct %int2, %int77, %int8, %int40 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%982 = torch.aten.view %972, %981 : !torch.vtensor<[2,77,320],f16>, !torch.list<int> -> !torch.vtensor<[2,77,8,40],f16>
%983 = torch.aten.permute %982, %901 : !torch.vtensor<[2,77,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,40],f16>
%984 = torch.aten.clone %983, %int0 : !torch.vtensor<[2,8,77,40],f16>, !torch.int -> !torch.vtensor<[2,8,77,40],f16>
%985 = torch.prim.ListConstruct %int16, %int77, %int40 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%986 = torch.aten.view %984, %985 : !torch.vtensor<[2,8,77,40],f16>, !torch.list<int> -> !torch.vtensor<[16,77,40],f16>
%987 = torch.aten.view %976, %981 : !torch.vtensor<[2,77,320],f16>, !torch.list<int> -> !torch.vtensor<[2,77,8,40],f16>
%988 = torch.aten.permute %987, %901 : !torch.vtensor<[2,77,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,40],f16>
%989 = torch.aten.clone %988, %int0 : !torch.vtensor<[2,8,77,40],f16>, !torch.int -> !torch.vtensor<[2,8,77,40],f16>
%990 = torch.aten.view %989, %985 : !torch.vtensor<[2,8,77,40],f16>, !torch.list<int> -> !torch.vtensor<[16,77,40],f16>
%991 = torch.aten.transpose.int %986, %int-1, %int-2 : !torch.vtensor<[16,77,40],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,40,77],f16>
%992 = torch.aten.broadcast_to %980, %904 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%993 = torch.aten.view %992, %904 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%994 = torch.prim.ListConstruct %int16, %int40, %int77 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%995 = torch.aten.broadcast_to %991, %994 : !torch.vtensor<[16,40,77],f16>, !torch.list<int> -> !torch.vtensor<[16,40,77],f16>
%996 = torch.aten.view %995, %994 : !torch.vtensor<[16,40,77],f16>, !torch.list<int> -> !torch.vtensor<[16,40,77],f16>
%997 = torch.aten.bmm %993, %996 : !torch.vtensor<[16,4096,40],f16>, !torch.vtensor<[16,40,77],f16> -> !torch.vtensor<[16,4096,77],f16>
%998 = torch.prim.ListConstruct %int16, %int4096, %int77 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%999 = torch.aten.view %997, %998 : !torch.vtensor<[16,4096,77],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,77],f16>
%1000 = torch.aten.mul.Tensor %999, %3 : !torch.vtensor<[16,4096,77],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,4096,77],f16>
%values_0, %indices_1 = torch.aten.max.dim %1000, %int-1, %true : !torch.vtensor<[16,4096,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,4096,1],f16>, !torch.vtensor<[16,4096,1],si64>
%1001 = torch.aten.sub.Tensor %1000, %values_0, %float1.000000e00 : !torch.vtensor<[16,4096,77],f16>, !torch.vtensor<[16,4096,1],f16>, !torch.float -> !torch.vtensor<[16,4096,77],f16>
%1002 = torch.aten.exp %1001 : !torch.vtensor<[16,4096,77],f16> -> !torch.vtensor<[16,4096,77],f16>
%1003 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%1004 = torch.aten.sum.dim_IntList %1002, %1003, %true, %none : !torch.vtensor<[16,4096,77],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[16,4096,1],f16>
%1005 = torch.aten.div.Tensor %1002, %1004 : !torch.vtensor<[16,4096,77],f16>, !torch.vtensor<[16,4096,1],f16> -> !torch.vtensor<[16,4096,77],f16>
%1006 = torch.aten.broadcast_to %1005, %998 : !torch.vtensor<[16,4096,77],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,77],f16>
%1007 = torch.aten.view %1006, %998 : !torch.vtensor<[16,4096,77],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,77],f16>
%1008 = torch.aten.broadcast_to %990, %985 : !torch.vtensor<[16,77,40],f16>, !torch.list<int> -> !torch.vtensor<[16,77,40],f16>
%1009 = torch.aten.view %1008, %985 : !torch.vtensor<[16,77,40],f16>, !torch.list<int> -> !torch.vtensor<[16,77,40],f16>
%1010 = torch.aten.bmm %1007, %1009 : !torch.vtensor<[16,4096,77],f16>, !torch.vtensor<[16,77,40],f16> -> !torch.vtensor<[16,4096,40],f16>
%1011 = torch.aten.view %1010, %904 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%1012 = torch.aten.view %1011, %935 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16>
%1013 = torch.aten.permute %1012, %901 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16>
%1014 = torch.aten.clone %1013, %int0 : !torch.vtensor<[2,4096,8,40],f16>, !torch.int -> !torch.vtensor<[2,4096,8,40],f16>
%1015 = torch.aten.view %1014, %868 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%1016 = torch.aten.transpose.int %42, %int0, %int1 : !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,320],f16>
%1017 = torch.aten.view %1015, %887 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
%1018 = torch.aten.mm %1017, %1016 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16>
%1019 = torch.aten.mul.Scalar %43, %int1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320],f16>
%1020 = torch.aten.add.Tensor %1019, %1018, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[8192,320],f16>, !torch.int -> !torch.vtensor<[8192,320],f16>
%1021 = torch.aten.view %1020, %868 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%1022 = torch.aten.add.Tensor %1021, %946, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16>
%1023 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%1024 = torch.aten.sum.dim_IntList %1022, %1023, %true, %none : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,4096,1],f16>
%1025 = torch.aten.div.Scalar %1024, %int320 : !torch.vtensor<[2,4096,1],f16>, !torch.int -> !torch.vtensor<[2,4096,1],f16>
%1026 = torch.prim.ListConstruct %int2, %int4096, %int320 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1027 = torch.aten.broadcast_to %1025, %1026 : !torch.vtensor<[2,4096,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%1028 = torch.aten.sub.Tensor %1022, %1027, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16>
%1029 = torch.aten.mul.Tensor %1028, %1028 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16>
%1030 = torch.aten.sum.dim_IntList %1029, %1023, %true, %none : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,4096,1],f16>
%1031 = torch.aten.div.Scalar %1030, %int320 : !torch.vtensor<[2,4096,1],f16>, !torch.int -> !torch.vtensor<[2,4096,1],f16>
%1032 = torch.aten.add.Scalar %1031, %float1.000000e-05, %int1 : !torch.vtensor<[2,4096,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,4096,1],f16>
%1033 = torch.aten.rsqrt %1032 : !torch.vtensor<[2,4096,1],f16> -> !torch.vtensor<[2,4096,1],f16>
%1034 = torch.prim.ListConstruct %int2, %int4096, %int320 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1035 = torch.aten.broadcast_to %1033, %1034 : !torch.vtensor<[2,4096,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%1036 = torch.aten.mul.Tensor %1028, %1035 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16>
%1037 = torch.aten.mul.Tensor %1036, %44 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[320],f16> -> !torch.vtensor<[2,4096,320],f16>
%1038 = torch.aten.add.Tensor %1037, %45, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16>
%1039 = torch.aten.transpose.int %46, %int0, %int1 : !torch.vtensor<[2560,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,2560],f16>
%1040 = torch.aten.view %1038, %887 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
%1041 = torch.aten.mm %1040, %1039 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,2560],f16> -> !torch.vtensor<[8192,2560],f16>
%1042 = torch.aten.mul.Scalar %47, %int1 : !torch.vtensor<[2560],f16>, !torch.int -> !torch.vtensor<[2560],f16>
%1043 = torch.aten.add.Tensor %1042, %1041, %int1 : !torch.vtensor<[2560],f16>, !torch.vtensor<[8192,2560],f16>, !torch.int -> !torch.vtensor<[8192,2560],f16>
%1044 = torch.prim.ListConstruct %int2, %int4096, %int2560 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1045 = torch.aten.view %1043, %1044 : !torch.vtensor<[8192,2560],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,2560],f16>
%1046 = torch.aten.slice.Tensor %1045, %int-1, %int0, %int1280, %int1 : !torch.vtensor<[2,4096,2560],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,4096,1280],f16>
%1047 = torch.aten.slice.Tensor %1045, %int-1, %int1280, %int2560, %int1 : !torch.vtensor<[2,4096,2560],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,4096,1280],f16>
%1048 = torch.aten.gelu %1047, %str : !torch.vtensor<[2,4096,1280],f16>, !torch.str -> !torch.vtensor<[2,4096,1280],f16>
%1049 = torch.aten.mul.Tensor %1046, %1048 : !torch.vtensor<[2,4096,1280],f16>, !torch.vtensor<[2,4096,1280],f16> -> !torch.vtensor<[2,4096,1280],f16>
%1050 = torch.aten.transpose.int %48, %int0, %int1 : !torch.vtensor<[320,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,320],f16>
%1051 = torch.prim.ListConstruct %int8192, %int1280 : (!torch.int, !torch.int) -> !torch.list<int>
%1052 = torch.aten.view %1049, %1051 : !torch.vtensor<[2,4096,1280],f16>, !torch.list<int> -> !torch.vtensor<[8192,1280],f16>
%1053 = torch.aten.mm %1052, %1050 : !torch.vtensor<[8192,1280],f16>, !torch.vtensor<[1280,320],f16> -> !torch.vtensor<[8192,320],f16>
%1054 = torch.aten.mul.Scalar %49, %int1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320],f16>
%1055 = torch.aten.add.Tensor %1054, %1053, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[8192,320],f16>, !torch.int -> !torch.vtensor<[8192,320],f16>
%1056 = torch.aten.view %1055, %868 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%1057 = torch.aten.add.Tensor %1056, %1022, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16>
%1058 = torch.prim.ListConstruct %int2, %int64, %int64, %int320 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1059 = torch.aten.view %1057, %1058 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[2,64,64,320],f16>
%1060 = torch.prim.ListConstruct %int0, %int3, %int1, %int2 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1061 = torch.aten.permute %1059, %1060 : !torch.vtensor<[2,64,64,320],f16>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f16>
%1062 = torch.aten.convolution %1061, %50, %51, %744, %745, %744, %false, %745, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[320,320,1,1],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,320,64,64],f16>
%1063 = torch.aten.add.Tensor %1062, %832, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f16>
%1064 = torch.aten.clone %1063, %int0 : !torch.vtensor<[2,320,64,64],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f16>
%1065 = torch.aten.view %1064, %747 : !torch.vtensor<[2,320,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,32,10,4096],f16>
%1066 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%1067 = torch.aten.to.dtype %1066, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%1068 = torch.prim.ListConstruct %int2, %int32, %int10, %int4096 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1069 = torch.aten.broadcast_to %1067, %1068 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,10,4096],f32>
%1070 = torch.valsem.aten.copy %1069, %1065, %false : !torch.vtensor<[2,32,10,4096],f32>, !torch.vtensor<[2,32,10,4096],f16>, !torch.bool -> !torch.vtensor<[2,32,10,4096],f32>
%1071 = torch.aten.to.dtype %1070, %int7, %false, %false, %none : !torch.vtensor<[2,32,10,4096],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,10,4096],f64>
%1072 = torch.aten.sum.dim_IntList %1071, %754, %true, %none : !torch.vtensor<[2,32,10,4096],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%1073 = torch.aten.div.Scalar %1072, %int40960 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%1074 = torch.aten.sub.Tensor %1071, %1073, %float1.000000e00 : !torch.vtensor<[2,32,10,4096],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,10,4096],f64>
%1075 = torch.aten.mul.Tensor %1074, %1074 : !torch.vtensor<[2,32,10,4096],f64>, !torch.vtensor<[2,32,10,4096],f64> -> !torch.vtensor<[2,32,10,4096],f64>
%1076 = torch.aten.sum.dim_IntList %1075, %754, %true, %none : !torch.vtensor<[2,32,10,4096],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%1077 = torch.aten.div.Scalar %1076, %int40960 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%1078 = torch.aten.to.dtype %1077, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%1079 = torch.aten.sum.dim_IntList %1070, %754, %true, %none : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%1080 = torch.aten.div.Scalar %1079, %int40960 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%1081 = torch.aten.add.Tensor %1078, %6, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%1082 = torch.aten.rsqrt %1081 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%1083 = torch.aten.sub.Tensor %1065, %1080, %int1 : !torch.vtensor<[2,32,10,4096],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,10,4096],f32>
%1084 = torch.aten.mul.Tensor %1083, %1082 : !torch.vtensor<[2,32,10,4096],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,10,4096],f32>
%1085 = torch.aten.view %1084, %769 : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f32>
%1086 = torch.aten.unsqueeze %52, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16>
%1087 = torch.aten.unsqueeze %1086, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16>
%1088 = torch.aten.mul.Tensor %1085, %1087 : !torch.vtensor<[2,320,64,64],f32>, !torch.vtensor<[320,1,1],f16> -> !torch.vtensor<[2,320,64,64],f32>
%1089 = torch.aten.unsqueeze %53, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16>
%1090 = torch.aten.unsqueeze %1089, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16>
%1091 = torch.aten.add.Tensor %1088, %1090, %int1 : !torch.vtensor<[2,320,64,64],f32>, !torch.vtensor<[320,1,1],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f32>
%1092 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%1093 = torch.aten.to.dtype %1092, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%1094 = torch.prim.ListConstruct %int2, %int320, %int64, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1095 = torch.aten.broadcast_to %1093, %1094 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f16>
%1096 = torch.valsem.aten.copy %1095, %1091, %false : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f32>, !torch.bool -> !torch.vtensor<[2,320,64,64],f16>
%1097 = torch.aten.sigmoid %1096 : !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16>
%1098 = torch.aten.mul.Tensor %1097, %1096 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16>
%1099 = torch.aten.convolution %1098, %54, %55, %744, %744, %744, %false, %745, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[320,320,3,3],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,320,64,64],f16>
%1100 = torch.aten.sigmoid %743 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%1101 = torch.aten.mul.Tensor %1100, %743 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%1102 = torch.aten.transpose.int %56, %int0, %int1 : !torch.vtensor<[320,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,320],f16>
%1103 = torch.aten.mm %1101, %1102 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,320],f16> -> !torch.vtensor<[2,320],f16>
%1104 = torch.aten.mul.Scalar %57, %int1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320],f16>
%1105 = torch.aten.add.Tensor %1104, %1103, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[2,320],f16>, !torch.int -> !torch.vtensor<[2,320],f16>
%1106 = torch.aten.slice.Tensor %1105, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,320],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,320],f16>
%1107 = torch.aten.slice.Tensor %1106, %int1, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,320],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,320],f16>
%1108 = torch.aten.unsqueeze %1107, %int2 : !torch.vtensor<[2,320],f16>, !torch.int -> !torch.vtensor<[2,320,1],f16>
%1109 = torch.aten.unsqueeze %1108, %int3 : !torch.vtensor<[2,320,1],f16>, !torch.int -> !torch.vtensor<[2,320,1,1],f16>
%1110 = torch.aten.add.Tensor %1099, %1109, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,1,1],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f16>
%1111 = torch.aten.view %1110, %747 : !torch.vtensor<[2,320,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,32,10,4096],f16>
%1112 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%1113 = torch.aten.to.dtype %1112, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%1114 = torch.prim.ListConstruct %int2, %int32, %int10, %int4096 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1115 = torch.aten.broadcast_to %1113, %1114 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,10,4096],f32>
%1116 = torch.valsem.aten.copy %1115, %1111, %false : !torch.vtensor<[2,32,10,4096],f32>, !torch.vtensor<[2,32,10,4096],f16>, !torch.bool -> !torch.vtensor<[2,32,10,4096],f32>
%1117 = torch.aten.to.dtype %1116, %int7, %false, %false, %none : !torch.vtensor<[2,32,10,4096],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,10,4096],f64>
%1118 = torch.aten.sum.dim_IntList %1117, %754, %true, %none : !torch.vtensor<[2,32,10,4096],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%1119 = torch.aten.div.Scalar %1118, %int40960 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%1120 = torch.aten.sub.Tensor %1117, %1119, %float1.000000e00 : !torch.vtensor<[2,32,10,4096],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,10,4096],f64>
%1121 = torch.aten.mul.Tensor %1120, %1120 : !torch.vtensor<[2,32,10,4096],f64>, !torch.vtensor<[2,32,10,4096],f64> -> !torch.vtensor<[2,32,10,4096],f64>
%1122 = torch.aten.sum.dim_IntList %1121, %754, %true, %none : !torch.vtensor<[2,32,10,4096],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%1123 = torch.aten.div.Scalar %1122, %int40960 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%1124 = torch.aten.to.dtype %1123, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%1125 = torch.aten.sum.dim_IntList %1116, %754, %true, %none : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%1126 = torch.aten.div.Scalar %1125, %int40960 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%1127 = torch.aten.add.Tensor %1124, %6, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%1128 = torch.aten.rsqrt %1127 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%1129 = torch.aten.sub.Tensor %1111, %1126, %int1 : !torch.vtensor<[2,32,10,4096],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,10,4096],f32>
%1130 = torch.aten.mul.Tensor %1129, %1128 : !torch.vtensor<[2,32,10,4096],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,10,4096],f32>
%1131 = torch.aten.view %1130, %769 : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f32>
%1132 = torch.aten.unsqueeze %58, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16>
%1133 = torch.aten.unsqueeze %1132, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16>
%1134 = torch.aten.mul.Tensor %1131, %1133 : !torch.vtensor<[2,320,64,64],f32>, !torch.vtensor<[320,1,1],f16> -> !torch.vtensor<[2,320,64,64],f32>
%1135 = torch.aten.unsqueeze %59, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16>
%1136 = torch.aten.unsqueeze %1135, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16>
%1137 = torch.aten.add.Tensor %1134, %1136, %int1 : !torch.vtensor<[2,320,64,64],f32>, !torch.vtensor<[320,1,1],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f32>
%1138 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%1139 = torch.aten.to.dtype %1138, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%1140 = torch.prim.ListConstruct %int2, %int320, %int64, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1141 = torch.aten.broadcast_to %1139, %1140 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f16>
%1142 = torch.valsem.aten.copy %1141, %1137, %false : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f32>, !torch.bool -> !torch.vtensor<[2,320,64,64],f16>
%1143 = torch.aten.sigmoid %1142 : !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16>
%1144 = torch.aten.mul.Tensor %1143, %1142 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16>
%1145 = torch.aten.convolution %1144, %60, %61, %744, %744, %744, %false, %745, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[320,320,3,3],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,320,64,64],f16>
%1146 = torch.aten.add.Tensor %1063, %1145, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f16>
%1147 = torch.aten.div.Tensor %1146, %5 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[2,320,64,64],f16>
%1148 = torch.aten.clone %1147, %int0 : !torch.vtensor<[2,320,64,64],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f16>
%1149 = torch.aten.view %1148, %747 : !torch.vtensor<[2,320,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,32,10,4096],f16>
%1150 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%1151 = torch.aten.to.dtype %1150, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%1152 = torch.prim.ListConstruct %int2, %int32, %int10, %int4096 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1153 = torch.aten.broadcast_to %1151, %1152 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,10,4096],f32>
%1154 = torch.valsem.aten.copy %1153, %1149, %false : !torch.vtensor<[2,32,10,4096],f32>, !torch.vtensor<[2,32,10,4096],f16>, !torch.bool -> !torch.vtensor<[2,32,10,4096],f32>
%1155 = torch.aten.to.dtype %1154, %int7, %false, %false, %none : !torch.vtensor<[2,32,10,4096],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,10,4096],f64>
%1156 = torch.aten.sum.dim_IntList %1155, %754, %true, %none : !torch.vtensor<[2,32,10,4096],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%1157 = torch.aten.div.Scalar %1156, %int40960 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%1158 = torch.aten.sub.Tensor %1155, %1157, %float1.000000e00 : !torch.vtensor<[2,32,10,4096],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,10,4096],f64>
%1159 = torch.aten.mul.Tensor %1158, %1158 : !torch.vtensor<[2,32,10,4096],f64>, !torch.vtensor<[2,32,10,4096],f64> -> !torch.vtensor<[2,32,10,4096],f64>
%1160 = torch.aten.sum.dim_IntList %1159, %754, %true, %none : !torch.vtensor<[2,32,10,4096],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%1161 = torch.aten.div.Scalar %1160, %int40960 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%1162 = torch.aten.to.dtype %1161, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%1163 = torch.aten.sum.dim_IntList %1154, %754, %true, %none : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%1164 = torch.aten.div.Scalar %1163, %int40960 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%1165 = torch.aten.add.Tensor %1162, %4, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%1166 = torch.aten.rsqrt %1165 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%1167 = torch.aten.sub.Tensor %1149, %1164, %int1 : !torch.vtensor<[2,32,10,4096],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,10,4096],f32>
%1168 = torch.aten.mul.Tensor %1167, %1166 : !torch.vtensor<[2,32,10,4096],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,10,4096],f32>
%1169 = torch.aten.view %1168, %769 : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f32>
%1170 = torch.aten.unsqueeze %62, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16>
%1171 = torch.aten.unsqueeze %1170, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16>
%1172 = torch.aten.mul.Tensor %1169, %1171 : !torch.vtensor<[2,320,64,64],f32>, !torch.vtensor<[320,1,1],f16> -> !torch.vtensor<[2,320,64,64],f32>
%1173 = torch.aten.unsqueeze %63, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16>
%1174 = torch.aten.unsqueeze %1173, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16>
%1175 = torch.aten.add.Tensor %1172, %1174, %int1 : !torch.vtensor<[2,320,64,64],f32>, !torch.vtensor<[320,1,1],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f32>
%1176 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%1177 = torch.aten.to.dtype %1176, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%1178 = torch.prim.ListConstruct %int2, %int320, %int64, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1179 = torch.aten.broadcast_to %1177, %1178 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f16>
%1180 = torch.valsem.aten.copy %1179, %1175, %false : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f32>, !torch.bool -> !torch.vtensor<[2,320,64,64],f16>
%1181 = torch.aten.convolution %1180, %64, %65, %744, %745, %744, %false, %745, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[320,320,1,1],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,320,64,64],f16>
%1182 = torch.aten.permute %1181, %866 : !torch.vtensor<[2,320,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,64,64,320],f16>
%1183 = torch.aten.view %1182, %868 : !torch.vtensor<[2,64,64,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%1184 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%1185 = torch.aten.sum.dim_IntList %1183, %1184, %true, %none : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,4096,1],f16>
%1186 = torch.aten.div.Scalar %1185, %int320 : !torch.vtensor<[2,4096,1],f16>, !torch.int -> !torch.vtensor<[2,4096,1],f16>
%1187 = torch.prim.ListConstruct %int2, %int4096, %int320 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1188 = torch.aten.broadcast_to %1186, %1187 : !torch.vtensor<[2,4096,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%1189 = torch.aten.sub.Tensor %1183, %1188, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16>
%1190 = torch.aten.mul.Tensor %1189, %1189 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16>
%1191 = torch.aten.sum.dim_IntList %1190, %1184, %true, %none : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,4096,1],f16>
%1192 = torch.aten.div.Scalar %1191, %int320 : !torch.vtensor<[2,4096,1],f16>, !torch.int -> !torch.vtensor<[2,4096,1],f16>
%1193 = torch.aten.add.Scalar %1192, %float1.000000e-05, %int1 : !torch.vtensor<[2,4096,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,4096,1],f16>
%1194 = torch.aten.rsqrt %1193 : !torch.vtensor<[2,4096,1],f16> -> !torch.vtensor<[2,4096,1],f16>
%1195 = torch.prim.ListConstruct %int2, %int4096, %int320 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1196 = torch.aten.broadcast_to %1194, %1195 : !torch.vtensor<[2,4096,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%1197 = torch.aten.mul.Tensor %1189, %1196 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16>
%1198 = torch.aten.mul.Tensor %1197, %66 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[320],f16> -> !torch.vtensor<[2,4096,320],f16>
%1199 = torch.aten.add.Tensor %1198, %67, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16>
%1200 = torch.aten.transpose.int %68, %int0, %int1 : !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,320],f16>
%1201 = torch.aten.view %1199, %887 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
%1202 = torch.aten.mm %1201, %1200 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16>
%1203 = torch.aten.view %1202, %868 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%1204 = torch.aten.transpose.int %69, %int0, %int1 : !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,320],f16>
%1205 = torch.aten.view %1199, %887 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
%1206 = torch.aten.mm %1205, %1204 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16>
%1207 = torch.aten.view %1206, %868 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%1208 = torch.aten.transpose.int %70, %int0, %int1 : !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,320],f16>
%1209 = torch.aten.view %1199, %887 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
%1210 = torch.aten.mm %1209, %1208 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16>
%1211 = torch.aten.view %1210, %868 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%1212 = torch.aten.view %1203, %899 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16>
%1213 = torch.aten.permute %1212, %901 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16>
%1214 = torch.aten.clone %1213, %int0 : !torch.vtensor<[2,8,4096,40],f16>, !torch.int -> !torch.vtensor<[2,8,4096,40],f16>
%1215 = torch.aten.view %1214, %904 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%1216 = torch.aten.view %1207, %899 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16>
%1217 = torch.aten.permute %1216, %901 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16>
%1218 = torch.aten.clone %1217, %int0 : !torch.vtensor<[2,8,4096,40],f16>, !torch.int -> !torch.vtensor<[2,8,4096,40],f16>
%1219 = torch.aten.view %1218, %904 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%1220 = torch.aten.view %1211, %899 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16>
%1221 = torch.aten.permute %1220, %901 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16>
%1222 = torch.aten.clone %1221, %int0 : !torch.vtensor<[2,8,4096,40],f16>, !torch.int -> !torch.vtensor<[2,8,4096,40],f16>
%1223 = torch.aten.view %1222, %904 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%1224 = torch.aten.transpose.int %1219, %int-1, %int-2 : !torch.vtensor<[16,4096,40],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,40,4096],f16>
%1225 = torch.aten.broadcast_to %1215, %904 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%1226 = torch.aten.view %1225, %904 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%1227 = torch.aten.broadcast_to %1224, %917 : !torch.vtensor<[16,40,4096],f16>, !torch.list<int> -> !torch.vtensor<[16,40,4096],f16>
%1228 = torch.aten.view %1227, %917 : !torch.vtensor<[16,40,4096],f16>, !torch.list<int> -> !torch.vtensor<[16,40,4096],f16>
%1229 = torch.aten.bmm %1226, %1228 : !torch.vtensor<[16,4096,40],f16>, !torch.vtensor<[16,40,4096],f16> -> !torch.vtensor<[16,4096,4096],f16>
%1230 = torch.aten.view %1229, %921 : !torch.vtensor<[16,4096,4096],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,4096],f16>
%1231 = torch.aten.mul.Tensor %1230, %3 : !torch.vtensor<[16,4096,4096],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,4096,4096],f16>
%values_2, %indices_3 = torch.aten.max.dim %1231, %int-1, %true : !torch.vtensor<[16,4096,4096],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,4096,1],f16>, !torch.vtensor<[16,4096,1],si64>
%1232 = torch.aten.sub.Tensor %1231, %values_2, %float1.000000e00 : !torch.vtensor<[16,4096,4096],f16>, !torch.vtensor<[16,4096,1],f16>, !torch.float -> !torch.vtensor<[16,4096,4096],f16>
%1233 = torch.aten.exp %1232 : !torch.vtensor<[16,4096,4096],f16> -> !torch.vtensor<[16,4096,4096],f16>
%1234 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%1235 = torch.aten.sum.dim_IntList %1233, %1234, %true, %none : !torch.vtensor<[16,4096,4096],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[16,4096,1],f16>
%1236 = torch.aten.div.Tensor %1233, %1235 : !torch.vtensor<[16,4096,4096],f16>, !torch.vtensor<[16,4096,1],f16> -> !torch.vtensor<[16,4096,4096],f16>
%1237 = torch.aten.broadcast_to %1236, %921 : !torch.vtensor<[16,4096,4096],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,4096],f16>
%1238 = torch.aten.view %1237, %921 : !torch.vtensor<[16,4096,4096],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,4096],f16>
%1239 = torch.aten.broadcast_to %1223, %904 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%1240 = torch.aten.view %1239, %904 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%1241 = torch.aten.bmm %1238, %1240 : !torch.vtensor<[16,4096,4096],f16>, !torch.vtensor<[16,4096,40],f16> -> !torch.vtensor<[16,4096,40],f16>
%1242 = torch.aten.view %1241, %904 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%1243 = torch.aten.view %1242, %935 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16>
%1244 = torch.aten.permute %1243, %901 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16>
%1245 = torch.aten.clone %1244, %int0 : !torch.vtensor<[2,4096,8,40],f16>, !torch.int -> !torch.vtensor<[2,4096,8,40],f16>
%1246 = torch.aten.view %1245, %868 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%1247 = torch.aten.transpose.int %71, %int0, %int1 : !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,320],f16>
%1248 = torch.aten.view %1246, %887 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
%1249 = torch.aten.mm %1248, %1247 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16>
%1250 = torch.aten.mul.Scalar %72, %int1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320],f16>
%1251 = torch.aten.add.Tensor %1250, %1249, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[8192,320],f16>, !torch.int -> !torch.vtensor<[8192,320],f16>
%1252 = torch.aten.view %1251, %868 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%1253 = torch.aten.add.Tensor %1252, %1183, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16>
%1254 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%1255 = torch.aten.sum.dim_IntList %1253, %1254, %true, %none : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,4096,1],f16>
%1256 = torch.aten.div.Scalar %1255, %int320 : !torch.vtensor<[2,4096,1],f16>, !torch.int -> !torch.vtensor<[2,4096,1],f16>
%1257 = torch.prim.ListConstruct %int2, %int4096, %int320 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1258 = torch.aten.broadcast_to %1256, %1257 : !torch.vtensor<[2,4096,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%1259 = torch.aten.sub.Tensor %1253, %1258, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16>
%1260 = torch.aten.mul.Tensor %1259, %1259 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16>
%1261 = torch.aten.sum.dim_IntList %1260, %1254, %true, %none : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,4096,1],f16>
%1262 = torch.aten.div.Scalar %1261, %int320 : !torch.vtensor<[2,4096,1],f16>, !torch.int -> !torch.vtensor<[2,4096,1],f16>
%1263 = torch.aten.add.Scalar %1262, %float1.000000e-05, %int1 : !torch.vtensor<[2,4096,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,4096,1],f16>
%1264 = torch.aten.rsqrt %1263 : !torch.vtensor<[2,4096,1],f16> -> !torch.vtensor<[2,4096,1],f16>
%1265 = torch.prim.ListConstruct %int2, %int4096, %int320 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1266 = torch.aten.broadcast_to %1264, %1265 : !torch.vtensor<[2,4096,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%1267 = torch.aten.mul.Tensor %1259, %1266 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16>
%1268 = torch.aten.mul.Tensor %1267, %73 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[320],f16> -> !torch.vtensor<[2,4096,320],f16>
%1269 = torch.aten.add.Tensor %1268, %74, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16>
%1270 = torch.aten.transpose.int %75, %int0, %int1 : !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,320],f16>
%1271 = torch.aten.view %1269, %887 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
%1272 = torch.aten.mm %1271, %1270 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16>
%1273 = torch.aten.view %1272, %868 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%1274 = torch.aten.transpose.int %76, %int0, %int1 : !torch.vtensor<[320,768],f16>, !torch.int, !torch.int -> !torch.vtensor<[768,320],f16>
%1275 = torch.aten.view %arg2, %968 : !torch.vtensor<[2,77,768],f16>, !torch.list<int> -> !torch.vtensor<[154,768],f16>
%1276 = torch.aten.mm %1275, %1274 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,320],f16> -> !torch.vtensor<[154,320],f16>
%1277 = torch.aten.view %1276, %971 : !torch.vtensor<[154,320],f16>, !torch.list<int> -> !torch.vtensor<[2,77,320],f16>
%1278 = torch.aten.transpose.int %77, %int0, %int1 : !torch.vtensor<[320,768],f16>, !torch.int, !torch.int -> !torch.vtensor<[768,320],f16>
%1279 = torch.aten.view %arg2, %968 : !torch.vtensor<[2,77,768],f16>, !torch.list<int> -> !torch.vtensor<[154,768],f16>
%1280 = torch.aten.mm %1279, %1278 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,320],f16> -> !torch.vtensor<[154,320],f16>
%1281 = torch.aten.view %1280, %971 : !torch.vtensor<[154,320],f16>, !torch.list<int> -> !torch.vtensor<[2,77,320],f16>
%1282 = torch.aten.view %1273, %899 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16>
%1283 = torch.aten.permute %1282, %901 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16>
%1284 = torch.aten.clone %1283, %int0 : !torch.vtensor<[2,8,4096,40],f16>, !torch.int -> !torch.vtensor<[2,8,4096,40],f16>
%1285 = torch.aten.view %1284, %904 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%1286 = torch.aten.view %1277, %981 : !torch.vtensor<[2,77,320],f16>, !torch.list<int> -> !torch.vtensor<[2,77,8,40],f16>
%1287 = torch.aten.permute %1286, %901 : !torch.vtensor<[2,77,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,40],f16>
%1288 = torch.aten.clone %1287, %int0 : !torch.vtensor<[2,8,77,40],f16>, !torch.int -> !torch.vtensor<[2,8,77,40],f16>
%1289 = torch.aten.view %1288, %985 : !torch.vtensor<[2,8,77,40],f16>, !torch.list<int> -> !torch.vtensor<[16,77,40],f16>
%1290 = torch.aten.view %1281, %981 : !torch.vtensor<[2,77,320],f16>, !torch.list<int> -> !torch.vtensor<[2,77,8,40],f16>
%1291 = torch.aten.permute %1290, %901 : !torch.vtensor<[2,77,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,40],f16>
%1292 = torch.aten.clone %1291, %int0 : !torch.vtensor<[2,8,77,40],f16>, !torch.int -> !torch.vtensor<[2,8,77,40],f16>
%1293 = torch.aten.view %1292, %985 : !torch.vtensor<[2,8,77,40],f16>, !torch.list<int> -> !torch.vtensor<[16,77,40],f16>
%1294 = torch.aten.transpose.int %1289, %int-1, %int-2 : !torch.vtensor<[16,77,40],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,40,77],f16>
%1295 = torch.aten.broadcast_to %1285, %904 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%1296 = torch.aten.view %1295, %904 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%1297 = torch.aten.broadcast_to %1294, %994 : !torch.vtensor<[16,40,77],f16>, !torch.list<int> -> !torch.vtensor<[16,40,77],f16>
%1298 = torch.aten.view %1297, %994 : !torch.vtensor<[16,40,77],f16>, !torch.list<int> -> !torch.vtensor<[16,40,77],f16>
%1299 = torch.aten.bmm %1296, %1298 : !torch.vtensor<[16,4096,40],f16>, !torch.vtensor<[16,40,77],f16> -> !torch.vtensor<[16,4096,77],f16>
%1300 = torch.aten.view %1299, %998 : !torch.vtensor<[16,4096,77],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,77],f16>
%1301 = torch.aten.mul.Tensor %1300, %3 : !torch.vtensor<[16,4096,77],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,4096,77],f16>
%values_4, %indices_5 = torch.aten.max.dim %1301, %int-1, %true : !torch.vtensor<[16,4096,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,4096,1],f16>, !torch.vtensor<[16,4096,1],si64>
%1302 = torch.aten.sub.Tensor %1301, %values_4, %float1.000000e00 : !torch.vtensor<[16,4096,77],f16>, !torch.vtensor<[16,4096,1],f16>, !torch.float -> !torch.vtensor<[16,4096,77],f16>
%1303 = torch.aten.exp %1302 : !torch.vtensor<[16,4096,77],f16> -> !torch.vtensor<[16,4096,77],f16>
%1304 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%1305 = torch.aten.sum.dim_IntList %1303, %1304, %true, %none : !torch.vtensor<[16,4096,77],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[16,4096,1],f16>
%1306 = torch.aten.div.Tensor %1303, %1305 : !torch.vtensor<[16,4096,77],f16>, !torch.vtensor<[16,4096,1],f16> -> !torch.vtensor<[16,4096,77],f16>
%1307 = torch.aten.broadcast_to %1306, %998 : !torch.vtensor<[16,4096,77],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,77],f16>
%1308 = torch.aten.view %1307, %998 : !torch.vtensor<[16,4096,77],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,77],f16>
%1309 = torch.aten.broadcast_to %1293, %985 : !torch.vtensor<[16,77,40],f16>, !torch.list<int> -> !torch.vtensor<[16,77,40],f16>
%1310 = torch.aten.view %1309, %985 : !torch.vtensor<[16,77,40],f16>, !torch.list<int> -> !torch.vtensor<[16,77,40],f16>
%1311 = torch.aten.bmm %1308, %1310 : !torch.vtensor<[16,4096,77],f16>, !torch.vtensor<[16,77,40],f16> -> !torch.vtensor<[16,4096,40],f16>
%1312 = torch.aten.view %1311, %904 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%1313 = torch.aten.view %1312, %935 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16>
%1314 = torch.aten.permute %1313, %901 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16>
%1315 = torch.aten.clone %1314, %int0 : !torch.vtensor<[2,4096,8,40],f16>, !torch.int -> !torch.vtensor<[2,4096,8,40],f16>
%1316 = torch.aten.view %1315, %868 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%1317 = torch.aten.transpose.int %78, %int0, %int1 : !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,320],f16>
%1318 = torch.aten.view %1316, %887 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
%1319 = torch.aten.mm %1318, %1317 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16>
%1320 = torch.aten.mul.Scalar %79, %int1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320],f16>
%1321 = torch.aten.add.Tensor %1320, %1319, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[8192,320],f16>, !torch.int -> !torch.vtensor<[8192,320],f16>
%1322 = torch.aten.view %1321, %868 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%1323 = torch.aten.add.Tensor %1322, %1253, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16>
%1324 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%1325 = torch.aten.sum.dim_IntList %1323, %1324, %true, %none : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,4096,1],f16>
%1326 = torch.aten.div.Scalar %1325, %int320 : !torch.vtensor<[2,4096,1],f16>, !torch.int -> !torch.vtensor<[2,4096,1],f16>
%1327 = torch.prim.ListConstruct %int2, %int4096, %int320 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1328 = torch.aten.broadcast_to %1326, %1327 : !torch.vtensor<[2,4096,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%1329 = torch.aten.sub.Tensor %1323, %1328, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16>
%1330 = torch.aten.mul.Tensor %1329, %1329 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16>
%1331 = torch.aten.sum.dim_IntList %1330, %1324, %true, %none : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,4096,1],f16>
%1332 = torch.aten.div.Scalar %1331, %int320 : !torch.vtensor<[2,4096,1],f16>, !torch.int -> !torch.vtensor<[2,4096,1],f16>
%1333 = torch.aten.add.Scalar %1332, %float1.000000e-05, %int1 : !torch.vtensor<[2,4096,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,4096,1],f16>
%1334 = torch.aten.rsqrt %1333 : !torch.vtensor<[2,4096,1],f16> -> !torch.vtensor<[2,4096,1],f16>
%1335 = torch.prim.ListConstruct %int2, %int4096, %int320 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1336 = torch.aten.broadcast_to %1334, %1335 : !torch.vtensor<[2,4096,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%1337 = torch.aten.mul.Tensor %1329, %1336 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16>
%1338 = torch.aten.mul.Tensor %1337, %80 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[320],f16> -> !torch.vtensor<[2,4096,320],f16>
%1339 = torch.aten.add.Tensor %1338, %81, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16>
%1340 = torch.aten.transpose.int %82, %int0, %int1 : !torch.vtensor<[2560,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,2560],f16>
%1341 = torch.aten.view %1339, %887 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
%1342 = torch.aten.mm %1341, %1340 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,2560],f16> -> !torch.vtensor<[8192,2560],f16>
%1343 = torch.aten.mul.Scalar %83, %int1 : !torch.vtensor<[2560],f16>, !torch.int -> !torch.vtensor<[2560],f16>
%1344 = torch.aten.add.Tensor %1343, %1342, %int1 : !torch.vtensor<[2560],f16>, !torch.vtensor<[8192,2560],f16>, !torch.int -> !torch.vtensor<[8192,2560],f16>
%1345 = torch.aten.view %1344, %1044 : !torch.vtensor<[8192,2560],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,2560],f16>
%1346 = torch.aten.slice.Tensor %1345, %int-1, %int0, %int1280, %int1 : !torch.vtensor<[2,4096,2560],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,4096,1280],f16>
%1347 = torch.aten.slice.Tensor %1345, %int-1, %int1280, %int2560, %int1 : !torch.vtensor<[2,4096,2560],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,4096,1280],f16>
%1348 = torch.aten.gelu %1347, %str : !torch.vtensor<[2,4096,1280],f16>, !torch.str -> !torch.vtensor<[2,4096,1280],f16>
%1349 = torch.aten.mul.Tensor %1346, %1348 : !torch.vtensor<[2,4096,1280],f16>, !torch.vtensor<[2,4096,1280],f16> -> !torch.vtensor<[2,4096,1280],f16>
%1350 = torch.aten.transpose.int %84, %int0, %int1 : !torch.vtensor<[320,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,320],f16>
%1351 = torch.aten.view %1349, %1051 : !torch.vtensor<[2,4096,1280],f16>, !torch.list<int> -> !torch.vtensor<[8192,1280],f16>
%1352 = torch.aten.mm %1351, %1350 : !torch.vtensor<[8192,1280],f16>, !torch.vtensor<[1280,320],f16> -> !torch.vtensor<[8192,320],f16>
%1353 = torch.aten.mul.Scalar %85, %int1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320],f16>
%1354 = torch.aten.add.Tensor %1353, %1352, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[8192,320],f16>, !torch.int -> !torch.vtensor<[8192,320],f16>
%1355 = torch.aten.view %1354, %868 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%1356 = torch.aten.add.Tensor %1355, %1323, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16>
%1357 = torch.aten.view %1356, %1058 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[2,64,64,320],f16>
%1358 = torch.aten.permute %1357, %1060 : !torch.vtensor<[2,64,64,320],f16>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f16>
%1359 = torch.aten.convolution %1358, %86, %87, %744, %745, %744, %false, %745, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[320,320,1,1],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,320,64,64],f16>
%1360 = torch.aten.add.Tensor %1359, %1147, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f16>
%1361 = torch.prim.ListConstruct %int2, %int2 : (!torch.int, !torch.int) -> !torch.list<int>
%1362 = torch.aten.convolution %1360, %88, %89, %1361, %744, %744, %false, %745, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[320,320,3,3],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,320,32,32],f16>
%1363 = torch.aten.clone %1362, %int0 : !torch.vtensor<[2,320,32,32],f16>, !torch.int -> !torch.vtensor<[2,320,32,32],f16>
%1364 = torch.prim.ListConstruct %int2, %int32, %int10, %int1024 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1365 = torch.aten.view %1363, %1364 : !torch.vtensor<[2,320,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,32,10,1024],f16>
%1366 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%1367 = torch.aten.to.dtype %1366, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%1368 = torch.prim.ListConstruct %int2, %int32, %int10, %int1024 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1369 = torch.aten.broadcast_to %1367, %1368 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,10,1024],f32>
%1370 = torch.valsem.aten.copy %1369, %1365, %false : !torch.vtensor<[2,32,10,1024],f32>, !torch.vtensor<[2,32,10,1024],f16>, !torch.bool -> !torch.vtensor<[2,32,10,1024],f32>
%1371 = torch.aten.to.dtype %1370, %int7, %false, %false, %none : !torch.vtensor<[2,32,10,1024],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,10,1024],f64>
%1372 = torch.aten.sum.dim_IntList %1371, %754, %true, %none : !torch.vtensor<[2,32,10,1024],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%1373 = torch.aten.div.Scalar %1372, %int10240 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%1374 = torch.aten.sub.Tensor %1371, %1373, %float1.000000e00 : !torch.vtensor<[2,32,10,1024],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,10,1024],f64>
%1375 = torch.aten.mul.Tensor %1374, %1374 : !torch.vtensor<[2,32,10,1024],f64>, !torch.vtensor<[2,32,10,1024],f64> -> !torch.vtensor<[2,32,10,1024],f64>
%1376 = torch.aten.sum.dim_IntList %1375, %754, %true, %none : !torch.vtensor<[2,32,10,1024],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%1377 = torch.aten.div.Scalar %1376, %int10240 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%1378 = torch.aten.to.dtype %1377, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%1379 = torch.aten.sum.dim_IntList %1370, %754, %true, %none : !torch.vtensor<[2,32,10,1024],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%1380 = torch.aten.div.Scalar %1379, %int10240 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%1381 = torch.aten.add.Tensor %1378, %6, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%1382 = torch.aten.rsqrt %1381 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%1383 = torch.aten.sub.Tensor %1365, %1380, %int1 : !torch.vtensor<[2,32,10,1024],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,10,1024],f32>
%1384 = torch.aten.mul.Tensor %1383, %1382 : !torch.vtensor<[2,32,10,1024],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,10,1024],f32>
%1385 = torch.prim.ListConstruct %int2, %int320, %int32, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1386 = torch.aten.view %1384, %1385 : !torch.vtensor<[2,32,10,1024],f32>, !torch.list<int> -> !torch.vtensor<[2,320,32,32],f32>
%1387 = torch.aten.unsqueeze %90, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16>
%1388 = torch.aten.unsqueeze %1387, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16>
%1389 = torch.aten.mul.Tensor %1386, %1388 : !torch.vtensor<[2,320,32,32],f32>, !torch.vtensor<[320,1,1],f16> -> !torch.vtensor<[2,320,32,32],f32>
%1390 = torch.aten.unsqueeze %91, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16>
%1391 = torch.aten.unsqueeze %1390, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16>
%1392 = torch.aten.add.Tensor %1389, %1391, %int1 : !torch.vtensor<[2,320,32,32],f32>, !torch.vtensor<[320,1,1],f16>, !torch.int -> !torch.vtensor<[2,320,32,32],f32>
%1393 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%1394 = torch.aten.to.dtype %1393, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%1395 = torch.prim.ListConstruct %int2, %int320, %int32, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1396 = torch.aten.broadcast_to %1394, %1395 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,320,32,32],f16>
%1397 = torch.valsem.aten.copy %1396, %1392, %false : !torch.vtensor<[2,320,32,32],f16>, !torch.vtensor<[2,320,32,32],f32>, !torch.bool -> !torch.vtensor<[2,320,32,32],f16>
%1398 = torch.aten.sigmoid %1397 : !torch.vtensor<[2,320,32,32],f16> -> !torch.vtensor<[2,320,32,32],f16>
%1399 = torch.aten.mul.Tensor %1398, %1397 : !torch.vtensor<[2,320,32,32],f16>, !torch.vtensor<[2,320,32,32],f16> -> !torch.vtensor<[2,320,32,32],f16>
%1400 = torch.aten.convolution %1399, %92, %93, %744, %744, %744, %false, %745, %int1 : !torch.vtensor<[2,320,32,32],f16>, !torch.vtensor<[640,320,3,3],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
%1401 = torch.aten.sigmoid %743 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%1402 = torch.aten.mul.Tensor %1401, %743 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%1403 = torch.aten.transpose.int %94, %int0, %int1 : !torch.vtensor<[640,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,640],f16>
%1404 = torch.aten.mm %1402, %1403 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,640],f16> -> !torch.vtensor<[2,640],f16>
%1405 = torch.aten.mul.Scalar %95, %int1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640],f16>
%1406 = torch.aten.add.Tensor %1405, %1404, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2,640],f16>, !torch.int -> !torch.vtensor<[2,640],f16>
%1407 = torch.aten.slice.Tensor %1406, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,640],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,640],f16>
%1408 = torch.aten.slice.Tensor %1407, %int1, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,640],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,640],f16>
%1409 = torch.aten.unsqueeze %1408, %int2 : !torch.vtensor<[2,640],f16>, !torch.int -> !torch.vtensor<[2,640,1],f16>
%1410 = torch.aten.unsqueeze %1409, %int3 : !torch.vtensor<[2,640,1],f16>, !torch.int -> !torch.vtensor<[2,640,1,1],f16>
%1411 = torch.aten.add.Tensor %1400, %1410, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,1,1],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
%1412 = torch.prim.ListConstruct %int2, %int32, %int20, %int1024 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1413 = torch.aten.view %1411, %1412 : !torch.vtensor<[2,640,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,32,20,1024],f16>
%1414 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%1415 = torch.aten.to.dtype %1414, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%1416 = torch.prim.ListConstruct %int2, %int32, %int20, %int1024 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1417 = torch.aten.broadcast_to %1415, %1416 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,20,1024],f32>
%1418 = torch.valsem.aten.copy %1417, %1413, %false : !torch.vtensor<[2,32,20,1024],f32>, !torch.vtensor<[2,32,20,1024],f16>, !torch.bool -> !torch.vtensor<[2,32,20,1024],f32>
%1419 = torch.aten.to.dtype %1418, %int7, %false, %false, %none : !torch.vtensor<[2,32,20,1024],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,20,1024],f64>
%1420 = torch.aten.sum.dim_IntList %1419, %754, %true, %none : !torch.vtensor<[2,32,20,1024],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%1421 = torch.aten.div.Scalar %1420, %int20480 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%1422 = torch.aten.sub.Tensor %1419, %1421, %float1.000000e00 : !torch.vtensor<[2,32,20,1024],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,20,1024],f64>
%1423 = torch.aten.mul.Tensor %1422, %1422 : !torch.vtensor<[2,32,20,1024],f64>, !torch.vtensor<[2,32,20,1024],f64> -> !torch.vtensor<[2,32,20,1024],f64>
%1424 = torch.aten.sum.dim_IntList %1423, %754, %true, %none : !torch.vtensor<[2,32,20,1024],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%1425 = torch.aten.div.Scalar %1424, %int20480 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%1426 = torch.aten.to.dtype %1425, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%1427 = torch.aten.sum.dim_IntList %1418, %754, %true, %none : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%1428 = torch.aten.div.Scalar %1427, %int20480 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%1429 = torch.aten.add.Tensor %1426, %6, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%1430 = torch.aten.rsqrt %1429 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%1431 = torch.aten.sub.Tensor %1413, %1428, %int1 : !torch.vtensor<[2,32,20,1024],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,20,1024],f32>
%1432 = torch.aten.mul.Tensor %1431, %1430 : !torch.vtensor<[2,32,20,1024],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,20,1024],f32>
%1433 = torch.prim.ListConstruct %int2, %int640, %int32, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1434 = torch.aten.view %1432, %1433 : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f32>
%1435 = torch.aten.unsqueeze %96, %int-1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640,1],f16>
%1436 = torch.aten.unsqueeze %1435, %int-1 : !torch.vtensor<[640,1],f16>, !torch.int -> !torch.vtensor<[640,1,1],f16>
%1437 = torch.aten.mul.Tensor %1434, %1436 : !torch.vtensor<[2,640,32,32],f32>, !torch.vtensor<[640,1,1],f16> -> !torch.vtensor<[2,640,32,32],f32>
%1438 = torch.aten.unsqueeze %97, %int-1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640,1],f16>
%1439 = torch.aten.unsqueeze %1438, %int-1 : !torch.vtensor<[640,1],f16>, !torch.int -> !torch.vtensor<[640,1,1],f16>
%1440 = torch.aten.add.Tensor %1437, %1439, %int1 : !torch.vtensor<[2,640,32,32],f32>, !torch.vtensor<[640,1,1],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f32>
%1441 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%1442 = torch.aten.to.dtype %1441, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%1443 = torch.prim.ListConstruct %int2, %int640, %int32, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1444 = torch.aten.broadcast_to %1442, %1443 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f16>
%1445 = torch.valsem.aten.copy %1444, %1440, %false : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f32>, !torch.bool -> !torch.vtensor<[2,640,32,32],f16>
%1446 = torch.aten.sigmoid %1445 : !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16>
%1447 = torch.aten.mul.Tensor %1446, %1445 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16>
%1448 = torch.aten.convolution %1447, %98, %99, %744, %744, %744, %false, %745, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[640,640,3,3],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
%1449 = torch.aten.convolution %1362, %100, %101, %744, %745, %744, %false, %745, %int1 : !torch.vtensor<[2,320,32,32],f16>, !torch.vtensor<[640,320,1,1],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
%1450 = torch.aten.add.Tensor %1449, %1448, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
%1451 = torch.aten.div.Tensor %1450, %5 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[2,640,32,32],f16>
%1452 = torch.aten.clone %1451, %int0 : !torch.vtensor<[2,640,32,32],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
%1453 = torch.aten.view %1452, %1412 : !torch.vtensor<[2,640,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,32,20,1024],f16>
%1454 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%1455 = torch.aten.to.dtype %1454, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%1456 = torch.prim.ListConstruct %int2, %int32, %int20, %int1024 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1457 = torch.aten.broadcast_to %1455, %1456 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,20,1024],f32>
%1458 = torch.valsem.aten.copy %1457, %1453, %false : !torch.vtensor<[2,32,20,1024],f32>, !torch.vtensor<[2,32,20,1024],f16>, !torch.bool -> !torch.vtensor<[2,32,20,1024],f32>
%1459 = torch.aten.to.dtype %1458, %int7, %false, %false, %none : !torch.vtensor<[2,32,20,1024],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,20,1024],f64>
%1460 = torch.aten.sum.dim_IntList %1459, %754, %true, %none : !torch.vtensor<[2,32,20,1024],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%1461 = torch.aten.div.Scalar %1460, %int20480 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%1462 = torch.aten.sub.Tensor %1459, %1461, %float1.000000e00 : !torch.vtensor<[2,32,20,1024],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,20,1024],f64>
%1463 = torch.aten.mul.Tensor %1462, %1462 : !torch.vtensor<[2,32,20,1024],f64>, !torch.vtensor<[2,32,20,1024],f64> -> !torch.vtensor<[2,32,20,1024],f64>
%1464 = torch.aten.sum.dim_IntList %1463, %754, %true, %none : !torch.vtensor<[2,32,20,1024],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%1465 = torch.aten.div.Scalar %1464, %int20480 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%1466 = torch.aten.to.dtype %1465, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%1467 = torch.aten.sum.dim_IntList %1458, %754, %true, %none : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%1468 = torch.aten.div.Scalar %1467, %int20480 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%1469 = torch.aten.add.Tensor %1466, %4, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%1470 = torch.aten.rsqrt %1469 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%1471 = torch.aten.sub.Tensor %1453, %1468, %int1 : !torch.vtensor<[2,32,20,1024],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,20,1024],f32>
%1472 = torch.aten.mul.Tensor %1471, %1470 : !torch.vtensor<[2,32,20,1024],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,20,1024],f32>
%1473 = torch.aten.view %1472, %1433 : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f32>
%1474 = torch.aten.unsqueeze %102, %int-1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640,1],f16>
%1475 = torch.aten.unsqueeze %1474, %int-1 : !torch.vtensor<[640,1],f16>, !torch.int -> !torch.vtensor<[640,1,1],f16>
%1476 = torch.aten.mul.Tensor %1473, %1475 : !torch.vtensor<[2,640,32,32],f32>, !torch.vtensor<[640,1,1],f16> -> !torch.vtensor<[2,640,32,32],f32>
%1477 = torch.aten.unsqueeze %103, %int-1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640,1],f16>
%1478 = torch.aten.unsqueeze %1477, %int-1 : !torch.vtensor<[640,1],f16>, !torch.int -> !torch.vtensor<[640,1,1],f16>
%1479 = torch.aten.add.Tensor %1476, %1478, %int1 : !torch.vtensor<[2,640,32,32],f32>, !torch.vtensor<[640,1,1],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f32>
%1480 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%1481 = torch.aten.to.dtype %1480, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%1482 = torch.prim.ListConstruct %int2, %int640, %int32, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1483 = torch.aten.broadcast_to %1481, %1482 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f16>
%1484 = torch.valsem.aten.copy %1483, %1479, %false : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f32>, !torch.bool -> !torch.vtensor<[2,640,32,32],f16>
%1485 = torch.aten.convolution %1484, %104, %105, %744, %745, %744, %false, %745, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[640,640,1,1],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
%1486 = torch.aten.permute %1485, %866 : !torch.vtensor<[2,640,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,32,32,640],f16>
%1487 = torch.prim.ListConstruct %int2, %int1024, %int640 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1488 = torch.aten.view %1486, %1487 : !torch.vtensor<[2,32,32,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%1489 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%1490 = torch.aten.sum.dim_IntList %1488, %1489, %true, %none : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,1024,1],f16>
%1491 = torch.aten.div.Scalar %1490, %int640 : !torch.vtensor<[2,1024,1],f16>, !torch.int -> !torch.vtensor<[2,1024,1],f16>
%1492 = torch.prim.ListConstruct %int2, %int1024, %int640 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1493 = torch.aten.broadcast_to %1491, %1492 : !torch.vtensor<[2,1024,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%1494 = torch.aten.sub.Tensor %1488, %1493, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16>
%1495 = torch.aten.mul.Tensor %1494, %1494 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16>
%1496 = torch.aten.sum.dim_IntList %1495, %1489, %true, %none : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,1024,1],f16>
%1497 = torch.aten.div.Scalar %1496, %int640 : !torch.vtensor<[2,1024,1],f16>, !torch.int -> !torch.vtensor<[2,1024,1],f16>
%1498 = torch.aten.add.Scalar %1497, %float1.000000e-05, %int1 : !torch.vtensor<[2,1024,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,1024,1],f16>
%1499 = torch.aten.rsqrt %1498 : !torch.vtensor<[2,1024,1],f16> -> !torch.vtensor<[2,1024,1],f16>
%1500 = torch.prim.ListConstruct %int2, %int1024, %int640 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1501 = torch.aten.broadcast_to %1499, %1500 : !torch.vtensor<[2,1024,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%1502 = torch.aten.mul.Tensor %1494, %1501 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16>
%1503 = torch.aten.mul.Tensor %1502, %106 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[640],f16> -> !torch.vtensor<[2,1024,640],f16>
%1504 = torch.aten.add.Tensor %1503, %107, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16>
%1505 = torch.aten.transpose.int %108, %int0, %int1 : !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,640],f16>
%1506 = torch.prim.ListConstruct %int2048, %int640 : (!torch.int, !torch.int) -> !torch.list<int>
%1507 = torch.aten.view %1504, %1506 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
%1508 = torch.aten.mm %1507, %1505 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16>
%1509 = torch.aten.view %1508, %1487 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%1510 = torch.aten.transpose.int %109, %int0, %int1 : !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,640],f16>
%1511 = torch.aten.view %1504, %1506 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
%1512 = torch.aten.mm %1511, %1510 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16>
%1513 = torch.aten.view %1512, %1487 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%1514 = torch.aten.transpose.int %110, %int0, %int1 : !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,640],f16>
%1515 = torch.aten.view %1504, %1506 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
%1516 = torch.aten.mm %1515, %1514 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16>
%1517 = torch.aten.view %1516, %1487 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%1518 = torch.prim.ListConstruct %int2, %int1024, %int8, %int80 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1519 = torch.aten.view %1509, %1518 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16>
%1520 = torch.aten.permute %1519, %901 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16>
%1521 = torch.aten.clone %1520, %int0 : !torch.vtensor<[2,8,1024,80],f16>, !torch.int -> !torch.vtensor<[2,8,1024,80],f16>
%1522 = torch.prim.ListConstruct %int16, %int1024, %int80 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1523 = torch.aten.view %1521, %1522 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%1524 = torch.aten.view %1513, %1518 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16>
%1525 = torch.aten.permute %1524, %901 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16>
%1526 = torch.aten.clone %1525, %int0 : !torch.vtensor<[2,8,1024,80],f16>, !torch.int -> !torch.vtensor<[2,8,1024,80],f16>
%1527 = torch.aten.view %1526, %1522 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%1528 = torch.aten.view %1517, %1518 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16>
%1529 = torch.aten.permute %1528, %901 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16>
%1530 = torch.aten.clone %1529, %int0 : !torch.vtensor<[2,8,1024,80],f16>, !torch.int -> !torch.vtensor<[2,8,1024,80],f16>
%1531 = torch.aten.view %1530, %1522 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%1532 = torch.aten.transpose.int %1527, %int-1, %int-2 : !torch.vtensor<[16,1024,80],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,80,1024],f16>
%1533 = torch.aten.broadcast_to %1523, %1522 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%1534 = torch.aten.view %1533, %1522 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%1535 = torch.prim.ListConstruct %int16, %int80, %int1024 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1536 = torch.aten.broadcast_to %1532, %1535 : !torch.vtensor<[16,80,1024],f16>, !torch.list<int> -> !torch.vtensor<[16,80,1024],f16>
%1537 = torch.aten.view %1536, %1535 : !torch.vtensor<[16,80,1024],f16>, !torch.list<int> -> !torch.vtensor<[16,80,1024],f16>
%1538 = torch.aten.bmm %1534, %1537 : !torch.vtensor<[16,1024,80],f16>, !torch.vtensor<[16,80,1024],f16> -> !torch.vtensor<[16,1024,1024],f16>
%1539 = torch.prim.ListConstruct %int16, %int1024, %int1024 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1540 = torch.aten.view %1538, %1539 : !torch.vtensor<[16,1024,1024],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,1024],f16>
%1541 = torch.aten.mul.Tensor %1540, %2 : !torch.vtensor<[16,1024,1024],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,1024,1024],f16>
%values_6, %indices_7 = torch.aten.max.dim %1541, %int-1, %true : !torch.vtensor<[16,1024,1024],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,1024,1],f16>, !torch.vtensor<[16,1024,1],si64>
%1542 = torch.aten.sub.Tensor %1541, %values_6, %float1.000000e00 : !torch.vtensor<[16,1024,1024],f16>, !torch.vtensor<[16,1024,1],f16>, !torch.float -> !torch.vtensor<[16,1024,1024],f16>
%1543 = torch.aten.exp %1542 : !torch.vtensor<[16,1024,1024],f16> -> !torch.vtensor<[16,1024,1024],f16>
%1544 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%1545 = torch.aten.sum.dim_IntList %1543, %1544, %true, %none : !torch.vtensor<[16,1024,1024],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[16,1024,1],f16>
%1546 = torch.aten.div.Tensor %1543, %1545 : !torch.vtensor<[16,1024,1024],f16>, !torch.vtensor<[16,1024,1],f16> -> !torch.vtensor<[16,1024,1024],f16>
%1547 = torch.aten.broadcast_to %1546, %1539 : !torch.vtensor<[16,1024,1024],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,1024],f16>
%1548 = torch.aten.view %1547, %1539 : !torch.vtensor<[16,1024,1024],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,1024],f16>
%1549 = torch.aten.broadcast_to %1531, %1522 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%1550 = torch.aten.view %1549, %1522 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%1551 = torch.aten.bmm %1548, %1550 : !torch.vtensor<[16,1024,1024],f16>, !torch.vtensor<[16,1024,80],f16> -> !torch.vtensor<[16,1024,80],f16>
%1552 = torch.aten.view %1551, %1522 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%1553 = torch.prim.ListConstruct %int2, %int8, %int1024, %int80 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1554 = torch.aten.view %1552, %1553 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16>
%1555 = torch.aten.permute %1554, %901 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16>
%1556 = torch.aten.clone %1555, %int0 : !torch.vtensor<[2,1024,8,80],f16>, !torch.int -> !torch.vtensor<[2,1024,8,80],f16>
%1557 = torch.aten.view %1556, %1487 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%1558 = torch.aten.transpose.int %111, %int0, %int1 : !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,640],f16>
%1559 = torch.aten.view %1557, %1506 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
%1560 = torch.aten.mm %1559, %1558 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16>
%1561 = torch.aten.mul.Scalar %112, %int1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640],f16>
%1562 = torch.aten.add.Tensor %1561, %1560, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2048,640],f16>, !torch.int -> !torch.vtensor<[2048,640],f16>
%1563 = torch.aten.view %1562, %1487 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%1564 = torch.aten.add.Tensor %1563, %1488, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16>
%1565 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%1566 = torch.aten.sum.dim_IntList %1564, %1565, %true, %none : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,1024,1],f16>
%1567 = torch.aten.div.Scalar %1566, %int640 : !torch.vtensor<[2,1024,1],f16>, !torch.int -> !torch.vtensor<[2,1024,1],f16>
%1568 = torch.prim.ListConstruct %int2, %int1024, %int640 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1569 = torch.aten.broadcast_to %1567, %1568 : !torch.vtensor<[2,1024,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%1570 = torch.aten.sub.Tensor %1564, %1569, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16>
%1571 = torch.aten.mul.Tensor %1570, %1570 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16>
%1572 = torch.aten.sum.dim_IntList %1571, %1565, %true, %none : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,1024,1],f16>
%1573 = torch.aten.div.Scalar %1572, %int640 : !torch.vtensor<[2,1024,1],f16>, !torch.int -> !torch.vtensor<[2,1024,1],f16>
%1574 = torch.aten.add.Scalar %1573, %float1.000000e-05, %int1 : !torch.vtensor<[2,1024,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,1024,1],f16>
%1575 = torch.aten.rsqrt %1574 : !torch.vtensor<[2,1024,1],f16> -> !torch.vtensor<[2,1024,1],f16>
%1576 = torch.prim.ListConstruct %int2, %int1024, %int640 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1577 = torch.aten.broadcast_to %1575, %1576 : !torch.vtensor<[2,1024,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%1578 = torch.aten.mul.Tensor %1570, %1577 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16>
%1579 = torch.aten.mul.Tensor %1578, %113 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[640],f16> -> !torch.vtensor<[2,1024,640],f16>
%1580 = torch.aten.add.Tensor %1579, %114, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16>
%1581 = torch.aten.transpose.int %115, %int0, %int1 : !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,640],f16>
%1582 = torch.aten.view %1580, %1506 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
%1583 = torch.aten.mm %1582, %1581 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16>
%1584 = torch.aten.view %1583, %1487 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%1585 = torch.aten.transpose.int %116, %int0, %int1 : !torch.vtensor<[640,768],f16>, !torch.int, !torch.int -> !torch.vtensor<[768,640],f16>
%1586 = torch.aten.view %arg2, %968 : !torch.vtensor<[2,77,768],f16>, !torch.list<int> -> !torch.vtensor<[154,768],f16>
%1587 = torch.aten.mm %1586, %1585 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,640],f16> -> !torch.vtensor<[154,640],f16>
%1588 = torch.prim.ListConstruct %int2, %int77, %int640 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1589 = torch.aten.view %1587, %1588 : !torch.vtensor<[154,640],f16>, !torch.list<int> -> !torch.vtensor<[2,77,640],f16>
%1590 = torch.aten.transpose.int %117, %int0, %int1 : !torch.vtensor<[640,768],f16>, !torch.int, !torch.int -> !torch.vtensor<[768,640],f16>
%1591 = torch.aten.view %arg2, %968 : !torch.vtensor<[2,77,768],f16>, !torch.list<int> -> !torch.vtensor<[154,768],f16>
%1592 = torch.aten.mm %1591, %1590 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,640],f16> -> !torch.vtensor<[154,640],f16>
%1593 = torch.aten.view %1592, %1588 : !torch.vtensor<[154,640],f16>, !torch.list<int> -> !torch.vtensor<[2,77,640],f16>
%1594 = torch.aten.view %1584, %1518 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16>
%1595 = torch.aten.permute %1594, %901 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16>
%1596 = torch.aten.clone %1595, %int0 : !torch.vtensor<[2,8,1024,80],f16>, !torch.int -> !torch.vtensor<[2,8,1024,80],f16>
%1597 = torch.aten.view %1596, %1522 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%1598 = torch.prim.ListConstruct %int2, %int77, %int8, %int80 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1599 = torch.aten.view %1589, %1598 : !torch.vtensor<[2,77,640],f16>, !torch.list<int> -> !torch.vtensor<[2,77,8,80],f16>
%1600 = torch.aten.permute %1599, %901 : !torch.vtensor<[2,77,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,80],f16>
%1601 = torch.aten.clone %1600, %int0 : !torch.vtensor<[2,8,77,80],f16>, !torch.int -> !torch.vtensor<[2,8,77,80],f16>
%1602 = torch.prim.ListConstruct %int16, %int77, %int80 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1603 = torch.aten.view %1601, %1602 : !torch.vtensor<[2,8,77,80],f16>, !torch.list<int> -> !torch.vtensor<[16,77,80],f16>
%1604 = torch.aten.view %1593, %1598 : !torch.vtensor<[2,77,640],f16>, !torch.list<int> -> !torch.vtensor<[2,77,8,80],f16>
%1605 = torch.aten.permute %1604, %901 : !torch.vtensor<[2,77,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,80],f16>
%1606 = torch.aten.clone %1605, %int0 : !torch.vtensor<[2,8,77,80],f16>, !torch.int -> !torch.vtensor<[2,8,77,80],f16>
%1607 = torch.aten.view %1606, %1602 : !torch.vtensor<[2,8,77,80],f16>, !torch.list<int> -> !torch.vtensor<[16,77,80],f16>
%1608 = torch.aten.transpose.int %1603, %int-1, %int-2 : !torch.vtensor<[16,77,80],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,80,77],f16>
%1609 = torch.aten.broadcast_to %1597, %1522 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%1610 = torch.aten.view %1609, %1522 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%1611 = torch.prim.ListConstruct %int16, %int80, %int77 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1612 = torch.aten.broadcast_to %1608, %1611 : !torch.vtensor<[16,80,77],f16>, !torch.list<int> -> !torch.vtensor<[16,80,77],f16>
%1613 = torch.aten.view %1612, %1611 : !torch.vtensor<[16,80,77],f16>, !torch.list<int> -> !torch.vtensor<[16,80,77],f16>
%1614 = torch.aten.bmm %1610, %1613 : !torch.vtensor<[16,1024,80],f16>, !torch.vtensor<[16,80,77],f16> -> !torch.vtensor<[16,1024,77],f16>
%1615 = torch.prim.ListConstruct %int16, %int1024, %int77 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1616 = torch.aten.view %1614, %1615 : !torch.vtensor<[16,1024,77],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,77],f16>
%1617 = torch.aten.mul.Tensor %1616, %2 : !torch.vtensor<[16,1024,77],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,1024,77],f16>
%values_8, %indices_9 = torch.aten.max.dim %1617, %int-1, %true : !torch.vtensor<[16,1024,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,1024,1],f16>, !torch.vtensor<[16,1024,1],si64>
%1618 = torch.aten.sub.Tensor %1617, %values_8, %float1.000000e00 : !torch.vtensor<[16,1024,77],f16>, !torch.vtensor<[16,1024,1],f16>, !torch.float -> !torch.vtensor<[16,1024,77],f16>
%1619 = torch.aten.exp %1618 : !torch.vtensor<[16,1024,77],f16> -> !torch.vtensor<[16,1024,77],f16>
%1620 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%1621 = torch.aten.sum.dim_IntList %1619, %1620, %true, %none : !torch.vtensor<[16,1024,77],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[16,1024,1],f16>
%1622 = torch.aten.div.Tensor %1619, %1621 : !torch.vtensor<[16,1024,77],f16>, !torch.vtensor<[16,1024,1],f16> -> !torch.vtensor<[16,1024,77],f16>
%1623 = torch.aten.broadcast_to %1622, %1615 : !torch.vtensor<[16,1024,77],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,77],f16>
%1624 = torch.aten.view %1623, %1615 : !torch.vtensor<[16,1024,77],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,77],f16>
%1625 = torch.aten.broadcast_to %1607, %1602 : !torch.vtensor<[16,77,80],f16>, !torch.list<int> -> !torch.vtensor<[16,77,80],f16>
%1626 = torch.aten.view %1625, %1602 : !torch.vtensor<[16,77,80],f16>, !torch.list<int> -> !torch.vtensor<[16,77,80],f16>
%1627 = torch.aten.bmm %1624, %1626 : !torch.vtensor<[16,1024,77],f16>, !torch.vtensor<[16,77,80],f16> -> !torch.vtensor<[16,1024,80],f16>
%1628 = torch.aten.view %1627, %1522 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%1629 = torch.aten.view %1628, %1553 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16>
%1630 = torch.aten.permute %1629, %901 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16>
%1631 = torch.aten.clone %1630, %int0 : !torch.vtensor<[2,1024,8,80],f16>, !torch.int -> !torch.vtensor<[2,1024,8,80],f16>
%1632 = torch.aten.view %1631, %1487 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%1633 = torch.aten.transpose.int %118, %int0, %int1 : !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,640],f16>
%1634 = torch.aten.view %1632, %1506 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
%1635 = torch.aten.mm %1634, %1633 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16>
%1636 = torch.aten.mul.Scalar %119, %int1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640],f16>
%1637 = torch.aten.add.Tensor %1636, %1635, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2048,640],f16>, !torch.int -> !torch.vtensor<[2048,640],f16>
%1638 = torch.aten.view %1637, %1487 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%1639 = torch.aten.add.Tensor %1638, %1564, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16>
%1640 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%1641 = torch.aten.sum.dim_IntList %1639, %1640, %true, %none : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,1024,1],f16>
%1642 = torch.aten.div.Scalar %1641, %int640 : !torch.vtensor<[2,1024,1],f16>, !torch.int -> !torch.vtensor<[2,1024,1],f16>
%1643 = torch.prim.ListConstruct %int2, %int1024, %int640 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1644 = torch.aten.broadcast_to %1642, %1643 : !torch.vtensor<[2,1024,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%1645 = torch.aten.sub.Tensor %1639, %1644, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16>
%1646 = torch.aten.mul.Tensor %1645, %1645 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16>
%1647 = torch.aten.sum.dim_IntList %1646, %1640, %true, %none : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,1024,1],f16>
%1648 = torch.aten.div.Scalar %1647, %int640 : !torch.vtensor<[2,1024,1],f16>, !torch.int -> !torch.vtensor<[2,1024,1],f16>
%1649 = torch.aten.add.Scalar %1648, %float1.000000e-05, %int1 : !torch.vtensor<[2,1024,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,1024,1],f16>
%1650 = torch.aten.rsqrt %1649 : !torch.vtensor<[2,1024,1],f16> -> !torch.vtensor<[2,1024,1],f16>
%1651 = torch.prim.ListConstruct %int2, %int1024, %int640 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1652 = torch.aten.broadcast_to %1650, %1651 : !torch.vtensor<[2,1024,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%1653 = torch.aten.mul.Tensor %1645, %1652 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16>
%1654 = torch.aten.mul.Tensor %1653, %120 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[640],f16> -> !torch.vtensor<[2,1024,640],f16>
%1655 = torch.aten.add.Tensor %1654, %121, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16>
%1656 = torch.aten.transpose.int %122, %int0, %int1 : !torch.vtensor<[5120,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,5120],f16>
%1657 = torch.aten.view %1655, %1506 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
%1658 = torch.aten.mm %1657, %1656 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,5120],f16> -> !torch.vtensor<[2048,5120],f16>
%1659 = torch.aten.mul.Scalar %123, %int1 : !torch.vtensor<[5120],f16>, !torch.int -> !torch.vtensor<[5120],f16>
%1660 = torch.aten.add.Tensor %1659, %1658, %int1 : !torch.vtensor<[5120],f16>, !torch.vtensor<[2048,5120],f16>, !torch.int -> !torch.vtensor<[2048,5120],f16>
%1661 = torch.prim.ListConstruct %int2, %int1024, %int5120 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1662 = torch.aten.view %1660, %1661 : !torch.vtensor<[2048,5120],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,5120],f16>
%1663 = torch.aten.slice.Tensor %1662, %int-1, %int0, %int2560, %int1 : !torch.vtensor<[2,1024,5120],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1024,2560],f16>
%1664 = torch.aten.slice.Tensor %1662, %int-1, %int2560, %int5120, %int1 : !torch.vtensor<[2,1024,5120],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1024,2560],f16>
%1665 = torch.aten.gelu %1664, %str : !torch.vtensor<[2,1024,2560],f16>, !torch.str -> !torch.vtensor<[2,1024,2560],f16>
%1666 = torch.aten.mul.Tensor %1663, %1665 : !torch.vtensor<[2,1024,2560],f16>, !torch.vtensor<[2,1024,2560],f16> -> !torch.vtensor<[2,1024,2560],f16>
%1667 = torch.aten.transpose.int %124, %int0, %int1 : !torch.vtensor<[640,2560],f16>, !torch.int, !torch.int -> !torch.vtensor<[2560,640],f16>
%1668 = torch.prim.ListConstruct %int2048, %int2560 : (!torch.int, !torch.int) -> !torch.list<int>
%1669 = torch.aten.view %1666, %1668 : !torch.vtensor<[2,1024,2560],f16>, !torch.list<int> -> !torch.vtensor<[2048,2560],f16>
%1670 = torch.aten.mm %1669, %1667 : !torch.vtensor<[2048,2560],f16>, !torch.vtensor<[2560,640],f16> -> !torch.vtensor<[2048,640],f16>
%1671 = torch.aten.mul.Scalar %125, %int1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640],f16>
%1672 = torch.aten.add.Tensor %1671, %1670, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2048,640],f16>, !torch.int -> !torch.vtensor<[2048,640],f16>
%1673 = torch.aten.view %1672, %1487 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%1674 = torch.aten.add.Tensor %1673, %1639, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16>
%1675 = torch.prim.ListConstruct %int2, %int32, %int32, %int640 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1676 = torch.aten.view %1674, %1675 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2,32,32,640],f16>
%1677 = torch.aten.permute %1676, %1060 : !torch.vtensor<[2,32,32,640],f16>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f16>
%1678 = torch.aten.convolution %1677, %126, %127, %744, %745, %744, %false, %745, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[640,640,1,1],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
%1679 = torch.aten.add.Tensor %1678, %1451, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
%1680 = torch.aten.clone %1679, %int0 : !torch.vtensor<[2,640,32,32],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
%1681 = torch.aten.view %1680, %1412 : !torch.vtensor<[2,640,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,32,20,1024],f16>
%1682 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%1683 = torch.aten.to.dtype %1682, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%1684 = torch.prim.ListConstruct %int2, %int32, %int20, %int1024 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1685 = torch.aten.broadcast_to %1683, %1684 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,20,1024],f32>
%1686 = torch.valsem.aten.copy %1685, %1681, %false : !torch.vtensor<[2,32,20,1024],f32>, !torch.vtensor<[2,32,20,1024],f16>, !torch.bool -> !torch.vtensor<[2,32,20,1024],f32>
%1687 = torch.aten.to.dtype %1686, %int7, %false, %false, %none : !torch.vtensor<[2,32,20,1024],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,20,1024],f64>
%1688 = torch.aten.sum.dim_IntList %1687, %754, %true, %none : !torch.vtensor<[2,32,20,1024],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%1689 = torch.aten.div.Scalar %1688, %int20480 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%1690 = torch.aten.sub.Tensor %1687, %1689, %float1.000000e00 : !torch.vtensor<[2,32,20,1024],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,20,1024],f64>
%1691 = torch.aten.mul.Tensor %1690, %1690 : !torch.vtensor<[2,32,20,1024],f64>, !torch.vtensor<[2,32,20,1024],f64> -> !torch.vtensor<[2,32,20,1024],f64>
%1692 = torch.aten.sum.dim_IntList %1691, %754, %true, %none : !torch.vtensor<[2,32,20,1024],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%1693 = torch.aten.div.Scalar %1692, %int20480 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%1694 = torch.aten.to.dtype %1693, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%1695 = torch.aten.sum.dim_IntList %1686, %754, %true, %none : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%1696 = torch.aten.div.Scalar %1695, %int20480 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%1697 = torch.aten.add.Tensor %1694, %6, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%1698 = torch.aten.rsqrt %1697 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%1699 = torch.aten.sub.Tensor %1681, %1696, %int1 : !torch.vtensor<[2,32,20,1024],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,20,1024],f32>
%1700 = torch.aten.mul.Tensor %1699, %1698 : !torch.vtensor<[2,32,20,1024],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,20,1024],f32>
%1701 = torch.aten.view %1700, %1433 : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f32>
%1702 = torch.aten.unsqueeze %128, %int-1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640,1],f16>
%1703 = torch.aten.unsqueeze %1702, %int-1 : !torch.vtensor<[640,1],f16>, !torch.int -> !torch.vtensor<[640,1,1],f16>
%1704 = torch.aten.mul.Tensor %1701, %1703 : !torch.vtensor<[2,640,32,32],f32>, !torch.vtensor<[640,1,1],f16> -> !torch.vtensor<[2,640,32,32],f32>
%1705 = torch.aten.unsqueeze %129, %int-1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640,1],f16>
%1706 = torch.aten.unsqueeze %1705, %int-1 : !torch.vtensor<[640,1],f16>, !torch.int -> !torch.vtensor<[640,1,1],f16>
%1707 = torch.aten.add.Tensor %1704, %1706, %int1 : !torch.vtensor<[2,640,32,32],f32>, !torch.vtensor<[640,1,1],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f32>
%1708 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%1709 = torch.aten.to.dtype %1708, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%1710 = torch.prim.ListConstruct %int2, %int640, %int32, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1711 = torch.aten.broadcast_to %1709, %1710 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f16>
%1712 = torch.valsem.aten.copy %1711, %1707, %false : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f32>, !torch.bool -> !torch.vtensor<[2,640,32,32],f16>
%1713 = torch.aten.sigmoid %1712 : !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16>
%1714 = torch.aten.mul.Tensor %1713, %1712 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16>
%1715 = torch.aten.convolution %1714, %130, %131, %744, %744, %744, %false, %745, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[640,640,3,3],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
%1716 = torch.aten.sigmoid %743 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%1717 = torch.aten.mul.Tensor %1716, %743 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%1718 = torch.aten.transpose.int %132, %int0, %int1 : !torch.vtensor<[640,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,640],f16>
%1719 = torch.aten.mm %1717, %1718 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,640],f16> -> !torch.vtensor<[2,640],f16>
%1720 = torch.aten.mul.Scalar %133, %int1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640],f16>
%1721 = torch.aten.add.Tensor %1720, %1719, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2,640],f16>, !torch.int -> !torch.vtensor<[2,640],f16>
%1722 = torch.aten.slice.Tensor %1721, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,640],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,640],f16>
%1723 = torch.aten.slice.Tensor %1722, %int1, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,640],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,640],f16>
%1724 = torch.aten.unsqueeze %1723, %int2 : !torch.vtensor<[2,640],f16>, !torch.int -> !torch.vtensor<[2,640,1],f16>
%1725 = torch.aten.unsqueeze %1724, %int3 : !torch.vtensor<[2,640,1],f16>, !torch.int -> !torch.vtensor<[2,640,1,1],f16>
%1726 = torch.aten.add.Tensor %1715, %1725, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,1,1],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
%1727 = torch.aten.view %1726, %1412 : !torch.vtensor<[2,640,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,32,20,1024],f16>
%1728 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%1729 = torch.aten.to.dtype %1728, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
// --- Normalization over [2,32,20,1024] (32 groups, 20*1024 = 20480 elements per
// group; matches the %int20480 divisors below). The f16 input is first widened:
// a zero f32 tensor is broadcast and the f16 data copied into it, then upcast to
// f64 so the mean/variance accumulation is numerically stable.
%1730 = torch.prim.ListConstruct %int2, %int32, %int20, %int1024 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1731 = torch.aten.broadcast_to %1729, %1730 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,20,1024],f32>
%1732 = torch.valsem.aten.copy %1731, %1727, %false : !torch.vtensor<[2,32,20,1024],f32>, !torch.vtensor<[2,32,20,1024],f16>, !torch.bool -> !torch.vtensor<[2,32,20,1024],f32>
%1733 = torch.aten.to.dtype %1732, %int7, %false, %false, %none : !torch.vtensor<[2,32,20,1024],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,20,1024],f64>
// mean = sum(x) / 20480, computed in f64 with keepdim over dims %754.
%1734 = torch.aten.sum.dim_IntList %1733, %754, %true, %none : !torch.vtensor<[2,32,20,1024],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%1735 = torch.aten.div.Scalar %1734, %int20480 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
// var = mean((x - mean)^2), also in f64, then downcast to f32.
%1736 = torch.aten.sub.Tensor %1733, %1735, %float1.000000e00 : !torch.vtensor<[2,32,20,1024],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,20,1024],f64>
%1737 = torch.aten.mul.Tensor %1736, %1736 : !torch.vtensor<[2,32,20,1024],f64>, !torch.vtensor<[2,32,20,1024],f64> -> !torch.vtensor<[2,32,20,1024],f64>
%1738 = torch.aten.sum.dim_IntList %1737, %754, %true, %none : !torch.vtensor<[2,32,20,1024],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%1739 = torch.aten.div.Scalar %1738, %int20480 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%1740 = torch.aten.to.dtype %1739, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
// f32 mean recomputed from the f32 copy (used for the normalization itself).
%1741 = torch.aten.sum.dim_IntList %1732, %754, %true, %none : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%1742 = torch.aten.div.Scalar %1741, %int20480 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
// inv_std = rsqrt(var + eps); %6 is the eps constant (defined above this chunk).
%1743 = torch.aten.add.Tensor %1740, %6, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%1744 = torch.aten.rsqrt %1743 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
// normalized = (x - mean) * inv_std, then reshape back to NCHW [2,640,32,32].
%1745 = torch.aten.sub.Tensor %1727, %1742, %int1 : !torch.vtensor<[2,32,20,1024],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,20,1024],f32>
%1746 = torch.aten.mul.Tensor %1745, %1744 : !torch.vtensor<[2,32,20,1024],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,20,1024],f32>
%1747 = torch.aten.view %1746, %1433 : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f32>
// Per-channel affine: weight %134 and bias %135 ([640]) unsqueezed to [640,1,1].
%1748 = torch.aten.unsqueeze %134, %int-1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640,1],f16>
%1749 = torch.aten.unsqueeze %1748, %int-1 : !torch.vtensor<[640,1],f16>, !torch.int -> !torch.vtensor<[640,1,1],f16>
%1750 = torch.aten.mul.Tensor %1747, %1749 : !torch.vtensor<[2,640,32,32],f32>, !torch.vtensor<[640,1,1],f16> -> !torch.vtensor<[2,640,32,32],f32>
%1751 = torch.aten.unsqueeze %135, %int-1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640,1],f16>
%1752 = torch.aten.unsqueeze %1751, %int-1 : !torch.vtensor<[640,1],f16>, !torch.int -> !torch.vtensor<[640,1,1],f16>
%1753 = torch.aten.add.Tensor %1750, %1752, %int1 : !torch.vtensor<[2,640,32,32],f32>, !torch.vtensor<[640,1,1],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f32>
// Downcast back to f16 (zero-broadcast + copy pattern again).
%1754 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%1755 = torch.aten.to.dtype %1754, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%1756 = torch.prim.ListConstruct %int2, %int640, %int32, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1757 = torch.aten.broadcast_to %1755, %1756 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f16>
%1758 = torch.valsem.aten.copy %1757, %1753, %false : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f32>, !torch.bool -> !torch.vtensor<[2,640,32,32],f16>
// x * sigmoid(x) activation (SiLU/swish form), then a 3x3 conv (%136/%137).
%1759 = torch.aten.sigmoid %1758 : !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16>
%1760 = torch.aten.mul.Tensor %1759, %1758 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16>
%1761 = torch.aten.convolution %1760, %136, %137, %744, %744, %744, %false, %745, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[640,640,3,3],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
// Residual add with %1679 (skip path from before this chunk), then divide by
// %5 — NOTE(review): %5 is a scalar constant defined above; presumably an
// output-scale factor. Confirm against the constants at the top of the func.
%1762 = torch.aten.add.Tensor %1679, %1761, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
%1763 = torch.aten.div.Tensor %1762, %5 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[2,640,32,32],f16>
// --- Second normalization, same recipe as above: reshape to [2,32,20,1024],
// upcast, f64 mean/var over %754 (/20480), rsqrt(var + eps %4), normalize.
%1764 = torch.aten.clone %1763, %int0 : !torch.vtensor<[2,640,32,32],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
%1765 = torch.aten.view %1764, %1412 : !torch.vtensor<[2,640,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,32,20,1024],f16>
%1766 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%1767 = torch.aten.to.dtype %1766, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%1768 = torch.prim.ListConstruct %int2, %int32, %int20, %int1024 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1769 = torch.aten.broadcast_to %1767, %1768 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,20,1024],f32>
%1770 = torch.valsem.aten.copy %1769, %1765, %false : !torch.vtensor<[2,32,20,1024],f32>, !torch.vtensor<[2,32,20,1024],f16>, !torch.bool -> !torch.vtensor<[2,32,20,1024],f32>
%1771 = torch.aten.to.dtype %1770, %int7, %false, %false, %none : !torch.vtensor<[2,32,20,1024],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,20,1024],f64>
%1772 = torch.aten.sum.dim_IntList %1771, %754, %true, %none : !torch.vtensor<[2,32,20,1024],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%1773 = torch.aten.div.Scalar %1772, %int20480 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%1774 = torch.aten.sub.Tensor %1771, %1773, %float1.000000e00 : !torch.vtensor<[2,32,20,1024],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,20,1024],f64>
%1775 = torch.aten.mul.Tensor %1774, %1774 : !torch.vtensor<[2,32,20,1024],f64>, !torch.vtensor<[2,32,20,1024],f64> -> !torch.vtensor<[2,32,20,1024],f64>
%1776 = torch.aten.sum.dim_IntList %1775, %754, %true, %none : !torch.vtensor<[2,32,20,1024],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%1777 = torch.aten.div.Scalar %1776, %int20480 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%1778 = torch.aten.to.dtype %1777, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%1779 = torch.aten.sum.dim_IntList %1770, %754, %true, %none : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%1780 = torch.aten.div.Scalar %1779, %int20480 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%1781 = torch.aten.add.Tensor %1778, %4, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%1782 = torch.aten.rsqrt %1781 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%1783 = torch.aten.sub.Tensor %1765, %1780, %int1 : !torch.vtensor<[2,32,20,1024],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,20,1024],f32>
%1784 = torch.aten.mul.Tensor %1783, %1782 : !torch.vtensor<[2,32,20,1024],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,20,1024],f32>
%1785 = torch.aten.view %1784, %1433 : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f32>
// Per-channel affine (weight %138, bias %139), downcast to f16, then a 1x1
// projection conv (%140/%141) ahead of the token-space (attention) section.
%1786 = torch.aten.unsqueeze %138, %int-1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640,1],f16>
%1787 = torch.aten.unsqueeze %1786, %int-1 : !torch.vtensor<[640,1],f16>, !torch.int -> !torch.vtensor<[640,1,1],f16>
%1788 = torch.aten.mul.Tensor %1785, %1787 : !torch.vtensor<[2,640,32,32],f32>, !torch.vtensor<[640,1,1],f16> -> !torch.vtensor<[2,640,32,32],f32>
%1789 = torch.aten.unsqueeze %139, %int-1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640,1],f16>
%1790 = torch.aten.unsqueeze %1789, %int-1 : !torch.vtensor<[640,1],f16>, !torch.int -> !torch.vtensor<[640,1,1],f16>
%1791 = torch.aten.add.Tensor %1788, %1790, %int1 : !torch.vtensor<[2,640,32,32],f32>, !torch.vtensor<[640,1,1],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f32>
%1792 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%1793 = torch.aten.to.dtype %1792, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%1794 = torch.prim.ListConstruct %int2, %int640, %int32, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1795 = torch.aten.broadcast_to %1793, %1794 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f16>
%1796 = torch.valsem.aten.copy %1795, %1791, %false : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f32>, !torch.bool -> !torch.vtensor<[2,640,32,32],f16>
%1797 = torch.aten.convolution %1796, %140, %141, %744, %745, %744, %false, %745, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[640,640,1,1],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
// NCHW -> NHWC permute and flatten spatial dims: [2,640,32,32] -> [2,1024,640]
// (1024 tokens of width 640).
%1798 = torch.aten.permute %1797, %866 : !torch.vtensor<[2,640,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,32,32,640],f16>
%1799 = torch.aten.view %1798, %1487 : !torch.vtensor<[2,32,32,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
// Layer norm over the last (640) dim — mean/var via sum over dim 2, /640,
// eps 1e-5 — followed by scale %142 and shift %143.
%1800 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%1801 = torch.aten.sum.dim_IntList %1799, %1800, %true, %none : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,1024,1],f16>
%1802 = torch.aten.div.Scalar %1801, %int640 : !torch.vtensor<[2,1024,1],f16>, !torch.int -> !torch.vtensor<[2,1024,1],f16>
%1803 = torch.prim.ListConstruct %int2, %int1024, %int640 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1804 = torch.aten.broadcast_to %1802, %1803 : !torch.vtensor<[2,1024,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%1805 = torch.aten.sub.Tensor %1799, %1804, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16>
%1806 = torch.aten.mul.Tensor %1805, %1805 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16>
%1807 = torch.aten.sum.dim_IntList %1806, %1800, %true, %none : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,1024,1],f16>
%1808 = torch.aten.div.Scalar %1807, %int640 : !torch.vtensor<[2,1024,1],f16>, !torch.int -> !torch.vtensor<[2,1024,1],f16>
%1809 = torch.aten.add.Scalar %1808, %float1.000000e-05, %int1 : !torch.vtensor<[2,1024,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,1024,1],f16>
%1810 = torch.aten.rsqrt %1809 : !torch.vtensor<[2,1024,1],f16> -> !torch.vtensor<[2,1024,1],f16>
%1811 = torch.prim.ListConstruct %int2, %int1024, %int640 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1812 = torch.aten.broadcast_to %1810, %1811 : !torch.vtensor<[2,1024,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%1813 = torch.aten.mul.Tensor %1805, %1812 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16>
%1814 = torch.aten.mul.Tensor %1813, %142 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[640],f16> -> !torch.vtensor<[2,1024,640],f16>
%1815 = torch.aten.add.Tensor %1814, %143, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16>
// --- Attention over the 1024 image tokens (Q, K, V all from %1815, i.e.
// self-attention). Projections: Q=%144, K=%145, V=%146, each 640x640, no bias.
%1816 = torch.aten.transpose.int %144, %int0, %int1 : !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,640],f16>
%1817 = torch.aten.view %1815, %1506 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
%1818 = torch.aten.mm %1817, %1816 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16>
%1819 = torch.aten.view %1818, %1487 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%1820 = torch.aten.transpose.int %145, %int0, %int1 : !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,640],f16>
%1821 = torch.aten.view %1815, %1506 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
%1822 = torch.aten.mm %1821, %1820 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16>
%1823 = torch.aten.view %1822, %1487 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%1824 = torch.aten.transpose.int %146, %int0, %int1 : !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,640],f16>
%1825 = torch.aten.view %1815, %1506 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
%1826 = torch.aten.mm %1825, %1824 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16>
%1827 = torch.aten.view %1826, %1487 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
// Split 640 -> 8 heads x 80, fold heads into batch: [2,1024,640] ->
// [2,8,1024,80] -> [16,1024,80] for Q, K, V.
%1828 = torch.aten.view %1819, %1518 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16>
%1829 = torch.aten.permute %1828, %901 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16>
%1830 = torch.aten.clone %1829, %int0 : !torch.vtensor<[2,8,1024,80],f16>, !torch.int -> !torch.vtensor<[2,8,1024,80],f16>
%1831 = torch.aten.view %1830, %1522 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%1832 = torch.aten.view %1823, %1518 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16>
%1833 = torch.aten.permute %1832, %901 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16>
%1834 = torch.aten.clone %1833, %int0 : !torch.vtensor<[2,8,1024,80],f16>, !torch.int -> !torch.vtensor<[2,8,1024,80],f16>
%1835 = torch.aten.view %1834, %1522 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%1836 = torch.aten.view %1827, %1518 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16>
%1837 = torch.aten.permute %1836, %901 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16>
%1838 = torch.aten.clone %1837, %int0 : !torch.vtensor<[2,8,1024,80],f16>, !torch.int -> !torch.vtensor<[2,8,1024,80],f16>
%1839 = torch.aten.view %1838, %1522 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
// scores = (Q @ K^T) * %2; %2 = 0.1118... = 1/sqrt(80), the per-head scale.
%1840 = torch.aten.transpose.int %1835, %int-1, %int-2 : !torch.vtensor<[16,1024,80],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,80,1024],f16>
%1841 = torch.aten.broadcast_to %1831, %1522 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%1842 = torch.aten.view %1841, %1522 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%1843 = torch.aten.broadcast_to %1840, %1535 : !torch.vtensor<[16,80,1024],f16>, !torch.list<int> -> !torch.vtensor<[16,80,1024],f16>
%1844 = torch.aten.view %1843, %1535 : !torch.vtensor<[16,80,1024],f16>, !torch.list<int> -> !torch.vtensor<[16,80,1024],f16>
%1845 = torch.aten.bmm %1842, %1844 : !torch.vtensor<[16,1024,80],f16>, !torch.vtensor<[16,80,1024],f16> -> !torch.vtensor<[16,1024,1024],f16>
%1846 = torch.aten.view %1845, %1539 : !torch.vtensor<[16,1024,1024],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,1024],f16>
%1847 = torch.aten.mul.Tensor %1846, %2 : !torch.vtensor<[16,1024,1024],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,1024,1024],f16>
// Numerically stable softmax over the last dim: subtract the row max,
// exponentiate, divide by the row sum.
%values_10, %indices_11 = torch.aten.max.dim %1847, %int-1, %true : !torch.vtensor<[16,1024,1024],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,1024,1],f16>, !torch.vtensor<[16,1024,1],si64>
%1848 = torch.aten.sub.Tensor %1847, %values_10, %float1.000000e00 : !torch.vtensor<[16,1024,1024],f16>, !torch.vtensor<[16,1024,1],f16>, !torch.float -> !torch.vtensor<[16,1024,1024],f16>
%1849 = torch.aten.exp %1848 : !torch.vtensor<[16,1024,1024],f16> -> !torch.vtensor<[16,1024,1024],f16>
%1850 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%1851 = torch.aten.sum.dim_IntList %1849, %1850, %true, %none : !torch.vtensor<[16,1024,1024],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[16,1024,1],f16>
%1852 = torch.aten.div.Tensor %1849, %1851 : !torch.vtensor<[16,1024,1024],f16>, !torch.vtensor<[16,1024,1],f16> -> !torch.vtensor<[16,1024,1024],f16>
// attn @ V, then un-fold heads: [16,1024,80] -> [2,8,1024,80] -> [2,1024,640].
%1853 = torch.aten.broadcast_to %1852, %1539 : !torch.vtensor<[16,1024,1024],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,1024],f16>
%1854 = torch.aten.view %1853, %1539 : !torch.vtensor<[16,1024,1024],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,1024],f16>
%1855 = torch.aten.broadcast_to %1839, %1522 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%1856 = torch.aten.view %1855, %1522 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%1857 = torch.aten.bmm %1854, %1856 : !torch.vtensor<[16,1024,1024],f16>, !torch.vtensor<[16,1024,80],f16> -> !torch.vtensor<[16,1024,80],f16>
%1858 = torch.aten.view %1857, %1522 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%1859 = torch.aten.view %1858, %1553 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16>
%1860 = torch.aten.permute %1859, %901 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16>
%1861 = torch.aten.clone %1860, %int0 : !torch.vtensor<[2,1024,8,80],f16>, !torch.int -> !torch.vtensor<[2,1024,8,80],f16>
%1862 = torch.aten.view %1861, %1487 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
// Output projection (weight %147, bias %148; bias added via mul.Scalar-by-1
// then add — the lowering's linear form) plus residual with the block input.
%1863 = torch.aten.transpose.int %147, %int0, %int1 : !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,640],f16>
%1864 = torch.aten.view %1862, %1506 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
%1865 = torch.aten.mm %1864, %1863 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16>
%1866 = torch.aten.mul.Scalar %148, %int1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640],f16>
%1867 = torch.aten.add.Tensor %1866, %1865, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2048,640],f16>, !torch.int -> !torch.vtensor<[2048,640],f16>
%1868 = torch.aten.view %1867, %1487 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%1869 = torch.aten.add.Tensor %1868, %1799, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16>
// Layer norm #2 (same decomposed pattern), scale %149, shift %150.
%1870 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%1871 = torch.aten.sum.dim_IntList %1869, %1870, %true, %none : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,1024,1],f16>
%1872 = torch.aten.div.Scalar %1871, %int640 : !torch.vtensor<[2,1024,1],f16>, !torch.int -> !torch.vtensor<[2,1024,1],f16>
%1873 = torch.prim.ListConstruct %int2, %int1024, %int640 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1874 = torch.aten.broadcast_to %1872, %1873 : !torch.vtensor<[2,1024,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%1875 = torch.aten.sub.Tensor %1869, %1874, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16>
%1876 = torch.aten.mul.Tensor %1875, %1875 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16>
%1877 = torch.aten.sum.dim_IntList %1876, %1870, %true, %none : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,1024,1],f16>
%1878 = torch.aten.div.Scalar %1877, %int640 : !torch.vtensor<[2,1024,1],f16>, !torch.int -> !torch.vtensor<[2,1024,1],f16>
%1879 = torch.aten.add.Scalar %1878, %float1.000000e-05, %int1 : !torch.vtensor<[2,1024,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,1024,1],f16>
%1880 = torch.aten.rsqrt %1879 : !torch.vtensor<[2,1024,1],f16> -> !torch.vtensor<[2,1024,1],f16>
%1881 = torch.prim.ListConstruct %int2, %int1024, %int640 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1882 = torch.aten.broadcast_to %1880, %1881 : !torch.vtensor<[2,1024,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%1883 = torch.aten.mul.Tensor %1875, %1882 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16>
%1884 = torch.aten.mul.Tensor %1883, %149 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[640],f16> -> !torch.vtensor<[2,1024,640],f16>
%1885 = torch.aten.add.Tensor %1884, %150, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16>
// --- Cross-attention: Q from the image tokens (weight %151, 640x640); K and V
// from %arg2 [2,77,768] (encoder hidden states, 77 tokens) via 768->640
// projections %152 and %153.
%1886 = torch.aten.transpose.int %151, %int0, %int1 : !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,640],f16>
%1887 = torch.aten.view %1885, %1506 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
%1888 = torch.aten.mm %1887, %1886 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16>
%1889 = torch.aten.view %1888, %1487 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%1890 = torch.aten.transpose.int %152, %int0, %int1 : !torch.vtensor<[640,768],f16>, !torch.int, !torch.int -> !torch.vtensor<[768,640],f16>
%1891 = torch.aten.view %arg2, %968 : !torch.vtensor<[2,77,768],f16>, !torch.list<int> -> !torch.vtensor<[154,768],f16>
%1892 = torch.aten.mm %1891, %1890 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,640],f16> -> !torch.vtensor<[154,640],f16>
%1893 = torch.aten.view %1892, %1588 : !torch.vtensor<[154,640],f16>, !torch.list<int> -> !torch.vtensor<[2,77,640],f16>
%1894 = torch.aten.transpose.int %153, %int0, %int1 : !torch.vtensor<[640,768],f16>, !torch.int, !torch.int -> !torch.vtensor<[768,640],f16>
%1895 = torch.aten.view %arg2, %968 : !torch.vtensor<[2,77,768],f16>, !torch.list<int> -> !torch.vtensor<[154,768],f16>
%1896 = torch.aten.mm %1895, %1894 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,640],f16> -> !torch.vtensor<[154,640],f16>
%1897 = torch.aten.view %1896, %1588 : !torch.vtensor<[154,640],f16>, !torch.list<int> -> !torch.vtensor<[2,77,640],f16>
// Head split: Q -> [16,1024,80]; K, V -> [16,77,80].
%1898 = torch.aten.view %1889, %1518 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16>
%1899 = torch.aten.permute %1898, %901 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16>
%1900 = torch.aten.clone %1899, %int0 : !torch.vtensor<[2,8,1024,80],f16>, !torch.int -> !torch.vtensor<[2,8,1024,80],f16>
%1901 = torch.aten.view %1900, %1522 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%1902 = torch.aten.view %1893, %1598 : !torch.vtensor<[2,77,640],f16>, !torch.list<int> -> !torch.vtensor<[2,77,8,80],f16>
%1903 = torch.aten.permute %1902, %901 : !torch.vtensor<[2,77,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,80],f16>
%1904 = torch.aten.clone %1903, %int0 : !torch.vtensor<[2,8,77,80],f16>, !torch.int -> !torch.vtensor<[2,8,77,80],f16>
%1905 = torch.aten.view %1904, %1602 : !torch.vtensor<[2,8,77,80],f16>, !torch.list<int> -> !torch.vtensor<[16,77,80],f16>
%1906 = torch.aten.view %1897, %1598 : !torch.vtensor<[2,77,640],f16>, !torch.list<int> -> !torch.vtensor<[2,77,8,80],f16>
%1907 = torch.aten.permute %1906, %901 : !torch.vtensor<[2,77,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,80],f16>
%1908 = torch.aten.clone %1907, %int0 : !torch.vtensor<[2,8,77,80],f16>, !torch.int -> !torch.vtensor<[2,8,77,80],f16>
%1909 = torch.aten.view %1908, %1602 : !torch.vtensor<[2,8,77,80],f16>, !torch.list<int> -> !torch.vtensor<[16,77,80],f16>
// scores = (Q @ K^T) * 1/sqrt(80), then the same stable-softmax pattern.
%1910 = torch.aten.transpose.int %1905, %int-1, %int-2 : !torch.vtensor<[16,77,80],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,80,77],f16>
%1911 = torch.aten.broadcast_to %1901, %1522 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%1912 = torch.aten.view %1911, %1522 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%1913 = torch.aten.broadcast_to %1910, %1611 : !torch.vtensor<[16,80,77],f16>, !torch.list<int> -> !torch.vtensor<[16,80,77],f16>
%1914 = torch.aten.view %1913, %1611 : !torch.vtensor<[16,80,77],f16>, !torch.list<int> -> !torch.vtensor<[16,80,77],f16>
%1915 = torch.aten.bmm %1912, %1914 : !torch.vtensor<[16,1024,80],f16>, !torch.vtensor<[16,80,77],f16> -> !torch.vtensor<[16,1024,77],f16>
%1916 = torch.aten.view %1915, %1615 : !torch.vtensor<[16,1024,77],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,77],f16>
%1917 = torch.aten.mul.Tensor %1916, %2 : !torch.vtensor<[16,1024,77],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,1024,77],f16>
%values_12, %indices_13 = torch.aten.max.dim %1917, %int-1, %true : !torch.vtensor<[16,1024,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,1024,1],f16>, !torch.vtensor<[16,1024,1],si64>
%1918 = torch.aten.sub.Tensor %1917, %values_12, %float1.000000e00 : !torch.vtensor<[16,1024,77],f16>, !torch.vtensor<[16,1024,1],f16>, !torch.float -> !torch.vtensor<[16,1024,77],f16>
%1919 = torch.aten.exp %1918 : !torch.vtensor<[16,1024,77],f16> -> !torch.vtensor<[16,1024,77],f16>
%1920 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%1921 = torch.aten.sum.dim_IntList %1919, %1920, %true, %none : !torch.vtensor<[16,1024,77],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[16,1024,1],f16>
%1922 = torch.aten.div.Tensor %1919, %1921 : !torch.vtensor<[16,1024,77],f16>, !torch.vtensor<[16,1024,1],f16> -> !torch.vtensor<[16,1024,77],f16>
// attn @ V, merge heads back to [2,1024,640].
%1923 = torch.aten.broadcast_to %1922, %1615 : !torch.vtensor<[16,1024,77],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,77],f16>
%1924 = torch.aten.view %1923, %1615 : !torch.vtensor<[16,1024,77],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,77],f16>
%1925 = torch.aten.broadcast_to %1909, %1602 : !torch.vtensor<[16,77,80],f16>, !torch.list<int> -> !torch.vtensor<[16,77,80],f16>
%1926 = torch.aten.view %1925, %1602 : !torch.vtensor<[16,77,80],f16>, !torch.list<int> -> !torch.vtensor<[16,77,80],f16>
%1927 = torch.aten.bmm %1924, %1926 : !torch.vtensor<[16,1024,77],f16>, !torch.vtensor<[16,77,80],f16> -> !torch.vtensor<[16,1024,80],f16>
%1928 = torch.aten.view %1927, %1522 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%1929 = torch.aten.view %1928, %1553 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16>
%1930 = torch.aten.permute %1929, %901 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16>
%1931 = torch.aten.clone %1930, %int0 : !torch.vtensor<[2,1024,8,80],f16>, !torch.int -> !torch.vtensor<[2,1024,8,80],f16>
%1932 = torch.aten.view %1931, %1487 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
// Cross-attention output projection (%154/%155) + residual.
%1933 = torch.aten.transpose.int %154, %int0, %int1 : !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,640],f16>
%1934 = torch.aten.view %1932, %1506 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
%1935 = torch.aten.mm %1934, %1933 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16>
%1936 = torch.aten.mul.Scalar %155, %int1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640],f16>
%1937 = torch.aten.add.Tensor %1936, %1935, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2048,640],f16>, !torch.int -> !torch.vtensor<[2048,640],f16>
%1938 = torch.aten.view %1937, %1487 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%1939 = torch.aten.add.Tensor %1938, %1869, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16>
// Layer norm #3, scale %156, shift %157.
%1940 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%1941 = torch.aten.sum.dim_IntList %1939, %1940, %true, %none : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,1024,1],f16>
%1942 = torch.aten.div.Scalar %1941, %int640 : !torch.vtensor<[2,1024,1],f16>, !torch.int -> !torch.vtensor<[2,1024,1],f16>
%1943 = torch.prim.ListConstruct %int2, %int1024, %int640 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1944 = torch.aten.broadcast_to %1942, %1943 : !torch.vtensor<[2,1024,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%1945 = torch.aten.sub.Tensor %1939, %1944, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16>
%1946 = torch.aten.mul.Tensor %1945, %1945 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16>
%1947 = torch.aten.sum.dim_IntList %1946, %1940, %true, %none : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,1024,1],f16>
%1948 = torch.aten.div.Scalar %1947, %int640 : !torch.vtensor<[2,1024,1],f16>, !torch.int -> !torch.vtensor<[2,1024,1],f16>
%1949 = torch.aten.add.Scalar %1948, %float1.000000e-05, %int1 : !torch.vtensor<[2,1024,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,1024,1],f16>
%1950 = torch.aten.rsqrt %1949 : !torch.vtensor<[2,1024,1],f16> -> !torch.vtensor<[2,1024,1],f16>
%1951 = torch.prim.ListConstruct %int2, %int1024, %int640 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1952 = torch.aten.broadcast_to %1950, %1951 : !torch.vtensor<[2,1024,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%1953 = torch.aten.mul.Tensor %1945, %1952 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16>
%1954 = torch.aten.mul.Tensor %1953, %156 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[640],f16> -> !torch.vtensor<[2,1024,640],f16>
%1955 = torch.aten.add.Tensor %1954, %157, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16>
// --- Gated feed-forward: project 640 -> 5120 (%158/%159), split into two 2560
// halves, gate = first_half * gelu(second_half) (GEGLU-style gating).
%1956 = torch.aten.transpose.int %158, %int0, %int1 : !torch.vtensor<[5120,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,5120],f16>
%1957 = torch.aten.view %1955, %1506 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
%1958 = torch.aten.mm %1957, %1956 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,5120],f16> -> !torch.vtensor<[2048,5120],f16>
%1959 = torch.aten.mul.Scalar %159, %int1 : !torch.vtensor<[5120],f16>, !torch.int -> !torch.vtensor<[5120],f16>
%1960 = torch.aten.add.Tensor %1959, %1958, %int1 : !torch.vtensor<[5120],f16>, !torch.vtensor<[2048,5120],f16>, !torch.int -> !torch.vtensor<[2048,5120],f16>
%1961 = torch.aten.view %1960, %1661 : !torch.vtensor<[2048,5120],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,5120],f16>
%1962 = torch.aten.slice.Tensor %1961, %int-1, %int0, %int2560, %int1 : !torch.vtensor<[2,1024,5120],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1024,2560],f16>
%1963 = torch.aten.slice.Tensor %1961, %int-1, %int2560, %int5120, %int1 : !torch.vtensor<[2,1024,5120],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1024,2560],f16>
%1964 = torch.aten.gelu %1963, %str : !torch.vtensor<[2,1024,2560],f16>, !torch.str -> !torch.vtensor<[2,1024,2560],f16>
%1965 = torch.aten.mul.Tensor %1962, %1964 : !torch.vtensor<[2,1024,2560],f16>, !torch.vtensor<[2,1024,2560],f16> -> !torch.vtensor<[2,1024,2560],f16>
// Project 2560 -> 640 (%160/%161) and add the pre-FF residual.
%1966 = torch.aten.transpose.int %160, %int0, %int1 : !torch.vtensor<[640,2560],f16>, !torch.int, !torch.int -> !torch.vtensor<[2560,640],f16>
%1967 = torch.aten.view %1965, %1668 : !torch.vtensor<[2,1024,2560],f16>, !torch.list<int> -> !torch.vtensor<[2048,2560],f16>
%1968 = torch.aten.mm %1967, %1966 : !torch.vtensor<[2048,2560],f16>, !torch.vtensor<[2560,640],f16> -> !torch.vtensor<[2048,640],f16>
%1969 = torch.aten.mul.Scalar %161, %int1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640],f16>
%1970 = torch.aten.add.Tensor %1969, %1968, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2048,640],f16>, !torch.int -> !torch.vtensor<[2048,640],f16>
%1971 = torch.aten.view %1970, %1487 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%1972 = torch.aten.add.Tensor %1971, %1939, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16>
// Tokens back to NCHW [2,640,32,32], 1x1 projection conv (%162/%163), and
// residual with %1763 (the tensor saved before the token-space section).
%1973 = torch.aten.view %1972, %1675 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2,32,32,640],f16>
%1974 = torch.aten.permute %1973, %1060 : !torch.vtensor<[2,32,32,640],f16>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f16>
%1975 = torch.aten.convolution %1974, %162, %163, %744, %745, %744, %false, %745, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[640,640,1,1],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
%1976 = torch.aten.add.Tensor %1975, %1763, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
// Spatial downsampling: 3x3 conv with strides %1361 reduces 32x32 -> 16x16.
%1977 = torch.aten.convolution %1976, %164, %165, %1361, %744, %744, %false, %745, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[640,640,3,3],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,640,16,16],f16>
// Start of the next normalization at the reduced resolution: reshape to
// [2,32,20,256] (20*256 = 5120 elements per group) and upcast to f32.
// The mean/variance ops continue past the end of this chunk.
%1978 = torch.aten.clone %1977, %int0 : !torch.vtensor<[2,640,16,16],f16>, !torch.int -> !torch.vtensor<[2,640,16,16],f16>
%1979 = torch.prim.ListConstruct %int2, %int32, %int20, %int256 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1980 = torch.aten.view %1978, %1979 : !torch.vtensor<[2,640,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,32,20,256],f16>
%1981 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%1982 = torch.aten.to.dtype %1981, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%1983 = torch.prim.ListConstruct %int2, %int32, %int20, %int256 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1984 = torch.aten.broadcast_to %1982, %1983 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,20,256],f32>
%1985 = torch.valsem.aten.copy %1984, %1980, %false : !torch.vtensor<[2,32,20,256],f32>, !torch.vtensor<[2,32,20,256],f16>, !torch.bool -> !torch.vtensor<[2,32,20,256],f32>
%1986 = torch.aten.to.dtype %1985, %int7, %false, %false, %none : !torch.vtensor<[2,32,20,256],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,20,256],f64>
%1987 = torch.aten.sum.dim_IntList %1986, %754, %true, %none : !torch.vtensor<[2,32,20,256],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%1988 = torch.aten.div.Scalar %1987, %int5120 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%1989 = torch.aten.sub.Tensor %1986, %1988, %float1.000000e00 : !torch.vtensor<[2,32,20,256],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,20,256],f64>
%1990 = torch.aten.mul.Tensor %1989, %1989 : !torch.vtensor<[2,32,20,256],f64>, !torch.vtensor<[2,32,20,256],f64> -> !torch.vtensor<[2,32,20,256],f64>
%1991 = torch.aten.sum.dim_IntList %1990, %754, %true, %none : !torch.vtensor<[2,32,20,256],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%1992 = torch.aten.div.Scalar %1991, %int5120 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%1993 = torch.aten.to.dtype %1992, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%1994 = torch.aten.sum.dim_IntList %1985, %754, %true, %none : !torch.vtensor<[2,32,20,256],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%1995 = torch.aten.div.Scalar %1994, %int5120 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%1996 = torch.aten.add.Tensor %1993, %6, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%1997 = torch.aten.rsqrt %1996 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%1998 = torch.aten.sub.Tensor %1980, %1995, %int1 : !torch.vtensor<[2,32,20,256],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,20,256],f32>
%1999 = torch.aten.mul.Tensor %1998, %1997 : !torch.vtensor<[2,32,20,256],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,20,256],f32>
%2000 = torch.prim.ListConstruct %int2, %int640, %int16, %int16 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2001 = torch.aten.view %1999, %2000 : !torch.vtensor<[2,32,20,256],f32>, !torch.list<int> -> !torch.vtensor<[2,640,16,16],f32>
%2002 = torch.aten.unsqueeze %166, %int-1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640,1],f16>
%2003 = torch.aten.unsqueeze %2002, %int-1 : !torch.vtensor<[640,1],f16>, !torch.int -> !torch.vtensor<[640,1,1],f16>
%2004 = torch.aten.mul.Tensor %2001, %2003 : !torch.vtensor<[2,640,16,16],f32>, !torch.vtensor<[640,1,1],f16> -> !torch.vtensor<[2,640,16,16],f32>
%2005 = torch.aten.unsqueeze %167, %int-1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640,1],f16>
%2006 = torch.aten.unsqueeze %2005, %int-1 : !torch.vtensor<[640,1],f16>, !torch.int -> !torch.vtensor<[640,1,1],f16>
%2007 = torch.aten.add.Tensor %2004, %2006, %int1 : !torch.vtensor<[2,640,16,16],f32>, !torch.vtensor<[640,1,1],f16>, !torch.int -> !torch.vtensor<[2,640,16,16],f32>
%2008 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%2009 = torch.aten.to.dtype %2008, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%2010 = torch.prim.ListConstruct %int2, %int640, %int16, %int16 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2011 = torch.aten.broadcast_to %2009, %2010 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,640,16,16],f16>
%2012 = torch.valsem.aten.copy %2011, %2007, %false : !torch.vtensor<[2,640,16,16],f16>, !torch.vtensor<[2,640,16,16],f32>, !torch.bool -> !torch.vtensor<[2,640,16,16],f16>
%2013 = torch.aten.sigmoid %2012 : !torch.vtensor<[2,640,16,16],f16> -> !torch.vtensor<[2,640,16,16],f16>
%2014 = torch.aten.mul.Tensor %2013, %2012 : !torch.vtensor<[2,640,16,16],f16>, !torch.vtensor<[2,640,16,16],f16> -> !torch.vtensor<[2,640,16,16],f16>
%2015 = torch.aten.convolution %2014, %168, %169, %744, %744, %744, %false, %745, %int1 : !torch.vtensor<[2,640,16,16],f16>, !torch.vtensor<[1280,640,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
%2016 = torch.aten.sigmoid %743 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%2017 = torch.aten.mul.Tensor %2016, %743 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%2018 = torch.aten.transpose.int %170, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%2019 = torch.aten.mm %2017, %2018 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[2,1280],f16>
%2020 = torch.aten.mul.Scalar %171, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16>
%2021 = torch.aten.add.Tensor %2020, %2019, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[2,1280],f16>, !torch.int -> !torch.vtensor<[2,1280],f16>
%2022 = torch.aten.slice.Tensor %2021, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,1280],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
%2023 = torch.aten.slice.Tensor %2022, %int1, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,1280],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
%2024 = torch.aten.unsqueeze %2023, %int2 : !torch.vtensor<[2,1280],f16>, !torch.int -> !torch.vtensor<[2,1280,1],f16>
%2025 = torch.aten.unsqueeze %2024, %int3 : !torch.vtensor<[2,1280,1],f16>, !torch.int -> !torch.vtensor<[2,1280,1,1],f16>
%2026 = torch.aten.add.Tensor %2015, %2025, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
%2027 = torch.prim.ListConstruct %int2, %int32, %int40, %int256 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2028 = torch.aten.view %2026, %2027 : !torch.vtensor<[2,1280,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,32,40,256],f16>
%2029 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%2030 = torch.aten.to.dtype %2029, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%2031 = torch.prim.ListConstruct %int2, %int32, %int40, %int256 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2032 = torch.aten.broadcast_to %2030, %2031 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,40,256],f32>
%2033 = torch.valsem.aten.copy %2032, %2028, %false : !torch.vtensor<[2,32,40,256],f32>, !torch.vtensor<[2,32,40,256],f16>, !torch.bool -> !torch.vtensor<[2,32,40,256],f32>
%2034 = torch.aten.to.dtype %2033, %int7, %false, %false, %none : !torch.vtensor<[2,32,40,256],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,40,256],f64>
%2035 = torch.aten.sum.dim_IntList %2034, %754, %true, %none : !torch.vtensor<[2,32,40,256],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%2036 = torch.aten.div.Scalar %2035, %int10240 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%2037 = torch.aten.sub.Tensor %2034, %2036, %float1.000000e00 : !torch.vtensor<[2,32,40,256],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,40,256],f64>
%2038 = torch.aten.mul.Tensor %2037, %2037 : !torch.vtensor<[2,32,40,256],f64>, !torch.vtensor<[2,32,40,256],f64> -> !torch.vtensor<[2,32,40,256],f64>
%2039 = torch.aten.sum.dim_IntList %2038, %754, %true, %none : !torch.vtensor<[2,32,40,256],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%2040 = torch.aten.div.Scalar %2039, %int10240 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%2041 = torch.aten.to.dtype %2040, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%2042 = torch.aten.sum.dim_IntList %2033, %754, %true, %none : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%2043 = torch.aten.div.Scalar %2042, %int10240 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%2044 = torch.aten.add.Tensor %2041, %6, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%2045 = torch.aten.rsqrt %2044 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%2046 = torch.aten.sub.Tensor %2028, %2043, %int1 : !torch.vtensor<[2,32,40,256],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,256],f32>
%2047 = torch.aten.mul.Tensor %2046, %2045 : !torch.vtensor<[2,32,40,256],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,256],f32>
%2048 = torch.prim.ListConstruct %int2, %int1280, %int16, %int16 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2049 = torch.aten.view %2047, %2048 : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f32>
%2050 = torch.aten.unsqueeze %172, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%2051 = torch.aten.unsqueeze %2050, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%2052 = torch.aten.mul.Tensor %2049, %2051 : !torch.vtensor<[2,1280,16,16],f32>, !torch.vtensor<[1280,1,1],f16> -> !torch.vtensor<[2,1280,16,16],f32>
%2053 = torch.aten.unsqueeze %173, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%2054 = torch.aten.unsqueeze %2053, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%2055 = torch.aten.add.Tensor %2052, %2054, %int1 : !torch.vtensor<[2,1280,16,16],f32>, !torch.vtensor<[1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f32>
%2056 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%2057 = torch.aten.to.dtype %2056, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%2058 = torch.prim.ListConstruct %int2, %int1280, %int16, %int16 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2059 = torch.aten.broadcast_to %2057, %2058 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f16>
%2060 = torch.valsem.aten.copy %2059, %2055, %false : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f32>, !torch.bool -> !torch.vtensor<[2,1280,16,16],f16>
%2061 = torch.aten.sigmoid %2060 : !torch.vtensor<[2,1280,16,16],f16> -> !torch.vtensor<[2,1280,16,16],f16>
%2062 = torch.aten.mul.Tensor %2061, %2060 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f16> -> !torch.vtensor<[2,1280,16,16],f16>
%2063 = torch.aten.convolution %2062, %174, %175, %744, %744, %744, %false, %745, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
%2064 = torch.aten.convolution %1977, %176, %177, %744, %745, %744, %false, %745, %int1 : !torch.vtensor<[2,640,16,16],f16>, !torch.vtensor<[1280,640,1,1],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
%2065 = torch.aten.add.Tensor %2064, %2063, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
%2066 = torch.aten.div.Tensor %2065, %5 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[2,1280,16,16],f16>
%2067 = torch.aten.clone %2066, %int0 : !torch.vtensor<[2,1280,16,16],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
%2068 = torch.aten.view %2067, %2027 : !torch.vtensor<[2,1280,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,32,40,256],f16>
%2069 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%2070 = torch.aten.to.dtype %2069, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%2071 = torch.prim.ListConstruct %int2, %int32, %int40, %int256 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2072 = torch.aten.broadcast_to %2070, %2071 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,40,256],f32>
%2073 = torch.valsem.aten.copy %2072, %2068, %false : !torch.vtensor<[2,32,40,256],f32>, !torch.vtensor<[2,32,40,256],f16>, !torch.bool -> !torch.vtensor<[2,32,40,256],f32>
%2074 = torch.aten.to.dtype %2073, %int7, %false, %false, %none : !torch.vtensor<[2,32,40,256],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,40,256],f64>
%2075 = torch.aten.sum.dim_IntList %2074, %754, %true, %none : !torch.vtensor<[2,32,40,256],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%2076 = torch.aten.div.Scalar %2075, %int10240 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%2077 = torch.aten.sub.Tensor %2074, %2076, %float1.000000e00 : !torch.vtensor<[2,32,40,256],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,40,256],f64>
%2078 = torch.aten.mul.Tensor %2077, %2077 : !torch.vtensor<[2,32,40,256],f64>, !torch.vtensor<[2,32,40,256],f64> -> !torch.vtensor<[2,32,40,256],f64>
%2079 = torch.aten.sum.dim_IntList %2078, %754, %true, %none : !torch.vtensor<[2,32,40,256],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%2080 = torch.aten.div.Scalar %2079, %int10240 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%2081 = torch.aten.to.dtype %2080, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%2082 = torch.aten.sum.dim_IntList %2073, %754, %true, %none : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%2083 = torch.aten.div.Scalar %2082, %int10240 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%2084 = torch.aten.add.Tensor %2081, %4, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%2085 = torch.aten.rsqrt %2084 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%2086 = torch.aten.sub.Tensor %2068, %2083, %int1 : !torch.vtensor<[2,32,40,256],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,256],f32>
%2087 = torch.aten.mul.Tensor %2086, %2085 : !torch.vtensor<[2,32,40,256],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,256],f32>
%2088 = torch.aten.view %2087, %2048 : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f32>
%2089 = torch.aten.unsqueeze %178, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%2090 = torch.aten.unsqueeze %2089, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%2091 = torch.aten.mul.Tensor %2088, %2090 : !torch.vtensor<[2,1280,16,16],f32>, !torch.vtensor<[1280,1,1],f16> -> !torch.vtensor<[2,1280,16,16],f32>
%2092 = torch.aten.unsqueeze %179, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%2093 = torch.aten.unsqueeze %2092, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%2094 = torch.aten.add.Tensor %2091, %2093, %int1 : !torch.vtensor<[2,1280,16,16],f32>, !torch.vtensor<[1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f32>
%2095 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%2096 = torch.aten.to.dtype %2095, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%2097 = torch.prim.ListConstruct %int2, %int1280, %int16, %int16 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2098 = torch.aten.broadcast_to %2096, %2097 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f16>
%2099 = torch.valsem.aten.copy %2098, %2094, %false : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f32>, !torch.bool -> !torch.vtensor<[2,1280,16,16],f16>
%2100 = torch.aten.convolution %2099, %180, %181, %744, %745, %744, %false, %745, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[1280,1280,1,1],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
%2101 = torch.aten.permute %2100, %866 : !torch.vtensor<[2,1280,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,16,16,1280],f16>
%2102 = torch.prim.ListConstruct %int2, %int256, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2103 = torch.aten.view %2101, %2102 : !torch.vtensor<[2,16,16,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%2104 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%2105 = torch.aten.sum.dim_IntList %2103, %2104, %true, %none : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,256,1],f16>
%2106 = torch.aten.div.Scalar %2105, %int1280 : !torch.vtensor<[2,256,1],f16>, !torch.int -> !torch.vtensor<[2,256,1],f16>
%2107 = torch.prim.ListConstruct %int2, %int256, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2108 = torch.aten.broadcast_to %2106, %2107 : !torch.vtensor<[2,256,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%2109 = torch.aten.sub.Tensor %2103, %2108, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16>
%2110 = torch.aten.mul.Tensor %2109, %2109 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16>
%2111 = torch.aten.sum.dim_IntList %2110, %2104, %true, %none : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,256,1],f16>
%2112 = torch.aten.div.Scalar %2111, %int1280 : !torch.vtensor<[2,256,1],f16>, !torch.int -> !torch.vtensor<[2,256,1],f16>
%2113 = torch.aten.add.Scalar %2112, %float1.000000e-05, %int1 : !torch.vtensor<[2,256,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,256,1],f16>
%2114 = torch.aten.rsqrt %2113 : !torch.vtensor<[2,256,1],f16> -> !torch.vtensor<[2,256,1],f16>
%2115 = torch.prim.ListConstruct %int2, %int256, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2116 = torch.aten.broadcast_to %2114, %2115 : !torch.vtensor<[2,256,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%2117 = torch.aten.mul.Tensor %2109, %2116 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16>
%2118 = torch.aten.mul.Tensor %2117, %182 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[1280],f16> -> !torch.vtensor<[2,256,1280],f16>
%2119 = torch.aten.add.Tensor %2118, %183, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16>
%2120 = torch.aten.transpose.int %184, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%2121 = torch.prim.ListConstruct %int512, %int1280 : (!torch.int, !torch.int) -> !torch.list<int>
%2122 = torch.aten.view %2119, %2121 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
%2123 = torch.aten.mm %2122, %2120 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16>
%2124 = torch.aten.view %2123, %2102 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%2125 = torch.aten.transpose.int %185, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%2126 = torch.aten.view %2119, %2121 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
%2127 = torch.aten.mm %2126, %2125 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16>
%2128 = torch.aten.view %2127, %2102 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%2129 = torch.aten.transpose.int %186, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%2130 = torch.aten.view %2119, %2121 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
%2131 = torch.aten.mm %2130, %2129 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16>
%2132 = torch.aten.view %2131, %2102 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%2133 = torch.prim.ListConstruct %int2, %int256, %int8, %int160 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2134 = torch.aten.view %2124, %2133 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16>
%2135 = torch.aten.permute %2134, %901 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16>
%2136 = torch.aten.clone %2135, %int0 : !torch.vtensor<[2,8,256,160],f16>, !torch.int -> !torch.vtensor<[2,8,256,160],f16>
%2137 = torch.prim.ListConstruct %int16, %int256, %int160 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2138 = torch.aten.view %2136, %2137 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%2139 = torch.aten.view %2128, %2133 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16>
%2140 = torch.aten.permute %2139, %901 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16>
%2141 = torch.aten.clone %2140, %int0 : !torch.vtensor<[2,8,256,160],f16>, !torch.int -> !torch.vtensor<[2,8,256,160],f16>
%2142 = torch.aten.view %2141, %2137 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%2143 = torch.aten.view %2132, %2133 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16>
%2144 = torch.aten.permute %2143, %901 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16>
%2145 = torch.aten.clone %2144, %int0 : !torch.vtensor<[2,8,256,160],f16>, !torch.int -> !torch.vtensor<[2,8,256,160],f16>
%2146 = torch.aten.view %2145, %2137 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%2147 = torch.aten.transpose.int %2142, %int-1, %int-2 : !torch.vtensor<[16,256,160],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,160,256],f16>
%2148 = torch.aten.broadcast_to %2138, %2137 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%2149 = torch.aten.view %2148, %2137 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%2150 = torch.prim.ListConstruct %int16, %int160, %int256 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2151 = torch.aten.broadcast_to %2147, %2150 : !torch.vtensor<[16,160,256],f16>, !torch.list<int> -> !torch.vtensor<[16,160,256],f16>
%2152 = torch.aten.view %2151, %2150 : !torch.vtensor<[16,160,256],f16>, !torch.list<int> -> !torch.vtensor<[16,160,256],f16>
%2153 = torch.aten.bmm %2149, %2152 : !torch.vtensor<[16,256,160],f16>, !torch.vtensor<[16,160,256],f16> -> !torch.vtensor<[16,256,256],f16>
%2154 = torch.prim.ListConstruct %int16, %int256, %int256 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2155 = torch.aten.view %2153, %2154 : !torch.vtensor<[16,256,256],f16>, !torch.list<int> -> !torch.vtensor<[16,256,256],f16>
%2156 = torch.aten.mul.Tensor %2155, %1 : !torch.vtensor<[16,256,256],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,256,256],f16>
%values_14, %indices_15 = torch.aten.max.dim %2156, %int-1, %true : !torch.vtensor<[16,256,256],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,256,1],f16>, !torch.vtensor<[16,256,1],si64>
%2157 = torch.aten.sub.Tensor %2156, %values_14, %float1.000000e00 : !torch.vtensor<[16,256,256],f16>, !torch.vtensor<[16,256,1],f16>, !torch.float -> !torch.vtensor<[16,256,256],f16>
%2158 = torch.aten.exp %2157 : !torch.vtensor<[16,256,256],f16> -> !torch.vtensor<[16,256,256],f16>
%2159 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%2160 = torch.aten.sum.dim_IntList %2158, %2159, %true, %none : !torch.vtensor<[16,256,256],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[16,256,1],f16>
%2161 = torch.aten.div.Tensor %2158, %2160 : !torch.vtensor<[16,256,256],f16>, !torch.vtensor<[16,256,1],f16> -> !torch.vtensor<[16,256,256],f16>
%2162 = torch.aten.broadcast_to %2161, %2154 : !torch.vtensor<[16,256,256],f16>, !torch.list<int> -> !torch.vtensor<[16,256,256],f16>
%2163 = torch.aten.view %2162, %2154 : !torch.vtensor<[16,256,256],f16>, !torch.list<int> -> !torch.vtensor<[16,256,256],f16>
%2164 = torch.aten.broadcast_to %2146, %2137 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%2165 = torch.aten.view %2164, %2137 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%2166 = torch.aten.bmm %2163, %2165 : !torch.vtensor<[16,256,256],f16>, !torch.vtensor<[16,256,160],f16> -> !torch.vtensor<[16,256,160],f16>
%2167 = torch.aten.view %2166, %2137 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%2168 = torch.prim.ListConstruct %int2, %int8, %int256, %int160 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2169 = torch.aten.view %2167, %2168 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16>
%2170 = torch.aten.permute %2169, %901 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16>
%2171 = torch.aten.clone %2170, %int0 : !torch.vtensor<[2,256,8,160],f16>, !torch.int -> !torch.vtensor<[2,256,8,160],f16>
%2172 = torch.aten.view %2171, %2102 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%2173 = torch.aten.transpose.int %187, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%2174 = torch.aten.view %2172, %2121 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
%2175 = torch.aten.mm %2174, %2173 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16>
%2176 = torch.aten.mul.Scalar %188, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16>
%2177 = torch.aten.add.Tensor %2176, %2175, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[512,1280],f16>, !torch.int -> !torch.vtensor<[512,1280],f16>
%2178 = torch.aten.view %2177, %2102 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%2179 = torch.aten.add.Tensor %2178, %2103, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16>
%2180 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%2181 = torch.aten.sum.dim_IntList %2179, %2180, %true, %none : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,256,1],f16>
%2182 = torch.aten.div.Scalar %2181, %int1280 : !torch.vtensor<[2,256,1],f16>, !torch.int -> !torch.vtensor<[2,256,1],f16>
%2183 = torch.prim.ListConstruct %int2, %int256, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2184 = torch.aten.broadcast_to %2182, %2183 : !torch.vtensor<[2,256,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%2185 = torch.aten.sub.Tensor %2179, %2184, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16>
%2186 = torch.aten.mul.Tensor %2185, %2185 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16>
%2187 = torch.aten.sum.dim_IntList %2186, %2180, %true, %none : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,256,1],f16>
%2188 = torch.aten.div.Scalar %2187, %int1280 : !torch.vtensor<[2,256,1],f16>, !torch.int -> !torch.vtensor<[2,256,1],f16>
%2189 = torch.aten.add.Scalar %2188, %float1.000000e-05, %int1 : !torch.vtensor<[2,256,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,256,1],f16>
%2190 = torch.aten.rsqrt %2189 : !torch.vtensor<[2,256,1],f16> -> !torch.vtensor<[2,256,1],f16>
%2191 = torch.prim.ListConstruct %int2, %int256, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2192 = torch.aten.broadcast_to %2190, %2191 : !torch.vtensor<[2,256,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%2193 = torch.aten.mul.Tensor %2185, %2192 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16>
%2194 = torch.aten.mul.Tensor %2193, %189 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[1280],f16> -> !torch.vtensor<[2,256,1280],f16>
%2195 = torch.aten.add.Tensor %2194, %190, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16>
%2196 = torch.aten.transpose.int %191, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%2197 = torch.aten.view %2195, %2121 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
%2198 = torch.aten.mm %2197, %2196 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16>
%2199 = torch.aten.view %2198, %2102 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%2200 = torch.aten.transpose.int %192, %int0, %int1 : !torch.vtensor<[1280,768],f16>, !torch.int, !torch.int -> !torch.vtensor<[768,1280],f16>
%2201 = torch.aten.view %arg2, %968 : !torch.vtensor<[2,77,768],f16>, !torch.list<int> -> !torch.vtensor<[154,768],f16>
%2202 = torch.aten.mm %2201, %2200 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,1280],f16> -> !torch.vtensor<[154,1280],f16>
%2203 = torch.prim.ListConstruct %int2, %int77, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2204 = torch.aten.view %2202, %2203 : !torch.vtensor<[154,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,77,1280],f16>
%2205 = torch.aten.transpose.int %193, %int0, %int1 : !torch.vtensor<[1280,768],f16>, !torch.int, !torch.int -> !torch.vtensor<[768,1280],f16>
%2206 = torch.aten.view %arg2, %968 : !torch.vtensor<[2,77,768],f16>, !torch.list<int> -> !torch.vtensor<[154,768],f16>
%2207 = torch.aten.mm %2206, %2205 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,1280],f16> -> !torch.vtensor<[154,1280],f16>
%2208 = torch.aten.view %2207, %2203 : !torch.vtensor<[154,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,77,1280],f16>
%2209 = torch.aten.view %2199, %2133 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16>
%2210 = torch.aten.permute %2209, %901 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16>
%2211 = torch.aten.clone %2210, %int0 : !torch.vtensor<[2,8,256,160],f16>, !torch.int -> !torch.vtensor<[2,8,256,160],f16>
%2212 = torch.aten.view %2211, %2137 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%2213 = torch.prim.ListConstruct %int2, %int77, %int8, %int160 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2214 = torch.aten.view %2204, %2213 : !torch.vtensor<[2,77,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,77,8,160],f16>
%2215 = torch.aten.permute %2214, %901 : !torch.vtensor<[2,77,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,160],f16>
%2216 = torch.aten.clone %2215, %int0 : !torch.vtensor<[2,8,77,160],f16>, !torch.int -> !torch.vtensor<[2,8,77,160],f16>
%2217 = torch.prim.ListConstruct %int16, %int77, %int160 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2218 = torch.aten.view %2216, %2217 : !torch.vtensor<[2,8,77,160],f16>, !torch.list<int> -> !torch.vtensor<[16,77,160],f16>
%2219 = torch.aten.view %2208, %2213 : !torch.vtensor<[2,77,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,77,8,160],f16>
%2220 = torch.aten.permute %2219, %901 : !torch.vtensor<[2,77,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,160],f16>
%2221 = torch.aten.clone %2220, %int0 : !torch.vtensor<[2,8,77,160],f16>, !torch.int -> !torch.vtensor<[2,8,77,160],f16>
%2222 = torch.aten.view %2221, %2217 : !torch.vtensor<[2,8,77,160],f16>, !torch.list<int> -> !torch.vtensor<[16,77,160],f16>
%2223 = torch.aten.transpose.int %2218, %int-1, %int-2 : !torch.vtensor<[16,77,160],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,160,77],f16>
%2224 = torch.aten.broadcast_to %2212, %2137 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%2225 = torch.aten.view %2224, %2137 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%2226 = torch.prim.ListConstruct %int16, %int160, %int77 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2227 = torch.aten.broadcast_to %2223, %2226 : !torch.vtensor<[16,160,77],f16>, !torch.list<int> -> !torch.vtensor<[16,160,77],f16>
%2228 = torch.aten.view %2227, %2226 : !torch.vtensor<[16,160,77],f16>, !torch.list<int> -> !torch.vtensor<[16,160,77],f16>
%2229 = torch.aten.bmm %2225, %2228 : !torch.vtensor<[16,256,160],f16>, !torch.vtensor<[16,160,77],f16> -> !torch.vtensor<[16,256,77],f16>
%2230 = torch.prim.ListConstruct %int16, %int256, %int77 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2231 = torch.aten.view %2229, %2230 : !torch.vtensor<[16,256,77],f16>, !torch.list<int> -> !torch.vtensor<[16,256,77],f16>
%2232 = torch.aten.mul.Tensor %2231, %1 : !torch.vtensor<[16,256,77],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,256,77],f16>
%values_16, %indices_17 = torch.aten.max.dim %2232, %int-1, %true : !torch.vtensor<[16,256,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,256,1],f16>, !torch.vtensor<[16,256,1],si64>
%2233 = torch.aten.sub.Tensor %2232, %values_16, %float1.000000e00 : !torch.vtensor<[16,256,77],f16>, !torch.vtensor<[16,256,1],f16>, !torch.float -> !torch.vtensor<[16,256,77],f16>
%2234 = torch.aten.exp %2233 : !torch.vtensor<[16,256,77],f16> -> !torch.vtensor<[16,256,77],f16>
%2235 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%2236 = torch.aten.sum.dim_IntList %2234, %2235, %true, %none : !torch.vtensor<[16,256,77],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[16,256,1],f16>
%2237 = torch.aten.div.Tensor %2234, %2236 : !torch.vtensor<[16,256,77],f16>, !torch.vtensor<[16,256,1],f16> -> !torch.vtensor<[16,256,77],f16>
%2238 = torch.aten.broadcast_to %2237, %2230 : !torch.vtensor<[16,256,77],f16>, !torch.list<int> -> !torch.vtensor<[16,256,77],f16>
%2239 = torch.aten.view %2238, %2230 : !torch.vtensor<[16,256,77],f16>, !torch.list<int> -> !torch.vtensor<[16,256,77],f16>
%2240 = torch.aten.broadcast_to %2222, %2217 : !torch.vtensor<[16,77,160],f16>, !torch.list<int> -> !torch.vtensor<[16,77,160],f16>
%2241 = torch.aten.view %2240, %2217 : !torch.vtensor<[16,77,160],f16>, !torch.list<int> -> !torch.vtensor<[16,77,160],f16>
%2242 = torch.aten.bmm %2239, %2241 : !torch.vtensor<[16,256,77],f16>, !torch.vtensor<[16,77,160],f16> -> !torch.vtensor<[16,256,160],f16>
%2243 = torch.aten.view %2242, %2137 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%2244 = torch.aten.view %2243, %2168 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16>
%2245 = torch.aten.permute %2244, %901 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16>
%2246 = torch.aten.clone %2245, %int0 : !torch.vtensor<[2,256,8,160],f16>, !torch.int -> !torch.vtensor<[2,256,8,160],f16>
%2247 = torch.aten.view %2246, %2102 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%2248 = torch.aten.transpose.int %194, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%2249 = torch.aten.view %2247, %2121 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
%2250 = torch.aten.mm %2249, %2248 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16>
%2251 = torch.aten.mul.Scalar %195, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16>
%2252 = torch.aten.add.Tensor %2251, %2250, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[512,1280],f16>, !torch.int -> !torch.vtensor<[512,1280],f16>
%2253 = torch.aten.view %2252, %2102 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%2254 = torch.aten.add.Tensor %2253, %2179, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16>
%2255 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%2256 = torch.aten.sum.dim_IntList %2254, %2255, %true, %none : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,256,1],f16>
%2257 = torch.aten.div.Scalar %2256, %int1280 : !torch.vtensor<[2,256,1],f16>, !torch.int -> !torch.vtensor<[2,256,1],f16>
%2258 = torch.prim.ListConstruct %int2, %int256, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2259 = torch.aten.broadcast_to %2257, %2258 : !torch.vtensor<[2,256,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%2260 = torch.aten.sub.Tensor %2254, %2259, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16>
%2261 = torch.aten.mul.Tensor %2260, %2260 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16>
%2262 = torch.aten.sum.dim_IntList %2261, %2255, %true, %none : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,256,1],f16>
%2263 = torch.aten.div.Scalar %2262, %int1280 : !torch.vtensor<[2,256,1],f16>, !torch.int -> !torch.vtensor<[2,256,1],f16>
%2264 = torch.aten.add.Scalar %2263, %float1.000000e-05, %int1 : !torch.vtensor<[2,256,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,256,1],f16>
%2265 = torch.aten.rsqrt %2264 : !torch.vtensor<[2,256,1],f16> -> !torch.vtensor<[2,256,1],f16>
%2266 = torch.prim.ListConstruct %int2, %int256, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2267 = torch.aten.broadcast_to %2265, %2266 : !torch.vtensor<[2,256,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%2268 = torch.aten.mul.Tensor %2260, %2267 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16>
%2269 = torch.aten.mul.Tensor %2268, %196 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[1280],f16> -> !torch.vtensor<[2,256,1280],f16>
%2270 = torch.aten.add.Tensor %2269, %197, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16>
%2271 = torch.aten.transpose.int %198, %int0, %int1 : !torch.vtensor<[10240,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,10240],f16>
%2272 = torch.aten.view %2270, %2121 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
%2273 = torch.aten.mm %2272, %2271 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,10240],f16> -> !torch.vtensor<[512,10240],f16>
%2274 = torch.aten.mul.Scalar %199, %int1 : !torch.vtensor<[10240],f16>, !torch.int -> !torch.vtensor<[10240],f16>
%2275 = torch.aten.add.Tensor %2274, %2273, %int1 : !torch.vtensor<[10240],f16>, !torch.vtensor<[512,10240],f16>, !torch.int -> !torch.vtensor<[512,10240],f16>
%2276 = torch.prim.ListConstruct %int2, %int256, %int10240 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2277 = torch.aten.view %2275, %2276 : !torch.vtensor<[512,10240],f16>, !torch.list<int> -> !torch.vtensor<[2,256,10240],f16>
%2278 = torch.aten.slice.Tensor %2277, %int-1, %int0, %int5120, %int1 : !torch.vtensor<[2,256,10240],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,256,5120],f16>
%2279 = torch.aten.slice.Tensor %2277, %int-1, %int5120, %int10240, %int1 : !torch.vtensor<[2,256,10240],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,256,5120],f16>
%2280 = torch.aten.gelu %2279, %str : !torch.vtensor<[2,256,5120],f16>, !torch.str -> !torch.vtensor<[2,256,5120],f16>
%2281 = torch.aten.mul.Tensor %2278, %2280 : !torch.vtensor<[2,256,5120],f16>, !torch.vtensor<[2,256,5120],f16> -> !torch.vtensor<[2,256,5120],f16>
%2282 = torch.aten.transpose.int %200, %int0, %int1 : !torch.vtensor<[1280,5120],f16>, !torch.int, !torch.int -> !torch.vtensor<[5120,1280],f16>
%2283 = torch.prim.ListConstruct %int512, %int5120 : (!torch.int, !torch.int) -> !torch.list<int>
%2284 = torch.aten.view %2281, %2283 : !torch.vtensor<[2,256,5120],f16>, !torch.list<int> -> !torch.vtensor<[512,5120],f16>
%2285 = torch.aten.mm %2284, %2282 : !torch.vtensor<[512,5120],f16>, !torch.vtensor<[5120,1280],f16> -> !torch.vtensor<[512,1280],f16>
%2286 = torch.aten.mul.Scalar %201, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16>
%2287 = torch.aten.add.Tensor %2286, %2285, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[512,1280],f16>, !torch.int -> !torch.vtensor<[512,1280],f16>
%2288 = torch.aten.view %2287, %2102 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%2289 = torch.aten.add.Tensor %2288, %2254, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16>
%2290 = torch.prim.ListConstruct %int2, %int16, %int16, %int1280 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2291 = torch.aten.view %2289, %2290 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,16,16,1280],f16>
%2292 = torch.aten.permute %2291, %1060 : !torch.vtensor<[2,16,16,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f16>
%2293 = torch.aten.convolution %2292, %202, %203, %744, %745, %744, %false, %745, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[1280,1280,1,1],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
%2294 = torch.aten.add.Tensor %2293, %2066, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
%2295 = torch.aten.clone %2294, %int0 : !torch.vtensor<[2,1280,16,16],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
%2296 = torch.aten.view %2295, %2027 : !torch.vtensor<[2,1280,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,32,40,256],f16>
%2297 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%2298 = torch.aten.to.dtype %2297, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%2299 = torch.prim.ListConstruct %int2, %int32, %int40, %int256 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2300 = torch.aten.broadcast_to %2298, %2299 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,40,256],f32>
%2301 = torch.valsem.aten.copy %2300, %2296, %false : !torch.vtensor<[2,32,40,256],f32>, !torch.vtensor<[2,32,40,256],f16>, !torch.bool -> !torch.vtensor<[2,32,40,256],f32>
%2302 = torch.aten.to.dtype %2301, %int7, %false, %false, %none : !torch.vtensor<[2,32,40,256],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,40,256],f64>
%2303 = torch.aten.sum.dim_IntList %2302, %754, %true, %none : !torch.vtensor<[2,32,40,256],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%2304 = torch.aten.div.Scalar %2303, %int10240 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%2305 = torch.aten.sub.Tensor %2302, %2304, %float1.000000e00 : !torch.vtensor<[2,32,40,256],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,40,256],f64>
%2306 = torch.aten.mul.Tensor %2305, %2305 : !torch.vtensor<[2,32,40,256],f64>, !torch.vtensor<[2,32,40,256],f64> -> !torch.vtensor<[2,32,40,256],f64>
%2307 = torch.aten.sum.dim_IntList %2306, %754, %true, %none : !torch.vtensor<[2,32,40,256],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%2308 = torch.aten.div.Scalar %2307, %int10240 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%2309 = torch.aten.to.dtype %2308, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%2310 = torch.aten.sum.dim_IntList %2301, %754, %true, %none : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%2311 = torch.aten.div.Scalar %2310, %int10240 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%2312 = torch.aten.add.Tensor %2309, %6, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%2313 = torch.aten.rsqrt %2312 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%2314 = torch.aten.sub.Tensor %2296, %2311, %int1 : !torch.vtensor<[2,32,40,256],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,256],f32>
%2315 = torch.aten.mul.Tensor %2314, %2313 : !torch.vtensor<[2,32,40,256],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,256],f32>
%2316 = torch.aten.view %2315, %2048 : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f32>
%2317 = torch.aten.unsqueeze %204, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%2318 = torch.aten.unsqueeze %2317, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%2319 = torch.aten.mul.Tensor %2316, %2318 : !torch.vtensor<[2,1280,16,16],f32>, !torch.vtensor<[1280,1,1],f16> -> !torch.vtensor<[2,1280,16,16],f32>
%2320 = torch.aten.unsqueeze %205, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%2321 = torch.aten.unsqueeze %2320, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%2322 = torch.aten.add.Tensor %2319, %2321, %int1 : !torch.vtensor<[2,1280,16,16],f32>, !torch.vtensor<[1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f32>
%2323 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%2324 = torch.aten.to.dtype %2323, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%2325 = torch.prim.ListConstruct %int2, %int1280, %int16, %int16 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2326 = torch.aten.broadcast_to %2324, %2325 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f16>
%2327 = torch.valsem.aten.copy %2326, %2322, %false : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f32>, !torch.bool -> !torch.vtensor<[2,1280,16,16],f16>
%2328 = torch.aten.sigmoid %2327 : !torch.vtensor<[2,1280,16,16],f16> -> !torch.vtensor<[2,1280,16,16],f16>
%2329 = torch.aten.mul.Tensor %2328, %2327 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f16> -> !torch.vtensor<[2,1280,16,16],f16>
%2330 = torch.aten.convolution %2329, %206, %207, %744, %744, %744, %false, %745, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
%2331 = torch.aten.sigmoid %743 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%2332 = torch.aten.mul.Tensor %2331, %743 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%2333 = torch.aten.transpose.int %208, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%2334 = torch.aten.mm %2332, %2333 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[2,1280],f16>
%2335 = torch.aten.mul.Scalar %209, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16>
%2336 = torch.aten.add.Tensor %2335, %2334, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[2,1280],f16>, !torch.int -> !torch.vtensor<[2,1280],f16>
%2337 = torch.aten.slice.Tensor %2336, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,1280],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
%2338 = torch.aten.slice.Tensor %2337, %int1, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,1280],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
%2339 = torch.aten.unsqueeze %2338, %int2 : !torch.vtensor<[2,1280],f16>, !torch.int -> !torch.vtensor<[2,1280,1],f16>
%2340 = torch.aten.unsqueeze %2339, %int3 : !torch.vtensor<[2,1280,1],f16>, !torch.int -> !torch.vtensor<[2,1280,1,1],f16>
%2341 = torch.aten.add.Tensor %2330, %2340, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
%2342 = torch.aten.view %2341, %2027 : !torch.vtensor<[2,1280,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,32,40,256],f16>
%2343 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%2344 = torch.aten.to.dtype %2343, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%2345 = torch.prim.ListConstruct %int2, %int32, %int40, %int256 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2346 = torch.aten.broadcast_to %2344, %2345 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,40,256],f32>
%2347 = torch.valsem.aten.copy %2346, %2342, %false : !torch.vtensor<[2,32,40,256],f32>, !torch.vtensor<[2,32,40,256],f16>, !torch.bool -> !torch.vtensor<[2,32,40,256],f32>
%2348 = torch.aten.to.dtype %2347, %int7, %false, %false, %none : !torch.vtensor<[2,32,40,256],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,40,256],f64>
%2349 = torch.aten.sum.dim_IntList %2348, %754, %true, %none : !torch.vtensor<[2,32,40,256],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%2350 = torch.aten.div.Scalar %2349, %int10240 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%2351 = torch.aten.sub.Tensor %2348, %2350, %float1.000000e00 : !torch.vtensor<[2,32,40,256],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,40,256],f64>
%2352 = torch.aten.mul.Tensor %2351, %2351 : !torch.vtensor<[2,32,40,256],f64>, !torch.vtensor<[2,32,40,256],f64> -> !torch.vtensor<[2,32,40,256],f64>
%2353 = torch.aten.sum.dim_IntList %2352, %754, %true, %none : !torch.vtensor<[2,32,40,256],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%2354 = torch.aten.div.Scalar %2353, %int10240 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%2355 = torch.aten.to.dtype %2354, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%2356 = torch.aten.sum.dim_IntList %2347, %754, %true, %none : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%2357 = torch.aten.div.Scalar %2356, %int10240 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%2358 = torch.aten.add.Tensor %2355, %6, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%2359 = torch.aten.rsqrt %2358 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%2360 = torch.aten.sub.Tensor %2342, %2357, %int1 : !torch.vtensor<[2,32,40,256],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,256],f32>
%2361 = torch.aten.mul.Tensor %2360, %2359 : !torch.vtensor<[2,32,40,256],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,256],f32>
%2362 = torch.aten.view %2361, %2048 : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f32>
%2363 = torch.aten.unsqueeze %210, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%2364 = torch.aten.unsqueeze %2363, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%2365 = torch.aten.mul.Tensor %2362, %2364 : !torch.vtensor<[2,1280,16,16],f32>, !torch.vtensor<[1280,1,1],f16> -> !torch.vtensor<[2,1280,16,16],f32>
%2366 = torch.aten.unsqueeze %211, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%2367 = torch.aten.unsqueeze %2366, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%2368 = torch.aten.add.Tensor %2365, %2367, %int1 : !torch.vtensor<[2,1280,16,16],f32>, !torch.vtensor<[1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f32>
%2369 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%2370 = torch.aten.to.dtype %2369, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%2371 = torch.prim.ListConstruct %int2, %int1280, %int16, %int16 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2372 = torch.aten.broadcast_to %2370, %2371 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f16>
%2373 = torch.valsem.aten.copy %2372, %2368, %false : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f32>, !torch.bool -> !torch.vtensor<[2,1280,16,16],f16>
%2374 = torch.aten.sigmoid %2373 : !torch.vtensor<[2,1280,16,16],f16> -> !torch.vtensor<[2,1280,16,16],f16>
%2375 = torch.aten.mul.Tensor %2374, %2373 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f16> -> !torch.vtensor<[2,1280,16,16],f16>
%2376 = torch.aten.convolution %2375, %212, %213, %744, %744, %744, %false, %745, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
%2377 = torch.aten.add.Tensor %2294, %2376, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
%2378 = torch.aten.div.Tensor %2377, %5 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[2,1280,16,16],f16>
%2379 = torch.aten.clone %2378, %int0 : !torch.vtensor<[2,1280,16,16],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
%2380 = torch.aten.view %2379, %2027 : !torch.vtensor<[2,1280,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,32,40,256],f16>
%2381 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%2382 = torch.aten.to.dtype %2381, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%2383 = torch.prim.ListConstruct %int2, %int32, %int40, %int256 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2384 = torch.aten.broadcast_to %2382, %2383 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,40,256],f32>
%2385 = torch.valsem.aten.copy %2384, %2380, %false : !torch.vtensor<[2,32,40,256],f32>, !torch.vtensor<[2,32,40,256],f16>, !torch.bool -> !torch.vtensor<[2,32,40,256],f32>
%2386 = torch.aten.to.dtype %2385, %int7, %false, %false, %none : !torch.vtensor<[2,32,40,256],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,40,256],f64>
%2387 = torch.aten.sum.dim_IntList %2386, %754, %true, %none : !torch.vtensor<[2,32,40,256],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%2388 = torch.aten.div.Scalar %2387, %int10240 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%2389 = torch.aten.sub.Tensor %2386, %2388, %float1.000000e00 : !torch.vtensor<[2,32,40,256],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,40,256],f64>
%2390 = torch.aten.mul.Tensor %2389, %2389 : !torch.vtensor<[2,32,40,256],f64>, !torch.vtensor<[2,32,40,256],f64> -> !torch.vtensor<[2,32,40,256],f64>
%2391 = torch.aten.sum.dim_IntList %2390, %754, %true, %none : !torch.vtensor<[2,32,40,256],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%2392 = torch.aten.div.Scalar %2391, %int10240 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%2393 = torch.aten.to.dtype %2392, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%2394 = torch.aten.sum.dim_IntList %2385, %754, %true, %none : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%2395 = torch.aten.div.Scalar %2394, %int10240 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%2396 = torch.aten.add.Tensor %2393, %4, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%2397 = torch.aten.rsqrt %2396 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%2398 = torch.aten.sub.Tensor %2380, %2395, %int1 : !torch.vtensor<[2,32,40,256],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,256],f32>
%2399 = torch.aten.mul.Tensor %2398, %2397 : !torch.vtensor<[2,32,40,256],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,256],f32>
%2400 = torch.aten.view %2399, %2048 : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f32>
%2401 = torch.aten.unsqueeze %214, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%2402 = torch.aten.unsqueeze %2401, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%2403 = torch.aten.mul.Tensor %2400, %2402 : !torch.vtensor<[2,1280,16,16],f32>, !torch.vtensor<[1280,1,1],f16> -> !torch.vtensor<[2,1280,16,16],f32>
%2404 = torch.aten.unsqueeze %215, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%2405 = torch.aten.unsqueeze %2404, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%2406 = torch.aten.add.Tensor %2403, %2405, %int1 : !torch.vtensor<[2,1280,16,16],f32>, !torch.vtensor<[1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f32>
%2407 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%2408 = torch.aten.to.dtype %2407, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%2409 = torch.prim.ListConstruct %int2, %int1280, %int16, %int16 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2410 = torch.aten.broadcast_to %2408, %2409 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f16>
%2411 = torch.valsem.aten.copy %2410, %2406, %false : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f32>, !torch.bool -> !torch.vtensor<[2,1280,16,16],f16>
%2412 = torch.aten.convolution %2411, %216, %217, %744, %745, %744, %false, %745, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[1280,1280,1,1],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
%2413 = torch.aten.permute %2412, %866 : !torch.vtensor<[2,1280,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,16,16,1280],f16>
%2414 = torch.aten.view %2413, %2102 : !torch.vtensor<[2,16,16,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%2415 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%2416 = torch.aten.sum.dim_IntList %2414, %2415, %true, %none : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,256,1],f16>
%2417 = torch.aten.div.Scalar %2416, %int1280 : !torch.vtensor<[2,256,1],f16>, !torch.int -> !torch.vtensor<[2,256,1],f16>
%2418 = torch.prim.ListConstruct %int2, %int256, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2419 = torch.aten.broadcast_to %2417, %2418 : !torch.vtensor<[2,256,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%2420 = torch.aten.sub.Tensor %2414, %2419, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16>
%2421 = torch.aten.mul.Tensor %2420, %2420 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16>
%2422 = torch.aten.sum.dim_IntList %2421, %2415, %true, %none : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,256,1],f16>
%2423 = torch.aten.div.Scalar %2422, %int1280 : !torch.vtensor<[2,256,1],f16>, !torch.int -> !torch.vtensor<[2,256,1],f16>
%2424 = torch.aten.add.Scalar %2423, %float1.000000e-05, %int1 : !torch.vtensor<[2,256,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,256,1],f16>
%2425 = torch.aten.rsqrt %2424 : !torch.vtensor<[2,256,1],f16> -> !torch.vtensor<[2,256,1],f16>
%2426 = torch.prim.ListConstruct %int2, %int256, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2427 = torch.aten.broadcast_to %2425, %2426 : !torch.vtensor<[2,256,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%2428 = torch.aten.mul.Tensor %2420, %2427 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16>
%2429 = torch.aten.mul.Tensor %2428, %218 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[1280],f16> -> !torch.vtensor<[2,256,1280],f16>
%2430 = torch.aten.add.Tensor %2429, %219, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16>
%2431 = torch.aten.transpose.int %220, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%2432 = torch.aten.view %2430, %2121 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
%2433 = torch.aten.mm %2432, %2431 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16>
%2434 = torch.aten.view %2433, %2102 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%2435 = torch.aten.transpose.int %221, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%2436 = torch.aten.view %2430, %2121 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
%2437 = torch.aten.mm %2436, %2435 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16>
%2438 = torch.aten.view %2437, %2102 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%2439 = torch.aten.transpose.int %222, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%2440 = torch.aten.view %2430, %2121 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
%2441 = torch.aten.mm %2440, %2439 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16>
%2442 = torch.aten.view %2441, %2102 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%2443 = torch.aten.view %2434, %2133 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16>
%2444 = torch.aten.permute %2443, %901 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16>
%2445 = torch.aten.clone %2444, %int0 : !torch.vtensor<[2,8,256,160],f16>, !torch.int -> !torch.vtensor<[2,8,256,160],f16>
%2446 = torch.aten.view %2445, %2137 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%2447 = torch.aten.view %2438, %2133 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16>
%2448 = torch.aten.permute %2447, %901 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16>
%2449 = torch.aten.clone %2448, %int0 : !torch.vtensor<[2,8,256,160],f16>, !torch.int -> !torch.vtensor<[2,8,256,160],f16>
%2450 = torch.aten.view %2449, %2137 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%2451 = torch.aten.view %2442, %2133 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16>
%2452 = torch.aten.permute %2451, %901 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16>
%2453 = torch.aten.clone %2452, %int0 : !torch.vtensor<[2,8,256,160],f16>, !torch.int -> !torch.vtensor<[2,8,256,160],f16>
%2454 = torch.aten.view %2453, %2137 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%2455 = torch.aten.transpose.int %2450, %int-1, %int-2 : !torch.vtensor<[16,256,160],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,160,256],f16>
%2456 = torch.aten.broadcast_to %2446, %2137 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%2457 = torch.aten.view %2456, %2137 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%2458 = torch.aten.broadcast_to %2455, %2150 : !torch.vtensor<[16,160,256],f16>, !torch.list<int> -> !torch.vtensor<[16,160,256],f16>
%2459 = torch.aten.view %2458, %2150 : !torch.vtensor<[16,160,256],f16>, !torch.list<int> -> !torch.vtensor<[16,160,256],f16>
%2460 = torch.aten.bmm %2457, %2459 : !torch.vtensor<[16,256,160],f16>, !torch.vtensor<[16,160,256],f16> -> !torch.vtensor<[16,256,256],f16>
%2461 = torch.aten.view %2460, %2154 : !torch.vtensor<[16,256,256],f16>, !torch.list<int> -> !torch.vtensor<[16,256,256],f16>
%2462 = torch.aten.mul.Tensor %2461, %1 : !torch.vtensor<[16,256,256],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,256,256],f16>
%values_18, %indices_19 = torch.aten.max.dim %2462, %int-1, %true : !torch.vtensor<[16,256,256],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,256,1],f16>, !torch.vtensor<[16,256,1],si64>
%2463 = torch.aten.sub.Tensor %2462, %values_18, %float1.000000e00 : !torch.vtensor<[16,256,256],f16>, !torch.vtensor<[16,256,1],f16>, !torch.float -> !torch.vtensor<[16,256,256],f16>
%2464 = torch.aten.exp %2463 : !torch.vtensor<[16,256,256],f16> -> !torch.vtensor<[16,256,256],f16>
%2465 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%2466 = torch.aten.sum.dim_IntList %2464, %2465, %true, %none : !torch.vtensor<[16,256,256],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[16,256,1],f16>
%2467 = torch.aten.div.Tensor %2464, %2466 : !torch.vtensor<[16,256,256],f16>, !torch.vtensor<[16,256,1],f16> -> !torch.vtensor<[16,256,256],f16>
%2468 = torch.aten.broadcast_to %2467, %2154 : !torch.vtensor<[16,256,256],f16>, !torch.list<int> -> !torch.vtensor<[16,256,256],f16>
%2469 = torch.aten.view %2468, %2154 : !torch.vtensor<[16,256,256],f16>, !torch.list<int> -> !torch.vtensor<[16,256,256],f16>
%2470 = torch.aten.broadcast_to %2454, %2137 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%2471 = torch.aten.view %2470, %2137 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%2472 = torch.aten.bmm %2469, %2471 : !torch.vtensor<[16,256,256],f16>, !torch.vtensor<[16,256,160],f16> -> !torch.vtensor<[16,256,160],f16>
%2473 = torch.aten.view %2472, %2137 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%2474 = torch.aten.view %2473, %2168 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16>
%2475 = torch.aten.permute %2474, %901 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16>
%2476 = torch.aten.clone %2475, %int0 : !torch.vtensor<[2,256,8,160],f16>, !torch.int -> !torch.vtensor<[2,256,8,160],f16>
%2477 = torch.aten.view %2476, %2102 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%2478 = torch.aten.transpose.int %223, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%2479 = torch.aten.view %2477, %2121 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
%2480 = torch.aten.mm %2479, %2478 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16>
%2481 = torch.aten.mul.Scalar %224, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16>
%2482 = torch.aten.add.Tensor %2481, %2480, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[512,1280],f16>, !torch.int -> !torch.vtensor<[512,1280],f16>
%2483 = torch.aten.view %2482, %2102 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%2484 = torch.aten.add.Tensor %2483, %2414, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16>
%2485 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%2486 = torch.aten.sum.dim_IntList %2484, %2485, %true, %none : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,256,1],f16>
%2487 = torch.aten.div.Scalar %2486, %int1280 : !torch.vtensor<[2,256,1],f16>, !torch.int -> !torch.vtensor<[2,256,1],f16>
%2488 = torch.prim.ListConstruct %int2, %int256, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2489 = torch.aten.broadcast_to %2487, %2488 : !torch.vtensor<[2,256,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%2490 = torch.aten.sub.Tensor %2484, %2489, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16>
%2491 = torch.aten.mul.Tensor %2490, %2490 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16>
%2492 = torch.aten.sum.dim_IntList %2491, %2485, %true, %none : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,256,1],f16>
%2493 = torch.aten.div.Scalar %2492, %int1280 : !torch.vtensor<[2,256,1],f16>, !torch.int -> !torch.vtensor<[2,256,1],f16>
%2494 = torch.aten.add.Scalar %2493, %float1.000000e-05, %int1 : !torch.vtensor<[2,256,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,256,1],f16>
%2495 = torch.aten.rsqrt %2494 : !torch.vtensor<[2,256,1],f16> -> !torch.vtensor<[2,256,1],f16>
%2496 = torch.prim.ListConstruct %int2, %int256, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2497 = torch.aten.broadcast_to %2495, %2496 : !torch.vtensor<[2,256,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%2498 = torch.aten.mul.Tensor %2490, %2497 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16>
%2499 = torch.aten.mul.Tensor %2498, %225 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[1280],f16> -> !torch.vtensor<[2,256,1280],f16>
%2500 = torch.aten.add.Tensor %2499, %226, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16>
%2501 = torch.aten.transpose.int %227, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%2502 = torch.aten.view %2500, %2121 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
%2503 = torch.aten.mm %2502, %2501 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16>
%2504 = torch.aten.view %2503, %2102 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%2505 = torch.aten.transpose.int %228, %int0, %int1 : !torch.vtensor<[1280,768],f16>, !torch.int, !torch.int -> !torch.vtensor<[768,1280],f16>
%2506 = torch.aten.view %arg2, %968 : !torch.vtensor<[2,77,768],f16>, !torch.list<int> -> !torch.vtensor<[154,768],f16>
%2507 = torch.aten.mm %2506, %2505 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,1280],f16> -> !torch.vtensor<[154,1280],f16>
%2508 = torch.aten.view %2507, %2203 : !torch.vtensor<[154,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,77,1280],f16>
%2509 = torch.aten.transpose.int %229, %int0, %int1 : !torch.vtensor<[1280,768],f16>, !torch.int, !torch.int -> !torch.vtensor<[768,1280],f16>
%2510 = torch.aten.view %arg2, %968 : !torch.vtensor<[2,77,768],f16>, !torch.list<int> -> !torch.vtensor<[154,768],f16>
%2511 = torch.aten.mm %2510, %2509 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,1280],f16> -> !torch.vtensor<[154,1280],f16>
%2512 = torch.aten.view %2511, %2203 : !torch.vtensor<[154,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,77,1280],f16>
%2513 = torch.aten.view %2504, %2133 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16>
%2514 = torch.aten.permute %2513, %901 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16>
%2515 = torch.aten.clone %2514, %int0 : !torch.vtensor<[2,8,256,160],f16>, !torch.int -> !torch.vtensor<[2,8,256,160],f16>
%2516 = torch.aten.view %2515, %2137 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%2517 = torch.aten.view %2508, %2213 : !torch.vtensor<[2,77,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,77,8,160],f16>
%2518 = torch.aten.permute %2517, %901 : !torch.vtensor<[2,77,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,160],f16>
%2519 = torch.aten.clone %2518, %int0 : !torch.vtensor<[2,8,77,160],f16>, !torch.int -> !torch.vtensor<[2,8,77,160],f16>
%2520 = torch.aten.view %2519, %2217 : !torch.vtensor<[2,8,77,160],f16>, !torch.list<int> -> !torch.vtensor<[16,77,160],f16>
%2521 = torch.aten.view %2512, %2213 : !torch.vtensor<[2,77,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,77,8,160],f16>
%2522 = torch.aten.permute %2521, %901 : !torch.vtensor<[2,77,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,160],f16>
%2523 = torch.aten.clone %2522, %int0 : !torch.vtensor<[2,8,77,160],f16>, !torch.int -> !torch.vtensor<[2,8,77,160],f16>
%2524 = torch.aten.view %2523, %2217 : !torch.vtensor<[2,8,77,160],f16>, !torch.list<int> -> !torch.vtensor<[16,77,160],f16>
%2525 = torch.aten.transpose.int %2520, %int-1, %int-2 : !torch.vtensor<[16,77,160],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,160,77],f16>
%2526 = torch.aten.broadcast_to %2516, %2137 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%2527 = torch.aten.view %2526, %2137 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%2528 = torch.aten.broadcast_to %2525, %2226 : !torch.vtensor<[16,160,77],f16>, !torch.list<int> -> !torch.vtensor<[16,160,77],f16>
%2529 = torch.aten.view %2528, %2226 : !torch.vtensor<[16,160,77],f16>, !torch.list<int> -> !torch.vtensor<[16,160,77],f16>
%2530 = torch.aten.bmm %2527, %2529 : !torch.vtensor<[16,256,160],f16>, !torch.vtensor<[16,160,77],f16> -> !torch.vtensor<[16,256,77],f16>
%2531 = torch.aten.view %2530, %2230 : !torch.vtensor<[16,256,77],f16>, !torch.list<int> -> !torch.vtensor<[16,256,77],f16>
%2532 = torch.aten.mul.Tensor %2531, %1 : !torch.vtensor<[16,256,77],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,256,77],f16>
%values_20, %indices_21 = torch.aten.max.dim %2532, %int-1, %true : !torch.vtensor<[16,256,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,256,1],f16>, !torch.vtensor<[16,256,1],si64>
%2533 = torch.aten.sub.Tensor %2532, %values_20, %float1.000000e00 : !torch.vtensor<[16,256,77],f16>, !torch.vtensor<[16,256,1],f16>, !torch.float -> !torch.vtensor<[16,256,77],f16>
%2534 = torch.aten.exp %2533 : !torch.vtensor<[16,256,77],f16> -> !torch.vtensor<[16,256,77],f16>
%2535 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%2536 = torch.aten.sum.dim_IntList %2534, %2535, %true, %none : !torch.vtensor<[16,256,77],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[16,256,1],f16>
%2537 = torch.aten.div.Tensor %2534, %2536 : !torch.vtensor<[16,256,77],f16>, !torch.vtensor<[16,256,1],f16> -> !torch.vtensor<[16,256,77],f16>
%2538 = torch.aten.broadcast_to %2537, %2230 : !torch.vtensor<[16,256,77],f16>, !torch.list<int> -> !torch.vtensor<[16,256,77],f16>
%2539 = torch.aten.view %2538, %2230 : !torch.vtensor<[16,256,77],f16>, !torch.list<int> -> !torch.vtensor<[16,256,77],f16>
%2540 = torch.aten.broadcast_to %2524, %2217 : !torch.vtensor<[16,77,160],f16>, !torch.list<int> -> !torch.vtensor<[16,77,160],f16>
%2541 = torch.aten.view %2540, %2217 : !torch.vtensor<[16,77,160],f16>, !torch.list<int> -> !torch.vtensor<[16,77,160],f16>
%2542 = torch.aten.bmm %2539, %2541 : !torch.vtensor<[16,256,77],f16>, !torch.vtensor<[16,77,160],f16> -> !torch.vtensor<[16,256,160],f16>
%2543 = torch.aten.view %2542, %2137 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%2544 = torch.aten.view %2543, %2168 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16>
%2545 = torch.aten.permute %2544, %901 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16>
%2546 = torch.aten.clone %2545, %int0 : !torch.vtensor<[2,256,8,160],f16>, !torch.int -> !torch.vtensor<[2,256,8,160],f16>
%2547 = torch.aten.view %2546, %2102 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%2548 = torch.aten.transpose.int %230, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%2549 = torch.aten.view %2547, %2121 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
%2550 = torch.aten.mm %2549, %2548 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16>
%2551 = torch.aten.mul.Scalar %231, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16>
%2552 = torch.aten.add.Tensor %2551, %2550, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[512,1280],f16>, !torch.int -> !torch.vtensor<[512,1280],f16>
%2553 = torch.aten.view %2552, %2102 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%2554 = torch.aten.add.Tensor %2553, %2484, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16>
%2555 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%2556 = torch.aten.sum.dim_IntList %2554, %2555, %true, %none : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,256,1],f16>
%2557 = torch.aten.div.Scalar %2556, %int1280 : !torch.vtensor<[2,256,1],f16>, !torch.int -> !torch.vtensor<[2,256,1],f16>
%2558 = torch.prim.ListConstruct %int2, %int256, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2559 = torch.aten.broadcast_to %2557, %2558 : !torch.vtensor<[2,256,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%2560 = torch.aten.sub.Tensor %2554, %2559, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16>
%2561 = torch.aten.mul.Tensor %2560, %2560 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16>
%2562 = torch.aten.sum.dim_IntList %2561, %2555, %true, %none : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,256,1],f16>
%2563 = torch.aten.div.Scalar %2562, %int1280 : !torch.vtensor<[2,256,1],f16>, !torch.int -> !torch.vtensor<[2,256,1],f16>
%2564 = torch.aten.add.Scalar %2563, %float1.000000e-05, %int1 : !torch.vtensor<[2,256,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,256,1],f16>
%2565 = torch.aten.rsqrt %2564 : !torch.vtensor<[2,256,1],f16> -> !torch.vtensor<[2,256,1],f16>
%2566 = torch.prim.ListConstruct %int2, %int256, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2567 = torch.aten.broadcast_to %2565, %2566 : !torch.vtensor<[2,256,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%2568 = torch.aten.mul.Tensor %2560, %2567 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16>
%2569 = torch.aten.mul.Tensor %2568, %232 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[1280],f16> -> !torch.vtensor<[2,256,1280],f16>
%2570 = torch.aten.add.Tensor %2569, %233, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16>
%2571 = torch.aten.transpose.int %234, %int0, %int1 : !torch.vtensor<[10240,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,10240],f16>
%2572 = torch.aten.view %2570, %2121 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
%2573 = torch.aten.mm %2572, %2571 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,10240],f16> -> !torch.vtensor<[512,10240],f16>
%2574 = torch.aten.mul.Scalar %235, %int1 : !torch.vtensor<[10240],f16>, !torch.int -> !torch.vtensor<[10240],f16>
%2575 = torch.aten.add.Tensor %2574, %2573, %int1 : !torch.vtensor<[10240],f16>, !torch.vtensor<[512,10240],f16>, !torch.int -> !torch.vtensor<[512,10240],f16>
%2576 = torch.aten.view %2575, %2276 : !torch.vtensor<[512,10240],f16>, !torch.list<int> -> !torch.vtensor<[2,256,10240],f16>
%2577 = torch.aten.slice.Tensor %2576, %int-1, %int0, %int5120, %int1 : !torch.vtensor<[2,256,10240],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,256,5120],f16>
%2578 = torch.aten.slice.Tensor %2576, %int-1, %int5120, %int10240, %int1 : !torch.vtensor<[2,256,10240],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,256,5120],f16>
%2579 = torch.aten.gelu %2578, %str : !torch.vtensor<[2,256,5120],f16>, !torch.str -> !torch.vtensor<[2,256,5120],f16>
%2580 = torch.aten.mul.Tensor %2577, %2579 : !torch.vtensor<[2,256,5120],f16>, !torch.vtensor<[2,256,5120],f16> -> !torch.vtensor<[2,256,5120],f16>
%2581 = torch.aten.transpose.int %236, %int0, %int1 : !torch.vtensor<[1280,5120],f16>, !torch.int, !torch.int -> !torch.vtensor<[5120,1280],f16>
%2582 = torch.aten.view %2580, %2283 : !torch.vtensor<[2,256,5120],f16>, !torch.list<int> -> !torch.vtensor<[512,5120],f16>
%2583 = torch.aten.mm %2582, %2581 : !torch.vtensor<[512,5120],f16>, !torch.vtensor<[5120,1280],f16> -> !torch.vtensor<[512,1280],f16>
%2584 = torch.aten.mul.Scalar %237, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16>
%2585 = torch.aten.add.Tensor %2584, %2583, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[512,1280],f16>, !torch.int -> !torch.vtensor<[512,1280],f16>
%2586 = torch.aten.view %2585, %2102 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%2587 = torch.aten.add.Tensor %2586, %2554, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16>
%2588 = torch.aten.view %2587, %2290 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,16,16,1280],f16>
%2589 = torch.aten.permute %2588, %1060 : !torch.vtensor<[2,16,16,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f16>
%2590 = torch.aten.convolution %2589, %238, %239, %744, %745, %744, %false, %745, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[1280,1280,1,1],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
%2591 = torch.aten.add.Tensor %2590, %2378, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
%2592 = torch.aten.convolution %2591, %240, %241, %1361, %744, %744, %false, %745, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16>
%2593 = torch.aten.clone %2592, %int0 : !torch.vtensor<[2,1280,8,8],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16>
%2594 = torch.prim.ListConstruct %int2, %int32, %int40, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2595 = torch.aten.view %2593, %2594 : !torch.vtensor<[2,1280,8,8],f16>, !torch.list<int> -> !torch.vtensor<[2,32,40,64],f16>
%2596 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%2597 = torch.aten.to.dtype %2596, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%2598 = torch.prim.ListConstruct %int2, %int32, %int40, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2599 = torch.aten.broadcast_to %2597, %2598 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,40,64],f32>
%2600 = torch.valsem.aten.copy %2599, %2595, %false : !torch.vtensor<[2,32,40,64],f32>, !torch.vtensor<[2,32,40,64],f16>, !torch.bool -> !torch.vtensor<[2,32,40,64],f32>
%2601 = torch.aten.to.dtype %2600, %int7, %false, %false, %none : !torch.vtensor<[2,32,40,64],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,40,64],f64>
%2602 = torch.aten.sum.dim_IntList %2601, %754, %true, %none : !torch.vtensor<[2,32,40,64],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%2603 = torch.aten.div.Scalar %2602, %int2560 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%2604 = torch.aten.sub.Tensor %2601, %2603, %float1.000000e00 : !torch.vtensor<[2,32,40,64],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,40,64],f64>
%2605 = torch.aten.mul.Tensor %2604, %2604 : !torch.vtensor<[2,32,40,64],f64>, !torch.vtensor<[2,32,40,64],f64> -> !torch.vtensor<[2,32,40,64],f64>
%2606 = torch.aten.sum.dim_IntList %2605, %754, %true, %none : !torch.vtensor<[2,32,40,64],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%2607 = torch.aten.div.Scalar %2606, %int2560 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%2608 = torch.aten.to.dtype %2607, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%2609 = torch.aten.sum.dim_IntList %2600, %754, %true, %none : !torch.vtensor<[2,32,40,64],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%2610 = torch.aten.div.Scalar %2609, %int2560 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%2611 = torch.aten.add.Tensor %2608, %6, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%2612 = torch.aten.rsqrt %2611 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%2613 = torch.aten.sub.Tensor %2595, %2610, %int1 : !torch.vtensor<[2,32,40,64],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,64],f32>
%2614 = torch.aten.mul.Tensor %2613, %2612 : !torch.vtensor<[2,32,40,64],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,64],f32>
%2615 = torch.prim.ListConstruct %int2, %int1280, %int8, %int8 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2616 = torch.aten.view %2614, %2615 : !torch.vtensor<[2,32,40,64],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,8,8],f32>
%2617 = torch.aten.unsqueeze %242, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%2618 = torch.aten.unsqueeze %2617, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%2619 = torch.aten.mul.Tensor %2616, %2618 : !torch.vtensor<[2,1280,8,8],f32>, !torch.vtensor<[1280,1,1],f16> -> !torch.vtensor<[2,1280,8,8],f32>
%2620 = torch.aten.unsqueeze %243, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%2621 = torch.aten.unsqueeze %2620, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%2622 = torch.aten.add.Tensor %2619, %2621, %int1 : !torch.vtensor<[2,1280,8,8],f32>, !torch.vtensor<[1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f32>
%2623 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%2624 = torch.aten.to.dtype %2623, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%2625 = torch.prim.ListConstruct %int2, %int1280, %int8, %int8 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2626 = torch.aten.broadcast_to %2624, %2625 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,8,8],f16>
%2627 = torch.valsem.aten.copy %2626, %2622, %false : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,8,8],f32>, !torch.bool -> !torch.vtensor<[2,1280,8,8],f16>
%2628 = torch.aten.sigmoid %2627 : !torch.vtensor<[2,1280,8,8],f16> -> !torch.vtensor<[2,1280,8,8],f16>
%2629 = torch.aten.mul.Tensor %2628, %2627 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,8,8],f16> -> !torch.vtensor<[2,1280,8,8],f16>
%2630 = torch.aten.convolution %2629, %244, %245, %744, %744, %744, %false, %745, %int1 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16>
%2631 = torch.aten.sigmoid %743 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%2632 = torch.aten.mul.Tensor %2631, %743 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%2633 = torch.aten.transpose.int %246, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%2634 = torch.aten.mm %2632, %2633 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[2,1280],f16>
%2635 = torch.aten.mul.Scalar %247, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16>
%2636 = torch.aten.add.Tensor %2635, %2634, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[2,1280],f16>, !torch.int -> !torch.vtensor<[2,1280],f16>
%2637 = torch.aten.slice.Tensor %2636, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,1280],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
%2638 = torch.aten.slice.Tensor %2637, %int1, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,1280],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
%2639 = torch.aten.unsqueeze %2638, %int2 : !torch.vtensor<[2,1280],f16>, !torch.int -> !torch.vtensor<[2,1280,1],f16>
%2640 = torch.aten.unsqueeze %2639, %int3 : !torch.vtensor<[2,1280,1],f16>, !torch.int -> !torch.vtensor<[2,1280,1,1],f16>
%2641 = torch.aten.add.Tensor %2630, %2640, %int1 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16>
%2642 = torch.aten.view %2641, %2594 : !torch.vtensor<[2,1280,8,8],f16>, !torch.list<int> -> !torch.vtensor<[2,32,40,64],f16>
%2643 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%2644 = torch.aten.to.dtype %2643, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%2645 = torch.prim.ListConstruct %int2, %int32, %int40, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2646 = torch.aten.broadcast_to %2644, %2645 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,40,64],f32>
%2647 = torch.valsem.aten.copy %2646, %2642, %false : !torch.vtensor<[2,32,40,64],f32>, !torch.vtensor<[2,32,40,64],f16>, !torch.bool -> !torch.vtensor<[2,32,40,64],f32>
%2648 = torch.aten.to.dtype %2647, %int7, %false, %false, %none : !torch.vtensor<[2,32,40,64],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,40,64],f64>
%2649 = torch.aten.sum.dim_IntList %2648, %754, %true, %none : !torch.vtensor<[2,32,40,64],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%2650 = torch.aten.div.Scalar %2649, %int2560 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%2651 = torch.aten.sub.Tensor %2648, %2650, %float1.000000e00 : !torch.vtensor<[2,32,40,64],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,40,64],f64>
%2652 = torch.aten.mul.Tensor %2651, %2651 : !torch.vtensor<[2,32,40,64],f64>, !torch.vtensor<[2,32,40,64],f64> -> !torch.vtensor<[2,32,40,64],f64>
%2653 = torch.aten.sum.dim_IntList %2652, %754, %true, %none : !torch.vtensor<[2,32,40,64],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%2654 = torch.aten.div.Scalar %2653, %int2560 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%2655 = torch.aten.to.dtype %2654, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%2656 = torch.aten.sum.dim_IntList %2647, %754, %true, %none : !torch.vtensor<[2,32,40,64],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%2657 = torch.aten.div.Scalar %2656, %int2560 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%2658 = torch.aten.add.Tensor %2655, %6, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%2659 = torch.aten.rsqrt %2658 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%2660 = torch.aten.sub.Tensor %2642, %2657, %int1 : !torch.vtensor<[2,32,40,64],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,64],f32>
%2661 = torch.aten.mul.Tensor %2660, %2659 : !torch.vtensor<[2,32,40,64],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,64],f32>
%2662 = torch.aten.view %2661, %2615 : !torch.vtensor<[2,32,40,64],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,8,8],f32>
%2663 = torch.aten.unsqueeze %248, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%2664 = torch.aten.unsqueeze %2663, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%2665 = torch.aten.mul.Tensor %2662, %2664 : !torch.vtensor<[2,1280,8,8],f32>, !torch.vtensor<[1280,1,1],f16> -> !torch.vtensor<[2,1280,8,8],f32>
%2666 = torch.aten.unsqueeze %249, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%2667 = torch.aten.unsqueeze %2666, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%2668 = torch.aten.add.Tensor %2665, %2667, %int1 : !torch.vtensor<[2,1280,8,8],f32>, !torch.vtensor<[1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f32>
%2669 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%2670 = torch.aten.to.dtype %2669, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%2671 = torch.prim.ListConstruct %int2, %int1280, %int8, %int8 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2672 = torch.aten.broadcast_to %2670, %2671 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,8,8],f16>
%2673 = torch.valsem.aten.copy %2672, %2668, %false : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,8,8],f32>, !torch.bool -> !torch.vtensor<[2,1280,8,8],f16>
%2674 = torch.aten.sigmoid %2673 : !torch.vtensor<[2,1280,8,8],f16> -> !torch.vtensor<[2,1280,8,8],f16>
%2675 = torch.aten.mul.Tensor %2674, %2673 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,8,8],f16> -> !torch.vtensor<[2,1280,8,8],f16>
%2676 = torch.aten.convolution %2675, %250, %251, %744, %744, %744, %false, %745, %int1 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16>
%2677 = torch.aten.add.Tensor %2592, %2676, %int1 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,8,8],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16>
%2678 = torch.aten.div.Tensor %2677, %5 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[2,1280,8,8],f16>
%2679 = torch.aten.clone %2678, %int0 : !torch.vtensor<[2,1280,8,8],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16>
%2680 = torch.aten.view %2679, %2594 : !torch.vtensor<[2,1280,8,8],f16>, !torch.list<int> -> !torch.vtensor<[2,32,40,64],f16>
%2681 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%2682 = torch.aten.to.dtype %2681, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%2683 = torch.prim.ListConstruct %int2, %int32, %int40, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2684 = torch.aten.broadcast_to %2682, %2683 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,40,64],f32>
%2685 = torch.valsem.aten.copy %2684, %2680, %false : !torch.vtensor<[2,32,40,64],f32>, !torch.vtensor<[2,32,40,64],f16>, !torch.bool -> !torch.vtensor<[2,32,40,64],f32>
%2686 = torch.aten.to.dtype %2685, %int7, %false, %false, %none : !torch.vtensor<[2,32,40,64],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,40,64],f64>
%2687 = torch.aten.sum.dim_IntList %2686, %754, %true, %none : !torch.vtensor<[2,32,40,64],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%2688 = torch.aten.div.Scalar %2687, %int2560 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%2689 = torch.aten.sub.Tensor %2686, %2688, %float1.000000e00 : !torch.vtensor<[2,32,40,64],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,40,64],f64>
%2690 = torch.aten.mul.Tensor %2689, %2689 : !torch.vtensor<[2,32,40,64],f64>, !torch.vtensor<[2,32,40,64],f64> -> !torch.vtensor<[2,32,40,64],f64>
%2691 = torch.aten.sum.dim_IntList %2690, %754, %true, %none : !torch.vtensor<[2,32,40,64],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%2692 = torch.aten.div.Scalar %2691, %int2560 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%2693 = torch.aten.to.dtype %2692, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%2694 = torch.aten.sum.dim_IntList %2685, %754, %true, %none : !torch.vtensor<[2,32,40,64],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%2695 = torch.aten.div.Scalar %2694, %int2560 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%2696 = torch.aten.add.Tensor %2693, %6, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%2697 = torch.aten.rsqrt %2696 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%2698 = torch.aten.sub.Tensor %2680, %2695, %int1 : !torch.vtensor<[2,32,40,64],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,64],f32>
%2699 = torch.aten.mul.Tensor %2698, %2697 : !torch.vtensor<[2,32,40,64],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,64],f32>
%2700 = torch.aten.view %2699, %2615 : !torch.vtensor<[2,32,40,64],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,8,8],f32>
%2701 = torch.aten.unsqueeze %252, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%2702 = torch.aten.unsqueeze %2701, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%2703 = torch.aten.mul.Tensor %2700, %2702 : !torch.vtensor<[2,1280,8,8],f32>, !torch.vtensor<[1280,1,1],f16> -> !torch.vtensor<[2,1280,8,8],f32>
%2704 = torch.aten.unsqueeze %253, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%2705 = torch.aten.unsqueeze %2704, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%2706 = torch.aten.add.Tensor %2703, %2705, %int1 : !torch.vtensor<[2,1280,8,8],f32>, !torch.vtensor<[1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f32>
%2707 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%2708 = torch.aten.to.dtype %2707, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%2709 = torch.prim.ListConstruct %int2, %int1280, %int8, %int8 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2710 = torch.aten.broadcast_to %2708, %2709 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,8,8],f16>
%2711 = torch.valsem.aten.copy %2710, %2706, %false : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,8,8],f32>, !torch.bool -> !torch.vtensor<[2,1280,8,8],f16>
%2712 = torch.aten.sigmoid %2711 : !torch.vtensor<[2,1280,8,8],f16> -> !torch.vtensor<[2,1280,8,8],f16>
%2713 = torch.aten.mul.Tensor %2712, %2711 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,8,8],f16> -> !torch.vtensor<[2,1280,8,8],f16>
%2714 = torch.aten.convolution %2713, %254, %255, %744, %744, %744, %false, %745, %int1 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16>
%2715 = torch.aten.sigmoid %743 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%2716 = torch.aten.mul.Tensor %2715, %743 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%2717 = torch.aten.transpose.int %256, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%2718 = torch.aten.mm %2716, %2717 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[2,1280],f16>
%2719 = torch.aten.mul.Scalar %257, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16>
%2720 = torch.aten.add.Tensor %2719, %2718, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[2,1280],f16>, !torch.int -> !torch.vtensor<[2,1280],f16>
%2721 = torch.aten.slice.Tensor %2720, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,1280],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
%2722 = torch.aten.slice.Tensor %2721, %int1, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,1280],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
%2723 = torch.aten.unsqueeze %2722, %int2 : !torch.vtensor<[2,1280],f16>, !torch.int -> !torch.vtensor<[2,1280,1],f16>
%2724 = torch.aten.unsqueeze %2723, %int3 : !torch.vtensor<[2,1280,1],f16>, !torch.int -> !torch.vtensor<[2,1280,1,1],f16>
%2725 = torch.aten.add.Tensor %2714, %2724, %int1 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16>
%2726 = torch.aten.view %2725, %2594 : !torch.vtensor<[2,1280,8,8],f16>, !torch.list<int> -> !torch.vtensor<[2,32,40,64],f16>
%2727 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%2728 = torch.aten.to.dtype %2727, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%2729 = torch.prim.ListConstruct %int2, %int32, %int40, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2730 = torch.aten.broadcast_to %2728, %2729 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,40,64],f32>
%2731 = torch.valsem.aten.copy %2730, %2726, %false : !torch.vtensor<[2,32,40,64],f32>, !torch.vtensor<[2,32,40,64],f16>, !torch.bool -> !torch.vtensor<[2,32,40,64],f32>
%2732 = torch.aten.to.dtype %2731, %int7, %false, %false, %none : !torch.vtensor<[2,32,40,64],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,40,64],f64>
%2733 = torch.aten.sum.dim_IntList %2732, %754, %true, %none : !torch.vtensor<[2,32,40,64],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%2734 = torch.aten.div.Scalar %2733, %int2560 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%2735 = torch.aten.sub.Tensor %2732, %2734, %float1.000000e00 : !torch.vtensor<[2,32,40,64],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,40,64],f64>
%2736 = torch.aten.mul.Tensor %2735, %2735 : !torch.vtensor<[2,32,40,64],f64>, !torch.vtensor<[2,32,40,64],f64> -> !torch.vtensor<[2,32,40,64],f64>
%2737 = torch.aten.sum.dim_IntList %2736, %754, %true, %none : !torch.vtensor<[2,32,40,64],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%2738 = torch.aten.div.Scalar %2737, %int2560 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%2739 = torch.aten.to.dtype %2738, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%2740 = torch.aten.sum.dim_IntList %2731, %754, %true, %none : !torch.vtensor<[2,32,40,64],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%2741 = torch.aten.div.Scalar %2740, %int2560 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%2742 = torch.aten.add.Tensor %2739, %6, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%2743 = torch.aten.rsqrt %2742 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%2744 = torch.aten.sub.Tensor %2726, %2741, %int1 : !torch.vtensor<[2,32,40,64],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,64],f32>
%2745 = torch.aten.mul.Tensor %2744, %2743 : !torch.vtensor<[2,32,40,64],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,64],f32>
%2746 = torch.aten.view %2745, %2615 : !torch.vtensor<[2,32,40,64],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,8,8],f32>
%2747 = torch.aten.unsqueeze %258, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%2748 = torch.aten.unsqueeze %2747, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%2749 = torch.aten.mul.Tensor %2746, %2748 : !torch.vtensor<[2,1280,8,8],f32>, !torch.vtensor<[1280,1,1],f16> -> !torch.vtensor<[2,1280,8,8],f32>
%2750 = torch.aten.unsqueeze %259, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%2751 = torch.aten.unsqueeze %2750, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%2752 = torch.aten.add.Tensor %2749, %2751, %int1 : !torch.vtensor<[2,1280,8,8],f32>, !torch.vtensor<[1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f32>
%2753 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%2754 = torch.aten.to.dtype %2753, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%2755 = torch.prim.ListConstruct %int2, %int1280, %int8, %int8 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2756 = torch.aten.broadcast_to %2754, %2755 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,8,8],f16>
%2757 = torch.valsem.aten.copy %2756, %2752, %false : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,8,8],f32>, !torch.bool -> !torch.vtensor<[2,1280,8,8],f16>
%2758 = torch.aten.sigmoid %2757 : !torch.vtensor<[2,1280,8,8],f16> -> !torch.vtensor<[2,1280,8,8],f16>
%2759 = torch.aten.mul.Tensor %2758, %2757 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,8,8],f16> -> !torch.vtensor<[2,1280,8,8],f16>
%2760 = torch.aten.convolution %2759, %260, %261, %744, %744, %744, %false, %745, %int1 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16>
%2761 = torch.aten.add.Tensor %2678, %2760, %int1 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,8,8],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16>
%2762 = torch.aten.div.Tensor %2761, %5 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[2,1280,8,8],f16>
%2763 = torch.aten.clone %2762, %int0 : !torch.vtensor<[2,1280,8,8],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16>
%2764 = torch.aten.view %2763, %2594 : !torch.vtensor<[2,1280,8,8],f16>, !torch.list<int> -> !torch.vtensor<[2,32,40,64],f16>
%2765 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%2766 = torch.aten.to.dtype %2765, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%2767 = torch.prim.ListConstruct %int2, %int32, %int40, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2768 = torch.aten.broadcast_to %2766, %2767 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,40,64],f32>
%2769 = torch.valsem.aten.copy %2768, %2764, %false : !torch.vtensor<[2,32,40,64],f32>, !torch.vtensor<[2,32,40,64],f16>, !torch.bool -> !torch.vtensor<[2,32,40,64],f32>
%2770 = torch.aten.to.dtype %2769, %int7, %false, %false, %none : !torch.vtensor<[2,32,40,64],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,40,64],f64>
%2771 = torch.aten.sum.dim_IntList %2770, %754, %true, %none : !torch.vtensor<[2,32,40,64],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%2772 = torch.aten.div.Scalar %2771, %int2560 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%2773 = torch.aten.sub.Tensor %2770, %2772, %float1.000000e00 : !torch.vtensor<[2,32,40,64],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,40,64],f64>
%2774 = torch.aten.mul.Tensor %2773, %2773 : !torch.vtensor<[2,32,40,64],f64>, !torch.vtensor<[2,32,40,64],f64> -> !torch.vtensor<[2,32,40,64],f64>
%2775 = torch.aten.sum.dim_IntList %2774, %754, %true, %none : !torch.vtensor<[2,32,40,64],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%2776 = torch.aten.div.Scalar %2775, %int2560 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%2777 = torch.aten.to.dtype %2776, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%2778 = torch.aten.sum.dim_IntList %2769, %754, %true, %none : !torch.vtensor<[2,32,40,64],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%2779 = torch.aten.div.Scalar %2778, %int2560 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%2780 = torch.aten.add.Tensor %2777, %6, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%2781 = torch.aten.rsqrt %2780 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%2782 = torch.aten.sub.Tensor %2764, %2779, %int1 : !torch.vtensor<[2,32,40,64],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,64],f32>
%2783 = torch.aten.mul.Tensor %2782, %2781 : !torch.vtensor<[2,32,40,64],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,64],f32>
%2784 = torch.aten.view %2783, %2615 : !torch.vtensor<[2,32,40,64],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,8,8],f32>
%2785 = torch.aten.unsqueeze %262, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%2786 = torch.aten.unsqueeze %2785, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%2787 = torch.aten.mul.Tensor %2784, %2786 : !torch.vtensor<[2,1280,8,8],f32>, !torch.vtensor<[1280,1,1],f16> -> !torch.vtensor<[2,1280,8,8],f32>
%2788 = torch.aten.unsqueeze %263, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%2789 = torch.aten.unsqueeze %2788, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%2790 = torch.aten.add.Tensor %2787, %2789, %int1 : !torch.vtensor<[2,1280,8,8],f32>, !torch.vtensor<[1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f32>
%2791 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%2792 = torch.aten.to.dtype %2791, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%2793 = torch.prim.ListConstruct %int2, %int1280, %int8, %int8 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2794 = torch.aten.broadcast_to %2792, %2793 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,8,8],f16>
%2795 = torch.valsem.aten.copy %2794, %2790, %false : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,8,8],f32>, !torch.bool -> !torch.vtensor<[2,1280,8,8],f16>
%2796 = torch.aten.sigmoid %2795 : !torch.vtensor<[2,1280,8,8],f16> -> !torch.vtensor<[2,1280,8,8],f16>
%2797 = torch.aten.mul.Tensor %2796, %2795 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,8,8],f16> -> !torch.vtensor<[2,1280,8,8],f16>
%2798 = torch.aten.convolution %2797, %264, %265, %744, %744, %744, %false, %745, %int1 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16>
%2799 = torch.aten.sigmoid %743 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%2800 = torch.aten.mul.Tensor %2799, %743 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%2801 = torch.aten.transpose.int %266, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%2802 = torch.aten.mm %2800, %2801 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[2,1280],f16>
%2803 = torch.aten.mul.Scalar %267, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16>
%2804 = torch.aten.add.Tensor %2803, %2802, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[2,1280],f16>, !torch.int -> !torch.vtensor<[2,1280],f16>
%2805 = torch.aten.slice.Tensor %2804, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,1280],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
%2806 = torch.aten.slice.Tensor %2805, %int1, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,1280],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
%2807 = torch.aten.unsqueeze %2806, %int2 : !torch.vtensor<[2,1280],f16>, !torch.int -> !torch.vtensor<[2,1280,1],f16>
%2808 = torch.aten.unsqueeze %2807, %int3 : !torch.vtensor<[2,1280,1],f16>, !torch.int -> !torch.vtensor<[2,1280,1,1],f16>
%2809 = torch.aten.add.Tensor %2798, %2808, %int1 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16>
%2810 = torch.aten.view %2809, %2594 : !torch.vtensor<[2,1280,8,8],f16>, !torch.list<int> -> !torch.vtensor<[2,32,40,64],f16>
%2811 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%2812 = torch.aten.to.dtype %2811, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%2813 = torch.prim.ListConstruct %int2, %int32, %int40, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2814 = torch.aten.broadcast_to %2812, %2813 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,40,64],f32>
%2815 = torch.valsem.aten.copy %2814, %2810, %false : !torch.vtensor<[2,32,40,64],f32>, !torch.vtensor<[2,32,40,64],f16>, !torch.bool -> !torch.vtensor<[2,32,40,64],f32>
%2816 = torch.aten.to.dtype %2815, %int7, %false, %false, %none : !torch.vtensor<[2,32,40,64],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,40,64],f64>
%2817 = torch.aten.sum.dim_IntList %2816, %754, %true, %none : !torch.vtensor<[2,32,40,64],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%2818 = torch.aten.div.Scalar %2817, %int2560 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%2819 = torch.aten.sub.Tensor %2816, %2818, %float1.000000e00 : !torch.vtensor<[2,32,40,64],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,40,64],f64>
%2820 = torch.aten.mul.Tensor %2819, %2819 : !torch.vtensor<[2,32,40,64],f64>, !torch.vtensor<[2,32,40,64],f64> -> !torch.vtensor<[2,32,40,64],f64>
%2821 = torch.aten.sum.dim_IntList %2820, %754, %true, %none : !torch.vtensor<[2,32,40,64],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%2822 = torch.aten.div.Scalar %2821, %int2560 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%2823 = torch.aten.to.dtype %2822, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%2824 = torch.aten.sum.dim_IntList %2815, %754, %true, %none : !torch.vtensor<[2,32,40,64],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%2825 = torch.aten.div.Scalar %2824, %int2560 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%2826 = torch.aten.add.Tensor %2823, %6, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%2827 = torch.aten.rsqrt %2826 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%2828 = torch.aten.sub.Tensor %2810, %2825, %int1 : !torch.vtensor<[2,32,40,64],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,64],f32>
%2829 = torch.aten.mul.Tensor %2828, %2827 : !torch.vtensor<[2,32,40,64],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,64],f32>
%2830 = torch.aten.view %2829, %2615 : !torch.vtensor<[2,32,40,64],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,8,8],f32>
%2831 = torch.aten.unsqueeze %268, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%2832 = torch.aten.unsqueeze %2831, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%2833 = torch.aten.mul.Tensor %2830, %2832 : !torch.vtensor<[2,1280,8,8],f32>, !torch.vtensor<[1280,1,1],f16> -> !torch.vtensor<[2,1280,8,8],f32>
%2834 = torch.aten.unsqueeze %269, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%2835 = torch.aten.unsqueeze %2834, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%2836 = torch.aten.add.Tensor %2833, %2835, %int1 : !torch.vtensor<[2,1280,8,8],f32>, !torch.vtensor<[1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f32>
%2837 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%2838 = torch.aten.to.dtype %2837, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%2839 = torch.prim.ListConstruct %int2, %int1280, %int8, %int8 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2840 = torch.aten.broadcast_to %2838, %2839 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,8,8],f16>
%2841 = torch.valsem.aten.copy %2840, %2836, %false : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,8,8],f32>, !torch.bool -> !torch.vtensor<[2,1280,8,8],f16>
%2842 = torch.aten.sigmoid %2841 : !torch.vtensor<[2,1280,8,8],f16> -> !torch.vtensor<[2,1280,8,8],f16>
%2843 = torch.aten.mul.Tensor %2842, %2841 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,8,8],f16> -> !torch.vtensor<[2,1280,8,8],f16>
%2844 = torch.aten.convolution %2843, %270, %271, %744, %744, %744, %false, %745, %int1 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16>
%2845 = torch.aten.add.Tensor %2762, %2844, %int1 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,8,8],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16>
%2846 = torch.aten.div.Tensor %2845, %9 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[],si64> -> !torch.vtensor<[2,1280,8,8],f16>
%2847 = torch.aten.clone %2846, %int0 : !torch.vtensor<[2,1280,8,8],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16>
%2848 = torch.aten.view %2847, %2594 : !torch.vtensor<[2,1280,8,8],f16>, !torch.list<int> -> !torch.vtensor<[2,32,40,64],f16>
%2849 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%2850 = torch.aten.to.dtype %2849, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%2851 = torch.prim.ListConstruct %int2, %int32, %int40, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2852 = torch.aten.broadcast_to %2850, %2851 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,40,64],f32>
%2853 = torch.valsem.aten.copy %2852, %2848, %false : !torch.vtensor<[2,32,40,64],f32>, !torch.vtensor<[2,32,40,64],f16>, !torch.bool -> !torch.vtensor<[2,32,40,64],f32>
%2854 = torch.aten.to.dtype %2853, %int7, %false, %false, %none : !torch.vtensor<[2,32,40,64],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,40,64],f64>
%2855 = torch.aten.sum.dim_IntList %2854, %754, %true, %none : !torch.vtensor<[2,32,40,64],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%2856 = torch.aten.div.Scalar %2855, %int2560 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%2857 = torch.aten.sub.Tensor %2854, %2856, %float1.000000e00 : !torch.vtensor<[2,32,40,64],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,40,64],f64>
%2858 = torch.aten.mul.Tensor %2857, %2857 : !torch.vtensor<[2,32,40,64],f64>, !torch.vtensor<[2,32,40,64],f64> -> !torch.vtensor<[2,32,40,64],f64>
%2859 = torch.aten.sum.dim_IntList %2858, %754, %true, %none : !torch.vtensor<[2,32,40,64],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%2860 = torch.aten.div.Scalar %2859, %int2560 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%2861 = torch.aten.to.dtype %2860, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%2862 = torch.aten.sum.dim_IntList %2853, %754, %true, %none : !torch.vtensor<[2,32,40,64],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%2863 = torch.aten.div.Scalar %2862, %int2560 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%2864 = torch.aten.add.Tensor %2861, %4, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%2865 = torch.aten.rsqrt %2864 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%2866 = torch.aten.sub.Tensor %2848, %2863, %int1 : !torch.vtensor<[2,32,40,64],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,64],f32>
%2867 = torch.aten.mul.Tensor %2866, %2865 : !torch.vtensor<[2,32,40,64],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,64],f32>
%2868 = torch.aten.view %2867, %2615 : !torch.vtensor<[2,32,40,64],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,8,8],f32>
%2869 = torch.aten.unsqueeze %272, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%2870 = torch.aten.unsqueeze %2869, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%2871 = torch.aten.mul.Tensor %2868, %2870 : !torch.vtensor<[2,1280,8,8],f32>, !torch.vtensor<[1280,1,1],f16> -> !torch.vtensor<[2,1280,8,8],f32>
%2872 = torch.aten.unsqueeze %273, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%2873 = torch.aten.unsqueeze %2872, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%2874 = torch.aten.add.Tensor %2871, %2873, %int1 : !torch.vtensor<[2,1280,8,8],f32>, !torch.vtensor<[1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f32>
%2875 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%2876 = torch.aten.to.dtype %2875, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%2877 = torch.prim.ListConstruct %int2, %int1280, %int8, %int8 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2878 = torch.aten.broadcast_to %2876, %2877 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,8,8],f16>
%2879 = torch.valsem.aten.copy %2878, %2874, %false : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,8,8],f32>, !torch.bool -> !torch.vtensor<[2,1280,8,8],f16>
%2880 = torch.aten.convolution %2879, %274, %275, %744, %745, %744, %false, %745, %int1 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[1280,1280,1,1],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16>
%2881 = torch.aten.permute %2880, %866 : !torch.vtensor<[2,1280,8,8],f16>, !torch.list<int> -> !torch.vtensor<[2,8,8,1280],f16>
%2882 = torch.prim.ListConstruct %int2, %int64, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2883 = torch.aten.view %2881, %2882 : !torch.vtensor<[2,8,8,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,64,1280],f16>
%2884 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%2885 = torch.aten.sum.dim_IntList %2883, %2884, %true, %none : !torch.vtensor<[2,64,1280],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,64,1],f16>
%2886 = torch.aten.div.Scalar %2885, %int1280 : !torch.vtensor<[2,64,1],f16>, !torch.int -> !torch.vtensor<[2,64,1],f16>
%2887 = torch.prim.ListConstruct %int2, %int64, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2888 = torch.aten.broadcast_to %2886, %2887 : !torch.vtensor<[2,64,1],f16>, !torch.list<int> -> !torch.vtensor<[2,64,1280],f16>
%2889 = torch.aten.sub.Tensor %2883, %2888, %int1 : !torch.vtensor<[2,64,1280],f16>, !torch.vtensor<[2,64,1280],f16>, !torch.int -> !torch.vtensor<[2,64,1280],f16>
%2890 = torch.aten.mul.Tensor %2889, %2889 : !torch.vtensor<[2,64,1280],f16>, !torch.vtensor<[2,64,1280],f16> -> !torch.vtensor<[2,64,1280],f16>
%2891 = torch.aten.sum.dim_IntList %2890, %2884, %true, %none : !torch.vtensor<[2,64,1280],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,64,1],f16>
%2892 = torch.aten.div.Scalar %2891, %int1280 : !torch.vtensor<[2,64,1],f16>, !torch.int -> !torch.vtensor<[2,64,1],f16>
%2893 = torch.aten.add.Scalar %2892, %float1.000000e-05, %int1 : !torch.vtensor<[2,64,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,64,1],f16>
%2894 = torch.aten.rsqrt %2893 : !torch.vtensor<[2,64,1],f16> -> !torch.vtensor<[2,64,1],f16>
%2895 = torch.prim.ListConstruct %int2, %int64, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2896 = torch.aten.broadcast_to %2894, %2895 : !torch.vtensor<[2,64,1],f16>, !torch.list<int> -> !torch.vtensor<[2,64,1280],f16>
%2897 = torch.aten.mul.Tensor %2889, %2896 : !torch.vtensor<[2,64,1280],f16>, !torch.vtensor<[2,64,1280],f16> -> !torch.vtensor<[2,64,1280],f16>
%2898 = torch.aten.mul.Tensor %2897, %276 : !torch.vtensor<[2,64,1280],f16>, !torch.vtensor<[1280],f16> -> !torch.vtensor<[2,64,1280],f16>
%2899 = torch.aten.add.Tensor %2898, %277, %int1 : !torch.vtensor<[2,64,1280],f16>, !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[2,64,1280],f16>
%2900 = torch.aten.transpose.int %278, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%2901 = torch.prim.ListConstruct %int128, %int1280 : (!torch.int, !torch.int) -> !torch.list<int>
%2902 = torch.aten.view %2899, %2901 : !torch.vtensor<[2,64,1280],f16>, !torch.list<int> -> !torch.vtensor<[128,1280],f16>
%2903 = torch.aten.mm %2902, %2900 : !torch.vtensor<[128,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[128,1280],f16>
%2904 = torch.aten.view %2903, %2882 : !torch.vtensor<[128,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,64,1280],f16>
%2905 = torch.aten.transpose.int %279, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%2906 = torch.aten.view %2899, %2901 : !torch.vtensor<[2,64,1280],f16>, !torch.list<int> -> !torch.vtensor<[128,1280],f16>
%2907 = torch.aten.mm %2906, %2905 : !torch.vtensor<[128,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[128,1280],f16>
%2908 = torch.aten.view %2907, %2882 : !torch.vtensor<[128,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,64,1280],f16>
%2909 = torch.aten.transpose.int %280, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%2910 = torch.aten.view %2899, %2901 : !torch.vtensor<[2,64,1280],f16>, !torch.list<int> -> !torch.vtensor<[128,1280],f16>
%2911 = torch.aten.mm %2910, %2909 : !torch.vtensor<[128,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[128,1280],f16>
%2912 = torch.aten.view %2911, %2882 : !torch.vtensor<[128,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,64,1280],f16>
%2913 = torch.prim.ListConstruct %int2, %int64, %int8, %int160 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2914 = torch.aten.view %2904, %2913 : !torch.vtensor<[2,64,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,64,8,160],f16>
%2915 = torch.aten.permute %2914, %901 : !torch.vtensor<[2,64,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,64,160],f16>
%2916 = torch.aten.clone %2915, %int0 : !torch.vtensor<[2,8,64,160],f16>, !torch.int -> !torch.vtensor<[2,8,64,160],f16>
%2917 = torch.prim.ListConstruct %int16, %int64, %int160 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2918 = torch.aten.view %2916, %2917 : !torch.vtensor<[2,8,64,160],f16>, !torch.list<int> -> !torch.vtensor<[16,64,160],f16>
%2919 = torch.aten.view %2908, %2913 : !torch.vtensor<[2,64,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,64,8,160],f16>
%2920 = torch.aten.permute %2919, %901 : !torch.vtensor<[2,64,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,64,160],f16>
%2921 = torch.aten.clone %2920, %int0 : !torch.vtensor<[2,8,64,160],f16>, !torch.int -> !torch.vtensor<[2,8,64,160],f16>
%2922 = torch.aten.view %2921, %2917 : !torch.vtensor<[2,8,64,160],f16>, !torch.list<int> -> !torch.vtensor<[16,64,160],f16>
%2923 = torch.aten.view %2912, %2913 : !torch.vtensor<[2,64,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,64,8,160],f16>
%2924 = torch.aten.permute %2923, %901 : !torch.vtensor<[2,64,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,64,160],f16>
%2925 = torch.aten.clone %2924, %int0 : !torch.vtensor<[2,8,64,160],f16>, !torch.int -> !torch.vtensor<[2,8,64,160],f16>
%2926 = torch.aten.view %2925, %2917 : !torch.vtensor<[2,8,64,160],f16>, !torch.list<int> -> !torch.vtensor<[16,64,160],f16>
%2927 = torch.aten.transpose.int %2922, %int-1, %int-2 : !torch.vtensor<[16,64,160],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,160,64],f16>
%2928 = torch.aten.broadcast_to %2918, %2917 : !torch.vtensor<[16,64,160],f16>, !torch.list<int> -> !torch.vtensor<[16,64,160],f16>
%2929 = torch.aten.view %2928, %2917 : !torch.vtensor<[16,64,160],f16>, !torch.list<int> -> !torch.vtensor<[16,64,160],f16>
%2930 = torch.prim.ListConstruct %int16, %int160, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2931 = torch.aten.broadcast_to %2927, %2930 : !torch.vtensor<[16,160,64],f16>, !torch.list<int> -> !torch.vtensor<[16,160,64],f16>
%2932 = torch.aten.view %2931, %2930 : !torch.vtensor<[16,160,64],f16>, !torch.list<int> -> !torch.vtensor<[16,160,64],f16>
%2933 = torch.aten.bmm %2929, %2932 : !torch.vtensor<[16,64,160],f16>, !torch.vtensor<[16,160,64],f16> -> !torch.vtensor<[16,64,64],f16>
%2934 = torch.prim.ListConstruct %int16, %int64, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2935 = torch.aten.view %2933, %2934 : !torch.vtensor<[16,64,64],f16>, !torch.list<int> -> !torch.vtensor<[16,64,64],f16>
%2936 = torch.aten.mul.Tensor %2935, %1 : !torch.vtensor<[16,64,64],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,64,64],f16>
%values_22, %indices_23 = torch.aten.max.dim %2936, %int-1, %true : !torch.vtensor<[16,64,64],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,64,1],f16>, !torch.vtensor<[16,64,1],si64>
%2937 = torch.aten.sub.Tensor %2936, %values_22, %float1.000000e00 : !torch.vtensor<[16,64,64],f16>, !torch.vtensor<[16,64,1],f16>, !torch.float -> !torch.vtensor<[16,64,64],f16>
%2938 = torch.aten.exp %2937 : !torch.vtensor<[16,64,64],f16> -> !torch.vtensor<[16,64,64],f16>
%2939 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%2940 = torch.aten.sum.dim_IntList %2938, %2939, %true, %none : !torch.vtensor<[16,64,64],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[16,64,1],f16>
%2941 = torch.aten.div.Tensor %2938, %2940 : !torch.vtensor<[16,64,64],f16>, !torch.vtensor<[16,64,1],f16> -> !torch.vtensor<[16,64,64],f16>
%2942 = torch.aten.broadcast_to %2941, %2934 : !torch.vtensor<[16,64,64],f16>, !torch.list<int> -> !torch.vtensor<[16,64,64],f16>
%2943 = torch.aten.view %2942, %2934 : !torch.vtensor<[16,64,64],f16>, !torch.list<int> -> !torch.vtensor<[16,64,64],f16>
%2944 = torch.aten.broadcast_to %2926, %2917 : !torch.vtensor<[16,64,160],f16>, !torch.list<int> -> !torch.vtensor<[16,64,160],f16>
%2945 = torch.aten.view %2944, %2917 : !torch.vtensor<[16,64,160],f16>, !torch.list<int> -> !torch.vtensor<[16,64,160],f16>
%2946 = torch.aten.bmm %2943, %2945 : !torch.vtensor<[16,64,64],f16>, !torch.vtensor<[16,64,160],f16> -> !torch.vtensor<[16,64,160],f16>
%2947 = torch.aten.view %2946, %2917 : !torch.vtensor<[16,64,160],f16>, !torch.list<int> -> !torch.vtensor<[16,64,160],f16>
%2948 = torch.prim.ListConstruct %int2, %int8, %int64, %int160 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2949 = torch.aten.view %2947, %2948 : !torch.vtensor<[16,64,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,64,160],f16>
%2950 = torch.aten.permute %2949, %901 : !torch.vtensor<[2,8,64,160],f16>, !torch.list<int> -> !torch.vtensor<[2,64,8,160],f16>
%2951 = torch.aten.clone %2950, %int0 : !torch.vtensor<[2,64,8,160],f16>, !torch.int -> !torch.vtensor<[2,64,8,160],f16>
%2952 = torch.aten.view %2951, %2882 : !torch.vtensor<[2,64,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,64,1280],f16>
%2953 = torch.aten.transpose.int %281, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%2954 = torch.aten.view %2952, %2901 : !torch.vtensor<[2,64,1280],f16>, !torch.list<int> -> !torch.vtensor<[128,1280],f16>
%2955 = torch.aten.mm %2954, %2953 : !torch.vtensor<[128,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[128,1280],f16>
%2956 = torch.aten.mul.Scalar %282, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16>
%2957 = torch.aten.add.Tensor %2956, %2955, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[128,1280],f16>, !torch.int -> !torch.vtensor<[128,1280],f16>
%2958 = torch.aten.view %2957, %2882 : !torch.vtensor<[128,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,64,1280],f16>
%2959 = torch.aten.add.Tensor %2958, %2883, %int1 : !torch.vtensor<[2,64,1280],f16>, !torch.vtensor<[2,64,1280],f16>, !torch.int -> !torch.vtensor<[2,64,1280],f16>
%2960 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%2961 = torch.aten.sum.dim_IntList %2959, %2960, %true, %none : !torch.vtensor<[2,64,1280],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,64,1],f16>
%2962 = torch.aten.div.Scalar %2961, %int1280 : !torch.vtensor<[2,64,1],f16>, !torch.int -> !torch.vtensor<[2,64,1],f16>
%2963 = torch.prim.ListConstruct %int2, %int64, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2964 = torch.aten.broadcast_to %2962, %2963 : !torch.vtensor<[2,64,1],f16>, !torch.list<int> -> !torch.vtensor<[2,64,1280],f16>
%2965 = torch.aten.sub.Tensor %2959, %2964, %int1 : !torch.vtensor<[2,64,1280],f16>, !torch.vtensor<[2,64,1280],f16>, !torch.int -> !torch.vtensor<[2,64,1280],f16>
%2966 = torch.aten.mul.Tensor %2965, %2965 : !torch.vtensor<[2,64,1280],f16>, !torch.vtensor<[2,64,1280],f16> -> !torch.vtensor<[2,64,1280],f16>
%2967 = torch.aten.sum.dim_IntList %2966, %2960, %true, %none : !torch.vtensor<[2,64,1280],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,64,1],f16>
%2968 = torch.aten.div.Scalar %2967, %int1280 : !torch.vtensor<[2,64,1],f16>, !torch.int -> !torch.vtensor<[2,64,1],f16>
%2969 = torch.aten.add.Scalar %2968, %float1.000000e-05, %int1 : !torch.vtensor<[2,64,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,64,1],f16>
%2970 = torch.aten.rsqrt %2969 : !torch.vtensor<[2,64,1],f16> -> !torch.vtensor<[2,64,1],f16>
%2971 = torch.prim.ListConstruct %int2, %int64, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2972 = torch.aten.broadcast_to %2970, %2971 : !torch.vtensor<[2,64,1],f16>, !torch.list<int> -> !torch.vtensor<[2,64,1280],f16>
%2973 = torch.aten.mul.Tensor %2965, %2972 : !torch.vtensor<[2,64,1280],f16>, !torch.vtensor<[2,64,1280],f16> -> !torch.vtensor<[2,64,1280],f16>
%2974 = torch.aten.mul.Tensor %2973, %283 : !torch.vtensor<[2,64,1280],f16>, !torch.vtensor<[1280],f16> -> !torch.vtensor<[2,64,1280],f16>
%2975 = torch.aten.add.Tensor %2974, %284, %int1 : !torch.vtensor<[2,64,1280],f16>, !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[2,64,1280],f16>
%2976 = torch.aten.transpose.int %285, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%2977 = torch.aten.view %2975, %2901 : !torch.vtensor<[2,64,1280],f16>, !torch.list<int> -> !torch.vtensor<[128,1280],f16>
%2978 = torch.aten.mm %2977, %2976 : !torch.vtensor<[128,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[128,1280],f16>
%2979 = torch.aten.view %2978, %2882 : !torch.vtensor<[128,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,64,1280],f16>
%2980 = torch.aten.transpose.int %286, %int0, %int1 : !torch.vtensor<[1280,768],f16>, !torch.int, !torch.int -> !torch.vtensor<[768,1280],f16>
%2981 = torch.aten.view %arg2, %968 : !torch.vtensor<[2,77,768],f16>, !torch.list<int> -> !torch.vtensor<[154,768],f16>
%2982 = torch.aten.mm %2981, %2980 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,1280],f16> -> !torch.vtensor<[154,1280],f16>
%2983 = torch.aten.view %2982, %2203 : !torch.vtensor<[154,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,77,1280],f16>
%2984 = torch.aten.transpose.int %287, %int0, %int1 : !torch.vtensor<[1280,768],f16>, !torch.int, !torch.int -> !torch.vtensor<[768,1280],f16>
%2985 = torch.aten.view %arg2, %968 : !torch.vtensor<[2,77,768],f16>, !torch.list<int> -> !torch.vtensor<[154,768],f16>
%2986 = torch.aten.mm %2985, %2984 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,1280],f16> -> !torch.vtensor<[154,1280],f16>
%2987 = torch.aten.view %2986, %2203 : !torch.vtensor<[154,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,77,1280],f16>
%2988 = torch.aten.view %2979, %2913 : !torch.vtensor<[2,64,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,64,8,160],f16>
%2989 = torch.aten.permute %2988, %901 : !torch.vtensor<[2,64,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,64,160],f16>
%2990 = torch.aten.clone %2989, %int0 : !torch.vtensor<[2,8,64,160],f16>, !torch.int -> !torch.vtensor<[2,8,64,160],f16>
%2991 = torch.aten.view %2990, %2917 : !torch.vtensor<[2,8,64,160],f16>, !torch.list<int> -> !torch.vtensor<[16,64,160],f16>
%2992 = torch.aten.view %2983, %2213 : !torch.vtensor<[2,77,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,77,8,160],f16>
%2993 = torch.aten.permute %2992, %901 : !torch.vtensor<[2,77,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,160],f16>
%2994 = torch.aten.clone %2993, %int0 : !torch.vtensor<[2,8,77,160],f16>, !torch.int -> !torch.vtensor<[2,8,77,160],f16>
%2995 = torch.aten.view %2994, %2217 : !torch.vtensor<[2,8,77,160],f16>, !torch.list<int> -> !torch.vtensor<[16,77,160],f16>
%2996 = torch.aten.view %2987, %2213 : !torch.vtensor<[2,77,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,77,8,160],f16>
%2997 = torch.aten.permute %2996, %901 : !torch.vtensor<[2,77,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,160],f16>
%2998 = torch.aten.clone %2997, %int0 : !torch.vtensor<[2,8,77,160],f16>, !torch.int -> !torch.vtensor<[2,8,77,160],f16>
%2999 = torch.aten.view %2998, %2217 : !torch.vtensor<[2,8,77,160],f16>, !torch.list<int> -> !torch.vtensor<[16,77,160],f16>
%3000 = torch.aten.transpose.int %2995, %int-1, %int-2 : !torch.vtensor<[16,77,160],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,160,77],f16>
%3001 = torch.aten.broadcast_to %2991, %2917 : !torch.vtensor<[16,64,160],f16>, !torch.list<int> -> !torch.vtensor<[16,64,160],f16>
%3002 = torch.aten.view %3001, %2917 : !torch.vtensor<[16,64,160],f16>, !torch.list<int> -> !torch.vtensor<[16,64,160],f16>
%3003 = torch.aten.broadcast_to %3000, %2226 : !torch.vtensor<[16,160,77],f16>, !torch.list<int> -> !torch.vtensor<[16,160,77],f16>
%3004 = torch.aten.view %3003, %2226 : !torch.vtensor<[16,160,77],f16>, !torch.list<int> -> !torch.vtensor<[16,160,77],f16>
%3005 = torch.aten.bmm %3002, %3004 : !torch.vtensor<[16,64,160],f16>, !torch.vtensor<[16,160,77],f16> -> !torch.vtensor<[16,64,77],f16>
%3006 = torch.prim.ListConstruct %int16, %int64, %int77 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3007 = torch.aten.view %3005, %3006 : !torch.vtensor<[16,64,77],f16>, !torch.list<int> -> !torch.vtensor<[16,64,77],f16>
%3008 = torch.aten.mul.Tensor %3007, %1 : !torch.vtensor<[16,64,77],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,64,77],f16>
%values_24, %indices_25 = torch.aten.max.dim %3008, %int-1, %true : !torch.vtensor<[16,64,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,64,1],f16>, !torch.vtensor<[16,64,1],si64>
%3009 = torch.aten.sub.Tensor %3008, %values_24, %float1.000000e00 : !torch.vtensor<[16,64,77],f16>, !torch.vtensor<[16,64,1],f16>, !torch.float -> !torch.vtensor<[16,64,77],f16>
%3010 = torch.aten.exp %3009 : !torch.vtensor<[16,64,77],f16> -> !torch.vtensor<[16,64,77],f16>
%3011 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%3012 = torch.aten.sum.dim_IntList %3010, %3011, %true, %none : !torch.vtensor<[16,64,77],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[16,64,1],f16>
%3013 = torch.aten.div.Tensor %3010, %3012 : !torch.vtensor<[16,64,77],f16>, !torch.vtensor<[16,64,1],f16> -> !torch.vtensor<[16,64,77],f16>
%3014 = torch.aten.broadcast_to %3013, %3006 : !torch.vtensor<[16,64,77],f16>, !torch.list<int> -> !torch.vtensor<[16,64,77],f16>
%3015 = torch.aten.view %3014, %3006 : !torch.vtensor<[16,64,77],f16>, !torch.list<int> -> !torch.vtensor<[16,64,77],f16>
%3016 = torch.aten.broadcast_to %2999, %2217 : !torch.vtensor<[16,77,160],f16>, !torch.list<int> -> !torch.vtensor<[16,77,160],f16>
%3017 = torch.aten.view %3016, %2217 : !torch.vtensor<[16,77,160],f16>, !torch.list<int> -> !torch.vtensor<[16,77,160],f16>
%3018 = torch.aten.bmm %3015, %3017 : !torch.vtensor<[16,64,77],f16>, !torch.vtensor<[16,77,160],f16> -> !torch.vtensor<[16,64,160],f16>
%3019 = torch.aten.view %3018, %2917 : !torch.vtensor<[16,64,160],f16>, !torch.list<int> -> !torch.vtensor<[16,64,160],f16>
%3020 = torch.aten.view %3019, %2948 : !torch.vtensor<[16,64,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,64,160],f16>
%3021 = torch.aten.permute %3020, %901 : !torch.vtensor<[2,8,64,160],f16>, !torch.list<int> -> !torch.vtensor<[2,64,8,160],f16>
%3022 = torch.aten.clone %3021, %int0 : !torch.vtensor<[2,64,8,160],f16>, !torch.int -> !torch.vtensor<[2,64,8,160],f16>
%3023 = torch.aten.view %3022, %2882 : !torch.vtensor<[2,64,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,64,1280],f16>
%3024 = torch.aten.transpose.int %288, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%3025 = torch.aten.view %3023, %2901 : !torch.vtensor<[2,64,1280],f16>, !torch.list<int> -> !torch.vtensor<[128,1280],f16>
%3026 = torch.aten.mm %3025, %3024 : !torch.vtensor<[128,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[128,1280],f16>
%3027 = torch.aten.mul.Scalar %289, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16>
%3028 = torch.aten.add.Tensor %3027, %3026, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[128,1280],f16>, !torch.int -> !torch.vtensor<[128,1280],f16>
%3029 = torch.aten.view %3028, %2882 : !torch.vtensor<[128,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,64,1280],f16>
%3030 = torch.aten.add.Tensor %3029, %2959, %int1 : !torch.vtensor<[2,64,1280],f16>, !torch.vtensor<[2,64,1280],f16>, !torch.int -> !torch.vtensor<[2,64,1280],f16>
%3031 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%3032 = torch.aten.sum.dim_IntList %3030, %3031, %true, %none : !torch.vtensor<[2,64,1280],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,64,1],f16>
%3033 = torch.aten.div.Scalar %3032, %int1280 : !torch.vtensor<[2,64,1],f16>, !torch.int -> !torch.vtensor<[2,64,1],f16>
%3034 = torch.prim.ListConstruct %int2, %int64, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3035 = torch.aten.broadcast_to %3033, %3034 : !torch.vtensor<[2,64,1],f16>, !torch.list<int> -> !torch.vtensor<[2,64,1280],f16>
%3036 = torch.aten.sub.Tensor %3030, %3035, %int1 : !torch.vtensor<[2,64,1280],f16>, !torch.vtensor<[2,64,1280],f16>, !torch.int -> !torch.vtensor<[2,64,1280],f16>
%3037 = torch.aten.mul.Tensor %3036, %3036 : !torch.vtensor<[2,64,1280],f16>, !torch.vtensor<[2,64,1280],f16> -> !torch.vtensor<[2,64,1280],f16>
%3038 = torch.aten.sum.dim_IntList %3037, %3031, %true, %none : !torch.vtensor<[2,64,1280],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,64,1],f16>
%3039 = torch.aten.div.Scalar %3038, %int1280 : !torch.vtensor<[2,64,1],f16>, !torch.int -> !torch.vtensor<[2,64,1],f16>
%3040 = torch.aten.add.Scalar %3039, %float1.000000e-05, %int1 : !torch.vtensor<[2,64,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,64,1],f16>
%3041 = torch.aten.rsqrt %3040 : !torch.vtensor<[2,64,1],f16> -> !torch.vtensor<[2,64,1],f16>
%3042 = torch.prim.ListConstruct %int2, %int64, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3043 = torch.aten.broadcast_to %3041, %3042 : !torch.vtensor<[2,64,1],f16>, !torch.list<int> -> !torch.vtensor<[2,64,1280],f16>
%3044 = torch.aten.mul.Tensor %3036, %3043 : !torch.vtensor<[2,64,1280],f16>, !torch.vtensor<[2,64,1280],f16> -> !torch.vtensor<[2,64,1280],f16>
%3045 = torch.aten.mul.Tensor %3044, %290 : !torch.vtensor<[2,64,1280],f16>, !torch.vtensor<[1280],f16> -> !torch.vtensor<[2,64,1280],f16>
%3046 = torch.aten.add.Tensor %3045, %291, %int1 : !torch.vtensor<[2,64,1280],f16>, !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[2,64,1280],f16>
%3047 = torch.aten.transpose.int %292, %int0, %int1 : !torch.vtensor<[10240,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,10240],f16>
%3048 = torch.aten.view %3046, %2901 : !torch.vtensor<[2,64,1280],f16>, !torch.list<int> -> !torch.vtensor<[128,1280],f16>
%3049 = torch.aten.mm %3048, %3047 : !torch.vtensor<[128,1280],f16>, !torch.vtensor<[1280,10240],f16> -> !torch.vtensor<[128,10240],f16>
%3050 = torch.aten.mul.Scalar %293, %int1 : !torch.vtensor<[10240],f16>, !torch.int -> !torch.vtensor<[10240],f16>
%3051 = torch.aten.add.Tensor %3050, %3049, %int1 : !torch.vtensor<[10240],f16>, !torch.vtensor<[128,10240],f16>, !torch.int -> !torch.vtensor<[128,10240],f16>
%3052 = torch.prim.ListConstruct %int2, %int64, %int10240 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3053 = torch.aten.view %3051, %3052 : !torch.vtensor<[128,10240],f16>, !torch.list<int> -> !torch.vtensor<[2,64,10240],f16>
%3054 = torch.aten.slice.Tensor %3053, %int-1, %int0, %int5120, %int1 : !torch.vtensor<[2,64,10240],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,64,5120],f16>
%3055 = torch.aten.slice.Tensor %3053, %int-1, %int5120, %int10240, %int1 : !torch.vtensor<[2,64,10240],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,64,5120],f16>
%3056 = torch.aten.gelu %3055, %str : !torch.vtensor<[2,64,5120],f16>, !torch.str -> !torch.vtensor<[2,64,5120],f16>
%3057 = torch.aten.mul.Tensor %3054, %3056 : !torch.vtensor<[2,64,5120],f16>, !torch.vtensor<[2,64,5120],f16> -> !torch.vtensor<[2,64,5120],f16>
%3058 = torch.aten.transpose.int %294, %int0, %int1 : !torch.vtensor<[1280,5120],f16>, !torch.int, !torch.int -> !torch.vtensor<[5120,1280],f16>
%3059 = torch.prim.ListConstruct %int128, %int5120 : (!torch.int, !torch.int) -> !torch.list<int>
%3060 = torch.aten.view %3057, %3059 : !torch.vtensor<[2,64,5120],f16>, !torch.list<int> -> !torch.vtensor<[128,5120],f16>
%3061 = torch.aten.mm %3060, %3058 : !torch.vtensor<[128,5120],f16>, !torch.vtensor<[5120,1280],f16> -> !torch.vtensor<[128,1280],f16>
%3062 = torch.aten.mul.Scalar %295, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16>
%3063 = torch.aten.add.Tensor %3062, %3061, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[128,1280],f16>, !torch.int -> !torch.vtensor<[128,1280],f16>
%3064 = torch.aten.view %3063, %2882 : !torch.vtensor<[128,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,64,1280],f16>
%3065 = torch.aten.add.Tensor %3064, %3030, %int1 : !torch.vtensor<[2,64,1280],f16>, !torch.vtensor<[2,64,1280],f16>, !torch.int -> !torch.vtensor<[2,64,1280],f16>
%3066 = torch.prim.ListConstruct %int2, %int8, %int8, %int1280 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3067 = torch.aten.view %3065, %3066 : !torch.vtensor<[2,64,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,8,8,1280],f16>
%3068 = torch.aten.permute %3067, %1060 : !torch.vtensor<[2,8,8,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,8,8],f16>
%3069 = torch.aten.convolution %3068, %296, %297, %744, %745, %744, %false, %745, %int1 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[1280,1280,1,1],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16>
%3070 = torch.aten.add.Tensor %3069, %2846, %int1 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,8,8],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16>
%3071 = torch.aten.clone %3070, %int0 : !torch.vtensor<[2,1280,8,8],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16>
%3072 = torch.aten.view %3071, %2594 : !torch.vtensor<[2,1280,8,8],f16>, !torch.list<int> -> !torch.vtensor<[2,32,40,64],f16>
%3073 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%3074 = torch.aten.to.dtype %3073, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%3075 = torch.prim.ListConstruct %int2, %int32, %int40, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3076 = torch.aten.broadcast_to %3074, %3075 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,40,64],f32>
%3077 = torch.valsem.aten.copy %3076, %3072, %false : !torch.vtensor<[2,32,40,64],f32>, !torch.vtensor<[2,32,40,64],f16>, !torch.bool -> !torch.vtensor<[2,32,40,64],f32>
%3078 = torch.aten.to.dtype %3077, %int7, %false, %false, %none : !torch.vtensor<[2,32,40,64],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,40,64],f64>
%3079 = torch.aten.sum.dim_IntList %3078, %754, %true, %none : !torch.vtensor<[2,32,40,64],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%3080 = torch.aten.div.Scalar %3079, %int2560 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%3081 = torch.aten.sub.Tensor %3078, %3080, %float1.000000e00 : !torch.vtensor<[2,32,40,64],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,40,64],f64>
%3082 = torch.aten.mul.Tensor %3081, %3081 : !torch.vtensor<[2,32,40,64],f64>, !torch.vtensor<[2,32,40,64],f64> -> !torch.vtensor<[2,32,40,64],f64>
%3083 = torch.aten.sum.dim_IntList %3082, %754, %true, %none : !torch.vtensor<[2,32,40,64],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%3084 = torch.aten.div.Scalar %3083, %int2560 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%3085 = torch.aten.to.dtype %3084, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%3086 = torch.aten.sum.dim_IntList %3077, %754, %true, %none : !torch.vtensor<[2,32,40,64],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%3087 = torch.aten.div.Scalar %3086, %int2560 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%3088 = torch.aten.add.Tensor %3085, %6, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%3089 = torch.aten.rsqrt %3088 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%3090 = torch.aten.sub.Tensor %3072, %3087, %int1 : !torch.vtensor<[2,32,40,64],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,64],f32>
%3091 = torch.aten.mul.Tensor %3090, %3089 : !torch.vtensor<[2,32,40,64],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,64],f32>
%3092 = torch.aten.view %3091, %2615 : !torch.vtensor<[2,32,40,64],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,8,8],f32>
%3093 = torch.aten.unsqueeze %298, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%3094 = torch.aten.unsqueeze %3093, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%3095 = torch.aten.mul.Tensor %3092, %3094 : !torch.vtensor<[2,1280,8,8],f32>, !torch.vtensor<[1280,1,1],f16> -> !torch.vtensor<[2,1280,8,8],f32>
%3096 = torch.aten.unsqueeze %299, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%3097 = torch.aten.unsqueeze %3096, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%3098 = torch.aten.add.Tensor %3095, %3097, %int1 : !torch.vtensor<[2,1280,8,8],f32>, !torch.vtensor<[1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f32>
%3099 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%3100 = torch.aten.to.dtype %3099, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%3101 = torch.prim.ListConstruct %int2, %int1280, %int8, %int8 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3102 = torch.aten.broadcast_to %3100, %3101 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,8,8],f16>
%3103 = torch.valsem.aten.copy %3102, %3098, %false : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,8,8],f32>, !torch.bool -> !torch.vtensor<[2,1280,8,8],f16>
%3104 = torch.aten.sigmoid %3103 : !torch.vtensor<[2,1280,8,8],f16> -> !torch.vtensor<[2,1280,8,8],f16>
%3105 = torch.aten.mul.Tensor %3104, %3103 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,8,8],f16> -> !torch.vtensor<[2,1280,8,8],f16>
%3106 = torch.aten.convolution %3105, %300, %301, %744, %744, %744, %false, %745, %int1 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16>
%3107 = torch.aten.sigmoid %743 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%3108 = torch.aten.mul.Tensor %3107, %743 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%3109 = torch.aten.transpose.int %302, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%3110 = torch.aten.mm %3108, %3109 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[2,1280],f16>
%3111 = torch.aten.mul.Scalar %303, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16>
%3112 = torch.aten.add.Tensor %3111, %3110, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[2,1280],f16>, !torch.int -> !torch.vtensor<[2,1280],f16>
%3113 = torch.aten.slice.Tensor %3112, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,1280],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
%3114 = torch.aten.slice.Tensor %3113, %int1, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,1280],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
%3115 = torch.aten.unsqueeze %3114, %int2 : !torch.vtensor<[2,1280],f16>, !torch.int -> !torch.vtensor<[2,1280,1],f16>
%3116 = torch.aten.unsqueeze %3115, %int3 : !torch.vtensor<[2,1280,1],f16>, !torch.int -> !torch.vtensor<[2,1280,1,1],f16>
%3117 = torch.aten.add.Tensor %3106, %3116, %int1 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16>
%3118 = torch.aten.view %3117, %2594 : !torch.vtensor<[2,1280,8,8],f16>, !torch.list<int> -> !torch.vtensor<[2,32,40,64],f16>
%3119 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%3120 = torch.aten.to.dtype %3119, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%3121 = torch.prim.ListConstruct %int2, %int32, %int40, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3122 = torch.aten.broadcast_to %3120, %3121 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,40,64],f32>
%3123 = torch.valsem.aten.copy %3122, %3118, %false : !torch.vtensor<[2,32,40,64],f32>, !torch.vtensor<[2,32,40,64],f16>, !torch.bool -> !torch.vtensor<[2,32,40,64],f32>
%3124 = torch.aten.to.dtype %3123, %int7, %false, %false, %none : !torch.vtensor<[2,32,40,64],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,40,64],f64>
%3125 = torch.aten.sum.dim_IntList %3124, %754, %true, %none : !torch.vtensor<[2,32,40,64],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%3126 = torch.aten.div.Scalar %3125, %int2560 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%3127 = torch.aten.sub.Tensor %3124, %3126, %float1.000000e00 : !torch.vtensor<[2,32,40,64],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,40,64],f64>
%3128 = torch.aten.mul.Tensor %3127, %3127 : !torch.vtensor<[2,32,40,64],f64>, !torch.vtensor<[2,32,40,64],f64> -> !torch.vtensor<[2,32,40,64],f64>
%3129 = torch.aten.sum.dim_IntList %3128, %754, %true, %none : !torch.vtensor<[2,32,40,64],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%3130 = torch.aten.div.Scalar %3129, %int2560 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%3131 = torch.aten.to.dtype %3130, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%3132 = torch.aten.sum.dim_IntList %3123, %754, %true, %none : !torch.vtensor<[2,32,40,64],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%3133 = torch.aten.div.Scalar %3132, %int2560 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%3134 = torch.aten.add.Tensor %3131, %6, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%3135 = torch.aten.rsqrt %3134 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%3136 = torch.aten.sub.Tensor %3118, %3133, %int1 : !torch.vtensor<[2,32,40,64],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,64],f32>
%3137 = torch.aten.mul.Tensor %3136, %3135 : !torch.vtensor<[2,32,40,64],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,64],f32>
%3138 = torch.aten.view %3137, %2615 : !torch.vtensor<[2,32,40,64],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,8,8],f32>
%3139 = torch.aten.unsqueeze %304, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%3140 = torch.aten.unsqueeze %3139, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%3141 = torch.aten.mul.Tensor %3138, %3140 : !torch.vtensor<[2,1280,8,8],f32>, !torch.vtensor<[1280,1,1],f16> -> !torch.vtensor<[2,1280,8,8],f32>
%3142 = torch.aten.unsqueeze %305, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%3143 = torch.aten.unsqueeze %3142, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%3144 = torch.aten.add.Tensor %3141, %3143, %int1 : !torch.vtensor<[2,1280,8,8],f32>, !torch.vtensor<[1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f32>
%3145 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%3146 = torch.aten.to.dtype %3145, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%3147 = torch.prim.ListConstruct %int2, %int1280, %int8, %int8 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3148 = torch.aten.broadcast_to %3146, %3147 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,8,8],f16>
%3149 = torch.valsem.aten.copy %3148, %3144, %false : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,8,8],f32>, !torch.bool -> !torch.vtensor<[2,1280,8,8],f16>
%3150 = torch.aten.sigmoid %3149 : !torch.vtensor<[2,1280,8,8],f16> -> !torch.vtensor<[2,1280,8,8],f16>
%3151 = torch.aten.mul.Tensor %3150, %3149 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,8,8],f16> -> !torch.vtensor<[2,1280,8,8],f16>
%3152 = torch.aten.convolution %3151, %306, %307, %744, %744, %744, %false, %745, %int1 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16>
%3153 = torch.aten.add.Tensor %3070, %3152, %int1 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,8,8],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16>
%3154 = torch.aten.div.Tensor %3153, %9 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[],si64> -> !torch.vtensor<[2,1280,8,8],f16>
%3155 = torch.prim.ListConstruct %3154, %2762 : (!torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,8,8],f16>) -> !torch.list<vtensor>
%3156 = torch.aten.cat %3155, %int1 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[2,2560,8,8],f16>
%3157 = torch.aten.clone %3156, %int0 : !torch.vtensor<[2,2560,8,8],f16>, !torch.int -> !torch.vtensor<[2,2560,8,8],f16>
%3158 = torch.prim.ListConstruct %int2, %int32, %int80, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3159 = torch.aten.view %3157, %3158 : !torch.vtensor<[2,2560,8,8],f16>, !torch.list<int> -> !torch.vtensor<[2,32,80,64],f16>
%3160 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%3161 = torch.aten.to.dtype %3160, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%3162 = torch.prim.ListConstruct %int2, %int32, %int80, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3163 = torch.aten.broadcast_to %3161, %3162 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,80,64],f32>
%3164 = torch.valsem.aten.copy %3163, %3159, %false : !torch.vtensor<[2,32,80,64],f32>, !torch.vtensor<[2,32,80,64],f16>, !torch.bool -> !torch.vtensor<[2,32,80,64],f32>
%3165 = torch.aten.to.dtype %3164, %int7, %false, %false, %none : !torch.vtensor<[2,32,80,64],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,80,64],f64>
%3166 = torch.aten.sum.dim_IntList %3165, %754, %true, %none : !torch.vtensor<[2,32,80,64],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%3167 = torch.aten.div.Scalar %3166, %int5120 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%3168 = torch.aten.sub.Tensor %3165, %3167, %float1.000000e00 : !torch.vtensor<[2,32,80,64],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,80,64],f64>
%3169 = torch.aten.mul.Tensor %3168, %3168 : !torch.vtensor<[2,32,80,64],f64>, !torch.vtensor<[2,32,80,64],f64> -> !torch.vtensor<[2,32,80,64],f64>
%3170 = torch.aten.sum.dim_IntList %3169, %754, %true, %none : !torch.vtensor<[2,32,80,64],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%3171 = torch.aten.div.Scalar %3170, %int5120 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%3172 = torch.aten.to.dtype %3171, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%3173 = torch.aten.sum.dim_IntList %3164, %754, %true, %none : !torch.vtensor<[2,32,80,64],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%3174 = torch.aten.div.Scalar %3173, %int5120 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%3175 = torch.aten.add.Tensor %3172, %6, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%3176 = torch.aten.rsqrt %3175 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%3177 = torch.aten.sub.Tensor %3159, %3174, %int1 : !torch.vtensor<[2,32,80,64],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,80,64],f32>
%3178 = torch.aten.mul.Tensor %3177, %3176 : !torch.vtensor<[2,32,80,64],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,80,64],f32>
%3179 = torch.prim.ListConstruct %int2, %int2560, %int8, %int8 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3180 = torch.aten.view %3178, %3179 : !torch.vtensor<[2,32,80,64],f32>, !torch.list<int> -> !torch.vtensor<[2,2560,8,8],f32>
%3181 = torch.aten.unsqueeze %308, %int-1 : !torch.vtensor<[2560],f16>, !torch.int -> !torch.vtensor<[2560,1],f16>
%3182 = torch.aten.unsqueeze %3181, %int-1 : !torch.vtensor<[2560,1],f16>, !torch.int -> !torch.vtensor<[2560,1,1],f16>
%3183 = torch.aten.mul.Tensor %3180, %3182 : !torch.vtensor<[2,2560,8,8],f32>, !torch.vtensor<[2560,1,1],f16> -> !torch.vtensor<[2,2560,8,8],f32>
%3184 = torch.aten.unsqueeze %309, %int-1 : !torch.vtensor<[2560],f16>, !torch.int -> !torch.vtensor<[2560,1],f16>
%3185 = torch.aten.unsqueeze %3184, %int-1 : !torch.vtensor<[2560,1],f16>, !torch.int -> !torch.vtensor<[2560,1,1],f16>
%3186 = torch.aten.add.Tensor %3183, %3185, %int1 : !torch.vtensor<[2,2560,8,8],f32>, !torch.vtensor<[2560,1,1],f16>, !torch.int -> !torch.vtensor<[2,2560,8,8],f32>
%3187 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%3188 = torch.aten.to.dtype %3187, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%3189 = torch.prim.ListConstruct %int2, %int2560, %int8, %int8 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3190 = torch.aten.broadcast_to %3188, %3189 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,2560,8,8],f16>
%3191 = torch.valsem.aten.copy %3190, %3186, %false : !torch.vtensor<[2,2560,8,8],f16>, !torch.vtensor<[2,2560,8,8],f32>, !torch.bool -> !torch.vtensor<[2,2560,8,8],f16>
%3192 = torch.aten.sigmoid %3191 : !torch.vtensor<[2,2560,8,8],f16> -> !torch.vtensor<[2,2560,8,8],f16>
%3193 = torch.aten.mul.Tensor %3192, %3191 : !torch.vtensor<[2,2560,8,8],f16>, !torch.vtensor<[2,2560,8,8],f16> -> !torch.vtensor<[2,2560,8,8],f16>
%3194 = torch.aten.convolution %3193, %310, %311, %744, %744, %744, %false, %745, %int1 : !torch.vtensor<[2,2560,8,8],f16>, !torch.vtensor<[1280,2560,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16>
%3195 = torch.aten.sigmoid %743 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%3196 = torch.aten.mul.Tensor %3195, %743 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%3197 = torch.aten.transpose.int %312, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%3198 = torch.aten.mm %3196, %3197 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[2,1280],f16>
%3199 = torch.aten.mul.Scalar %313, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16>
%3200 = torch.aten.add.Tensor %3199, %3198, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[2,1280],f16>, !torch.int -> !torch.vtensor<[2,1280],f16>
%3201 = torch.aten.slice.Tensor %3200, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,1280],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
%3202 = torch.aten.slice.Tensor %3201, %int1, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,1280],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
%3203 = torch.aten.unsqueeze %3202, %int2 : !torch.vtensor<[2,1280],f16>, !torch.int -> !torch.vtensor<[2,1280,1],f16>
%3204 = torch.aten.unsqueeze %3203, %int3 : !torch.vtensor<[2,1280,1],f16>, !torch.int -> !torch.vtensor<[2,1280,1,1],f16>
%3205 = torch.aten.add.Tensor %3194, %3204, %int1 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16>
%3206 = torch.aten.view %3205, %2594 : !torch.vtensor<[2,1280,8,8],f16>, !torch.list<int> -> !torch.vtensor<[2,32,40,64],f16>
%3207 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%3208 = torch.aten.to.dtype %3207, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%3209 = torch.prim.ListConstruct %int2, %int32, %int40, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3210 = torch.aten.broadcast_to %3208, %3209 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,40,64],f32>
%3211 = torch.valsem.aten.copy %3210, %3206, %false : !torch.vtensor<[2,32,40,64],f32>, !torch.vtensor<[2,32,40,64],f16>, !torch.bool -> !torch.vtensor<[2,32,40,64],f32>
%3212 = torch.aten.to.dtype %3211, %int7, %false, %false, %none : !torch.vtensor<[2,32,40,64],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,40,64],f64>
%3213 = torch.aten.sum.dim_IntList %3212, %754, %true, %none : !torch.vtensor<[2,32,40,64],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%3214 = torch.aten.div.Scalar %3213, %int2560 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%3215 = torch.aten.sub.Tensor %3212, %3214, %float1.000000e00 : !torch.vtensor<[2,32,40,64],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,40,64],f64>
%3216 = torch.aten.mul.Tensor %3215, %3215 : !torch.vtensor<[2,32,40,64],f64>, !torch.vtensor<[2,32,40,64],f64> -> !torch.vtensor<[2,32,40,64],f64>
%3217 = torch.aten.sum.dim_IntList %3216, %754, %true, %none : !torch.vtensor<[2,32,40,64],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%3218 = torch.aten.div.Scalar %3217, %int2560 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%3219 = torch.aten.to.dtype %3218, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%3220 = torch.aten.sum.dim_IntList %3211, %754, %true, %none : !torch.vtensor<[2,32,40,64],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%3221 = torch.aten.div.Scalar %3220, %int2560 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%3222 = torch.aten.add.Tensor %3219, %6, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%3223 = torch.aten.rsqrt %3222 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%3224 = torch.aten.sub.Tensor %3206, %3221, %int1 : !torch.vtensor<[2,32,40,64],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,64],f32>
%3225 = torch.aten.mul.Tensor %3224, %3223 : !torch.vtensor<[2,32,40,64],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,64],f32>
%3226 = torch.aten.view %3225, %2615 : !torch.vtensor<[2,32,40,64],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,8,8],f32>
%3227 = torch.aten.unsqueeze %314, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%3228 = torch.aten.unsqueeze %3227, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%3229 = torch.aten.mul.Tensor %3226, %3228 : !torch.vtensor<[2,1280,8,8],f32>, !torch.vtensor<[1280,1,1],f16> -> !torch.vtensor<[2,1280,8,8],f32>
%3230 = torch.aten.unsqueeze %315, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%3231 = torch.aten.unsqueeze %3230, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%3232 = torch.aten.add.Tensor %3229, %3231, %int1 : !torch.vtensor<[2,1280,8,8],f32>, !torch.vtensor<[1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f32>
%3233 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%3234 = torch.aten.to.dtype %3233, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%3235 = torch.prim.ListConstruct %int2, %int1280, %int8, %int8 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3236 = torch.aten.broadcast_to %3234, %3235 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,8,8],f16>
%3237 = torch.valsem.aten.copy %3236, %3232, %false : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,8,8],f32>, !torch.bool -> !torch.vtensor<[2,1280,8,8],f16>
%3238 = torch.aten.sigmoid %3237 : !torch.vtensor<[2,1280,8,8],f16> -> !torch.vtensor<[2,1280,8,8],f16>
%3239 = torch.aten.mul.Tensor %3238, %3237 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,8,8],f16> -> !torch.vtensor<[2,1280,8,8],f16>
%3240 = torch.aten.convolution %3239, %316, %317, %744, %744, %744, %false, %745, %int1 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16>
%3241 = torch.aten.convolution %3156, %318, %319, %744, %745, %744, %false, %745, %int1 : !torch.vtensor<[2,2560,8,8],f16>, !torch.vtensor<[1280,2560,1,1],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16>
%3242 = torch.aten.add.Tensor %3241, %3240, %int1 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,8,8],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16>
%3243 = torch.aten.div.Tensor %3242, %5 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[2,1280,8,8],f16>
%3244 = torch.prim.ListConstruct %3243, %2678 : (!torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,8,8],f16>) -> !torch.list<vtensor>
%3245 = torch.aten.cat %3244, %int1 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[2,2560,8,8],f16>
%3246 = torch.aten.clone %3245, %int0 : !torch.vtensor<[2,2560,8,8],f16>, !torch.int -> !torch.vtensor<[2,2560,8,8],f16>
%3247 = torch.aten.view %3246, %3158 : !torch.vtensor<[2,2560,8,8],f16>, !torch.list<int> -> !torch.vtensor<[2,32,80,64],f16>
%3248 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%3249 = torch.aten.to.dtype %3248, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%3250 = torch.prim.ListConstruct %int2, %int32, %int80, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3251 = torch.aten.broadcast_to %3249, %3250 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,80,64],f32>
%3252 = torch.valsem.aten.copy %3251, %3247, %false : !torch.vtensor<[2,32,80,64],f32>, !torch.vtensor<[2,32,80,64],f16>, !torch.bool -> !torch.vtensor<[2,32,80,64],f32>
%3253 = torch.aten.to.dtype %3252, %int7, %false, %false, %none : !torch.vtensor<[2,32,80,64],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,80,64],f64>
%3254 = torch.aten.sum.dim_IntList %3253, %754, %true, %none : !torch.vtensor<[2,32,80,64],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%3255 = torch.aten.div.Scalar %3254, %int5120 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%3256 = torch.aten.sub.Tensor %3253, %3255, %float1.000000e00 : !torch.vtensor<[2,32,80,64],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,80,64],f64>
%3257 = torch.aten.mul.Tensor %3256, %3256 : !torch.vtensor<[2,32,80,64],f64>, !torch.vtensor<[2,32,80,64],f64> -> !torch.vtensor<[2,32,80,64],f64>
%3258 = torch.aten.sum.dim_IntList %3257, %754, %true, %none : !torch.vtensor<[2,32,80,64],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%3259 = torch.aten.div.Scalar %3258, %int5120 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%3260 = torch.aten.to.dtype %3259, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%3261 = torch.aten.sum.dim_IntList %3252, %754, %true, %none : !torch.vtensor<[2,32,80,64],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%3262 = torch.aten.div.Scalar %3261, %int5120 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%3263 = torch.aten.add.Tensor %3260, %6, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%3264 = torch.aten.rsqrt %3263 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%3265 = torch.aten.sub.Tensor %3247, %3262, %int1 : !torch.vtensor<[2,32,80,64],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,80,64],f32>
%3266 = torch.aten.mul.Tensor %3265, %3264 : !torch.vtensor<[2,32,80,64],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,80,64],f32>
%3267 = torch.aten.view %3266, %3179 : !torch.vtensor<[2,32,80,64],f32>, !torch.list<int> -> !torch.vtensor<[2,2560,8,8],f32>
%3268 = torch.aten.unsqueeze %320, %int-1 : !torch.vtensor<[2560],f16>, !torch.int -> !torch.vtensor<[2560,1],f16>
%3269 = torch.aten.unsqueeze %3268, %int-1 : !torch.vtensor<[2560,1],f16>, !torch.int -> !torch.vtensor<[2560,1,1],f16>
%3270 = torch.aten.mul.Tensor %3267, %3269 : !torch.vtensor<[2,2560,8,8],f32>, !torch.vtensor<[2560,1,1],f16> -> !torch.vtensor<[2,2560,8,8],f32>
%3271 = torch.aten.unsqueeze %321, %int-1 : !torch.vtensor<[2560],f16>, !torch.int -> !torch.vtensor<[2560,1],f16>
%3272 = torch.aten.unsqueeze %3271, %int-1 : !torch.vtensor<[2560,1],f16>, !torch.int -> !torch.vtensor<[2560,1,1],f16>
%3273 = torch.aten.add.Tensor %3270, %3272, %int1 : !torch.vtensor<[2,2560,8,8],f32>, !torch.vtensor<[2560,1,1],f16>, !torch.int -> !torch.vtensor<[2,2560,8,8],f32>
%3274 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%3275 = torch.aten.to.dtype %3274, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%3276 = torch.prim.ListConstruct %int2, %int2560, %int8, %int8 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3277 = torch.aten.broadcast_to %3275, %3276 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,2560,8,8],f16>
%3278 = torch.valsem.aten.copy %3277, %3273, %false : !torch.vtensor<[2,2560,8,8],f16>, !torch.vtensor<[2,2560,8,8],f32>, !torch.bool -> !torch.vtensor<[2,2560,8,8],f16>
%3279 = torch.aten.sigmoid %3278 : !torch.vtensor<[2,2560,8,8],f16> -> !torch.vtensor<[2,2560,8,8],f16>
%3280 = torch.aten.mul.Tensor %3279, %3278 : !torch.vtensor<[2,2560,8,8],f16>, !torch.vtensor<[2,2560,8,8],f16> -> !torch.vtensor<[2,2560,8,8],f16>
%3281 = torch.aten.convolution %3280, %322, %323, %744, %744, %744, %false, %745, %int1 : !torch.vtensor<[2,2560,8,8],f16>, !torch.vtensor<[1280,2560,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16>
%3282 = torch.aten.sigmoid %743 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%3283 = torch.aten.mul.Tensor %3282, %743 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%3284 = torch.aten.transpose.int %324, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%3285 = torch.aten.mm %3283, %3284 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[2,1280],f16>
%3286 = torch.aten.mul.Scalar %325, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16>
%3287 = torch.aten.add.Tensor %3286, %3285, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[2,1280],f16>, !torch.int -> !torch.vtensor<[2,1280],f16>
%3288 = torch.aten.slice.Tensor %3287, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,1280],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
%3289 = torch.aten.slice.Tensor %3288, %int1, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,1280],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
%3290 = torch.aten.unsqueeze %3289, %int2 : !torch.vtensor<[2,1280],f16>, !torch.int -> !torch.vtensor<[2,1280,1],f16>
%3291 = torch.aten.unsqueeze %3290, %int3 : !torch.vtensor<[2,1280,1],f16>, !torch.int -> !torch.vtensor<[2,1280,1,1],f16>
%3292 = torch.aten.add.Tensor %3281, %3291, %int1 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16>
%3293 = torch.aten.view %3292, %2594 : !torch.vtensor<[2,1280,8,8],f16>, !torch.list<int> -> !torch.vtensor<[2,32,40,64],f16>
%3294 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%3295 = torch.aten.to.dtype %3294, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%3296 = torch.prim.ListConstruct %int2, %int32, %int40, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3297 = torch.aten.broadcast_to %3295, %3296 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,40,64],f32>
%3298 = torch.valsem.aten.copy %3297, %3293, %false : !torch.vtensor<[2,32,40,64],f32>, !torch.vtensor<[2,32,40,64],f16>, !torch.bool -> !torch.vtensor<[2,32,40,64],f32>
%3299 = torch.aten.to.dtype %3298, %int7, %false, %false, %none : !torch.vtensor<[2,32,40,64],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,40,64],f64>
%3300 = torch.aten.sum.dim_IntList %3299, %754, %true, %none : !torch.vtensor<[2,32,40,64],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%3301 = torch.aten.div.Scalar %3300, %int2560 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%3302 = torch.aten.sub.Tensor %3299, %3301, %float1.000000e00 : !torch.vtensor<[2,32,40,64],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,40,64],f64>
%3303 = torch.aten.mul.Tensor %3302, %3302 : !torch.vtensor<[2,32,40,64],f64>, !torch.vtensor<[2,32,40,64],f64> -> !torch.vtensor<[2,32,40,64],f64>
%3304 = torch.aten.sum.dim_IntList %3303, %754, %true, %none : !torch.vtensor<[2,32,40,64],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%3305 = torch.aten.div.Scalar %3304, %int2560 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%3306 = torch.aten.to.dtype %3305, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%3307 = torch.aten.sum.dim_IntList %3298, %754, %true, %none : !torch.vtensor<[2,32,40,64],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%3308 = torch.aten.div.Scalar %3307, %int2560 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%3309 = torch.aten.add.Tensor %3306, %6, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%3310 = torch.aten.rsqrt %3309 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%3311 = torch.aten.sub.Tensor %3293, %3308, %int1 : !torch.vtensor<[2,32,40,64],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,64],f32>
%3312 = torch.aten.mul.Tensor %3311, %3310 : !torch.vtensor<[2,32,40,64],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,64],f32>
%3313 = torch.aten.view %3312, %2615 : !torch.vtensor<[2,32,40,64],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,8,8],f32>
%3314 = torch.aten.unsqueeze %326, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%3315 = torch.aten.unsqueeze %3314, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%3316 = torch.aten.mul.Tensor %3313, %3315 : !torch.vtensor<[2,1280,8,8],f32>, !torch.vtensor<[1280,1,1],f16> -> !torch.vtensor<[2,1280,8,8],f32>
%3317 = torch.aten.unsqueeze %327, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%3318 = torch.aten.unsqueeze %3317, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%3319 = torch.aten.add.Tensor %3316, %3318, %int1 : !torch.vtensor<[2,1280,8,8],f32>, !torch.vtensor<[1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f32>
%3320 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%3321 = torch.aten.to.dtype %3320, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%3322 = torch.prim.ListConstruct %int2, %int1280, %int8, %int8 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3323 = torch.aten.broadcast_to %3321, %3322 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,8,8],f16>
%3324 = torch.valsem.aten.copy %3323, %3319, %false : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,8,8],f32>, !torch.bool -> !torch.vtensor<[2,1280,8,8],f16>
%3325 = torch.aten.sigmoid %3324 : !torch.vtensor<[2,1280,8,8],f16> -> !torch.vtensor<[2,1280,8,8],f16>
%3326 = torch.aten.mul.Tensor %3325, %3324 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,8,8],f16> -> !torch.vtensor<[2,1280,8,8],f16>
%3327 = torch.aten.convolution %3326, %328, %329, %744, %744, %744, %false, %745, %int1 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16>
%3328 = torch.aten.convolution %3245, %330, %331, %744, %745, %744, %false, %745, %int1 : !torch.vtensor<[2,2560,8,8],f16>, !torch.vtensor<[1280,2560,1,1],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16>
%3329 = torch.aten.add.Tensor %3328, %3327, %int1 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,8,8],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16>
%3330 = torch.aten.div.Tensor %3329, %5 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[2,1280,8,8],f16>
%3331 = torch.prim.ListConstruct %3330, %2592 : (!torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,8,8],f16>) -> !torch.list<vtensor>
%3332 = torch.aten.cat %3331, %int1 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[2,2560,8,8],f16>
%3333 = torch.aten.clone %3332, %int0 : !torch.vtensor<[2,2560,8,8],f16>, !torch.int -> !torch.vtensor<[2,2560,8,8],f16>
%3334 = torch.aten.view %3333, %3158 : !torch.vtensor<[2,2560,8,8],f16>, !torch.list<int> -> !torch.vtensor<[2,32,80,64],f16>
%3335 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%3336 = torch.aten.to.dtype %3335, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%3337 = torch.prim.ListConstruct %int2, %int32, %int80, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3338 = torch.aten.broadcast_to %3336, %3337 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,80,64],f32>
%3339 = torch.valsem.aten.copy %3338, %3334, %false : !torch.vtensor<[2,32,80,64],f32>, !torch.vtensor<[2,32,80,64],f16>, !torch.bool -> !torch.vtensor<[2,32,80,64],f32>
%3340 = torch.aten.to.dtype %3339, %int7, %false, %false, %none : !torch.vtensor<[2,32,80,64],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,80,64],f64>
%3341 = torch.aten.sum.dim_IntList %3340, %754, %true, %none : !torch.vtensor<[2,32,80,64],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%3342 = torch.aten.div.Scalar %3341, %int5120 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%3343 = torch.aten.sub.Tensor %3340, %3342, %float1.000000e00 : !torch.vtensor<[2,32,80,64],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,80,64],f64>
%3344 = torch.aten.mul.Tensor %3343, %3343 : !torch.vtensor<[2,32,80,64],f64>, !torch.vtensor<[2,32,80,64],f64> -> !torch.vtensor<[2,32,80,64],f64>
%3345 = torch.aten.sum.dim_IntList %3344, %754, %true, %none : !torch.vtensor<[2,32,80,64],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%3346 = torch.aten.div.Scalar %3345, %int5120 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%3347 = torch.aten.to.dtype %3346, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%3348 = torch.aten.sum.dim_IntList %3339, %754, %true, %none : !torch.vtensor<[2,32,80,64],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%3349 = torch.aten.div.Scalar %3348, %int5120 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%3350 = torch.aten.add.Tensor %3347, %6, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%3351 = torch.aten.rsqrt %3350 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%3352 = torch.aten.sub.Tensor %3334, %3349, %int1 : !torch.vtensor<[2,32,80,64],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,80,64],f32>
%3353 = torch.aten.mul.Tensor %3352, %3351 : !torch.vtensor<[2,32,80,64],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,80,64],f32>
%3354 = torch.aten.view %3353, %3179 : !torch.vtensor<[2,32,80,64],f32>, !torch.list<int> -> !torch.vtensor<[2,2560,8,8],f32>
%3355 = torch.aten.unsqueeze %332, %int-1 : !torch.vtensor<[2560],f16>, !torch.int -> !torch.vtensor<[2560,1],f16>
%3356 = torch.aten.unsqueeze %3355, %int-1 : !torch.vtensor<[2560,1],f16>, !torch.int -> !torch.vtensor<[2560,1,1],f16>
%3357 = torch.aten.mul.Tensor %3354, %3356 : !torch.vtensor<[2,2560,8,8],f32>, !torch.vtensor<[2560,1,1],f16> -> !torch.vtensor<[2,2560,8,8],f32>
%3358 = torch.aten.unsqueeze %333, %int-1 : !torch.vtensor<[2560],f16>, !torch.int -> !torch.vtensor<[2560,1],f16>
%3359 = torch.aten.unsqueeze %3358, %int-1 : !torch.vtensor<[2560,1],f16>, !torch.int -> !torch.vtensor<[2560,1,1],f16>
%3360 = torch.aten.add.Tensor %3357, %3359, %int1 : !torch.vtensor<[2,2560,8,8],f32>, !torch.vtensor<[2560,1,1],f16>, !torch.int -> !torch.vtensor<[2,2560,8,8],f32>
%3361 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%3362 = torch.aten.to.dtype %3361, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%3363 = torch.prim.ListConstruct %int2, %int2560, %int8, %int8 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3364 = torch.aten.broadcast_to %3362, %3363 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,2560,8,8],f16>
%3365 = torch.valsem.aten.copy %3364, %3360, %false : !torch.vtensor<[2,2560,8,8],f16>, !torch.vtensor<[2,2560,8,8],f32>, !torch.bool -> !torch.vtensor<[2,2560,8,8],f16>
%3366 = torch.aten.sigmoid %3365 : !torch.vtensor<[2,2560,8,8],f16> -> !torch.vtensor<[2,2560,8,8],f16>
%3367 = torch.aten.mul.Tensor %3366, %3365 : !torch.vtensor<[2,2560,8,8],f16>, !torch.vtensor<[2,2560,8,8],f16> -> !torch.vtensor<[2,2560,8,8],f16>
%3368 = torch.aten.convolution %3367, %334, %335, %744, %744, %744, %false, %745, %int1 : !torch.vtensor<[2,2560,8,8],f16>, !torch.vtensor<[1280,2560,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16>
%3369 = torch.aten.sigmoid %743 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%3370 = torch.aten.mul.Tensor %3369, %743 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%3371 = torch.aten.transpose.int %336, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%3372 = torch.aten.mm %3370, %3371 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[2,1280],f16>
%3373 = torch.aten.mul.Scalar %337, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16>
%3374 = torch.aten.add.Tensor %3373, %3372, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[2,1280],f16>, !torch.int -> !torch.vtensor<[2,1280],f16>
%3375 = torch.aten.slice.Tensor %3374, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,1280],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
%3376 = torch.aten.slice.Tensor %3375, %int1, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,1280],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
%3377 = torch.aten.unsqueeze %3376, %int2 : !torch.vtensor<[2,1280],f16>, !torch.int -> !torch.vtensor<[2,1280,1],f16>
%3378 = torch.aten.unsqueeze %3377, %int3 : !torch.vtensor<[2,1280,1],f16>, !torch.int -> !torch.vtensor<[2,1280,1,1],f16>
%3379 = torch.aten.add.Tensor %3368, %3378, %int1 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16>
%3380 = torch.aten.view %3379, %2594 : !torch.vtensor<[2,1280,8,8],f16>, !torch.list<int> -> !torch.vtensor<[2,32,40,64],f16>
%3381 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%3382 = torch.aten.to.dtype %3381, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%3383 = torch.prim.ListConstruct %int2, %int32, %int40, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3384 = torch.aten.broadcast_to %3382, %3383 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,40,64],f32>
%3385 = torch.valsem.aten.copy %3384, %3380, %false : !torch.vtensor<[2,32,40,64],f32>, !torch.vtensor<[2,32,40,64],f16>, !torch.bool -> !torch.vtensor<[2,32,40,64],f32>
%3386 = torch.aten.to.dtype %3385, %int7, %false, %false, %none : !torch.vtensor<[2,32,40,64],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,40,64],f64>
%3387 = torch.aten.sum.dim_IntList %3386, %754, %true, %none : !torch.vtensor<[2,32,40,64],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%3388 = torch.aten.div.Scalar %3387, %int2560 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%3389 = torch.aten.sub.Tensor %3386, %3388, %float1.000000e00 : !torch.vtensor<[2,32,40,64],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,40,64],f64>
%3390 = torch.aten.mul.Tensor %3389, %3389 : !torch.vtensor<[2,32,40,64],f64>, !torch.vtensor<[2,32,40,64],f64> -> !torch.vtensor<[2,32,40,64],f64>
%3391 = torch.aten.sum.dim_IntList %3390, %754, %true, %none : !torch.vtensor<[2,32,40,64],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%3392 = torch.aten.div.Scalar %3391, %int2560 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%3393 = torch.aten.to.dtype %3392, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%3394 = torch.aten.sum.dim_IntList %3385, %754, %true, %none : !torch.vtensor<[2,32,40,64],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%3395 = torch.aten.div.Scalar %3394, %int2560 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%3396 = torch.aten.add.Tensor %3393, %6, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%3397 = torch.aten.rsqrt %3396 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%3398 = torch.aten.sub.Tensor %3380, %3395, %int1 : !torch.vtensor<[2,32,40,64],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,64],f32>
%3399 = torch.aten.mul.Tensor %3398, %3397 : !torch.vtensor<[2,32,40,64],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,64],f32>
%3400 = torch.aten.view %3399, %2615 : !torch.vtensor<[2,32,40,64],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,8,8],f32>
%3401 = torch.aten.unsqueeze %338, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%3402 = torch.aten.unsqueeze %3401, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%3403 = torch.aten.mul.Tensor %3400, %3402 : !torch.vtensor<[2,1280,8,8],f32>, !torch.vtensor<[1280,1,1],f16> -> !torch.vtensor<[2,1280,8,8],f32>
%3404 = torch.aten.unsqueeze %339, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%3405 = torch.aten.unsqueeze %3404, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%3406 = torch.aten.add.Tensor %3403, %3405, %int1 : !torch.vtensor<[2,1280,8,8],f32>, !torch.vtensor<[1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f32>
%3407 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%3408 = torch.aten.to.dtype %3407, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%3409 = torch.prim.ListConstruct %int2, %int1280, %int8, %int8 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3410 = torch.aten.broadcast_to %3408, %3409 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,8,8],f16>
%3411 = torch.valsem.aten.copy %3410, %3406, %false : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,8,8],f32>, !torch.bool -> !torch.vtensor<[2,1280,8,8],f16>
%3412 = torch.aten.sigmoid %3411 : !torch.vtensor<[2,1280,8,8],f16> -> !torch.vtensor<[2,1280,8,8],f16>
%3413 = torch.aten.mul.Tensor %3412, %3411 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,8,8],f16> -> !torch.vtensor<[2,1280,8,8],f16>
%3414 = torch.aten.convolution %3413, %340, %341, %744, %744, %744, %false, %745, %int1 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16>
%3415 = torch.aten.convolution %3332, %342, %343, %744, %745, %744, %false, %745, %int1 : !torch.vtensor<[2,2560,8,8],f16>, !torch.vtensor<[1280,2560,1,1],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16>
%3416 = torch.aten.add.Tensor %3415, %3414, %int1 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,8,8],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16>
%3417 = torch.aten.div.Tensor %3416, %5 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[2,1280,8,8],f16>
%3418 = torch.prim.ListConstruct %float2.000000e00, %float2.000000e00 : (!torch.float, !torch.float) -> !torch.list<float>
%3419 = torch.aten.upsample_nearest2d.vec %3417, %none, %3418 : !torch.vtensor<[2,1280,8,8],f16>, !torch.none, !torch.list<float> -> !torch.vtensor<[2,1280,16,16],f16>
%3420 = torch.aten.convolution %3419, %344, %345, %744, %744, %744, %false, %745, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
%3421 = torch.prim.ListConstruct %3420, %2591 : (!torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f16>) -> !torch.list<vtensor>
%3422 = torch.aten.cat %3421, %int1 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[2,2560,16,16],f16>
%3423 = torch.aten.clone %3422, %int0 : !torch.vtensor<[2,2560,16,16],f16>, !torch.int -> !torch.vtensor<[2,2560,16,16],f16>
%3424 = torch.prim.ListConstruct %int2, %int32, %int80, %int256 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3425 = torch.aten.view %3423, %3424 : !torch.vtensor<[2,2560,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,32,80,256],f16>
%3426 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%3427 = torch.aten.to.dtype %3426, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%3428 = torch.prim.ListConstruct %int2, %int32, %int80, %int256 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3429 = torch.aten.broadcast_to %3427, %3428 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,80,256],f32>
%3430 = torch.valsem.aten.copy %3429, %3425, %false : !torch.vtensor<[2,32,80,256],f32>, !torch.vtensor<[2,32,80,256],f16>, !torch.bool -> !torch.vtensor<[2,32,80,256],f32>
%3431 = torch.aten.to.dtype %3430, %int7, %false, %false, %none : !torch.vtensor<[2,32,80,256],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,80,256],f64>
%3432 = torch.aten.sum.dim_IntList %3431, %754, %true, %none : !torch.vtensor<[2,32,80,256],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%3433 = torch.aten.div.Scalar %3432, %int20480 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%3434 = torch.aten.sub.Tensor %3431, %3433, %float1.000000e00 : !torch.vtensor<[2,32,80,256],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,80,256],f64>
%3435 = torch.aten.mul.Tensor %3434, %3434 : !torch.vtensor<[2,32,80,256],f64>, !torch.vtensor<[2,32,80,256],f64> -> !torch.vtensor<[2,32,80,256],f64>
%3436 = torch.aten.sum.dim_IntList %3435, %754, %true, %none : !torch.vtensor<[2,32,80,256],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%3437 = torch.aten.div.Scalar %3436, %int20480 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%3438 = torch.aten.to.dtype %3437, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%3439 = torch.aten.sum.dim_IntList %3430, %754, %true, %none : !torch.vtensor<[2,32,80,256],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%3440 = torch.aten.div.Scalar %3439, %int20480 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%3441 = torch.aten.add.Tensor %3438, %6, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%3442 = torch.aten.rsqrt %3441 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%3443 = torch.aten.sub.Tensor %3425, %3440, %int1 : !torch.vtensor<[2,32,80,256],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,80,256],f32>
%3444 = torch.aten.mul.Tensor %3443, %3442 : !torch.vtensor<[2,32,80,256],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,80,256],f32>
%3445 = torch.prim.ListConstruct %int2, %int2560, %int16, %int16 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3446 = torch.aten.view %3444, %3445 : !torch.vtensor<[2,32,80,256],f32>, !torch.list<int> -> !torch.vtensor<[2,2560,16,16],f32>
%3447 = torch.aten.unsqueeze %346, %int-1 : !torch.vtensor<[2560],f16>, !torch.int -> !torch.vtensor<[2560,1],f16>
%3448 = torch.aten.unsqueeze %3447, %int-1 : !torch.vtensor<[2560,1],f16>, !torch.int -> !torch.vtensor<[2560,1,1],f16>
%3449 = torch.aten.mul.Tensor %3446, %3448 : !torch.vtensor<[2,2560,16,16],f32>, !torch.vtensor<[2560,1,1],f16> -> !torch.vtensor<[2,2560,16,16],f32>
%3450 = torch.aten.unsqueeze %347, %int-1 : !torch.vtensor<[2560],f16>, !torch.int -> !torch.vtensor<[2560,1],f16>
%3451 = torch.aten.unsqueeze %3450, %int-1 : !torch.vtensor<[2560,1],f16>, !torch.int -> !torch.vtensor<[2560,1,1],f16>
%3452 = torch.aten.add.Tensor %3449, %3451, %int1 : !torch.vtensor<[2,2560,16,16],f32>, !torch.vtensor<[2560,1,1],f16>, !torch.int -> !torch.vtensor<[2,2560,16,16],f32>
%3453 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%3454 = torch.aten.to.dtype %3453, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%3455 = torch.prim.ListConstruct %int2, %int2560, %int16, %int16 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3456 = torch.aten.broadcast_to %3454, %3455 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,2560,16,16],f16>
%3457 = torch.valsem.aten.copy %3456, %3452, %false : !torch.vtensor<[2,2560,16,16],f16>, !torch.vtensor<[2,2560,16,16],f32>, !torch.bool -> !torch.vtensor<[2,2560,16,16],f16>
%3458 = torch.aten.sigmoid %3457 : !torch.vtensor<[2,2560,16,16],f16> -> !torch.vtensor<[2,2560,16,16],f16>
%3459 = torch.aten.mul.Tensor %3458, %3457 : !torch.vtensor<[2,2560,16,16],f16>, !torch.vtensor<[2,2560,16,16],f16> -> !torch.vtensor<[2,2560,16,16],f16>
%3460 = torch.aten.convolution %3459, %348, %349, %744, %744, %744, %false, %745, %int1 : !torch.vtensor<[2,2560,16,16],f16>, !torch.vtensor<[1280,2560,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
%3461 = torch.aten.sigmoid %743 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%3462 = torch.aten.mul.Tensor %3461, %743 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%3463 = torch.aten.transpose.int %350, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%3464 = torch.aten.mm %3462, %3463 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[2,1280],f16>
%3465 = torch.aten.mul.Scalar %351, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16>
%3466 = torch.aten.add.Tensor %3465, %3464, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[2,1280],f16>, !torch.int -> !torch.vtensor<[2,1280],f16>
%3467 = torch.aten.slice.Tensor %3466, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,1280],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
%3468 = torch.aten.slice.Tensor %3467, %int1, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,1280],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
%3469 = torch.aten.unsqueeze %3468, %int2 : !torch.vtensor<[2,1280],f16>, !torch.int -> !torch.vtensor<[2,1280,1],f16>
%3470 = torch.aten.unsqueeze %3469, %int3 : !torch.vtensor<[2,1280,1],f16>, !torch.int -> !torch.vtensor<[2,1280,1,1],f16>
%3471 = torch.aten.add.Tensor %3460, %3470, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
%3472 = torch.aten.view %3471, %2027 : !torch.vtensor<[2,1280,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,32,40,256],f16>
%3473 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%3474 = torch.aten.to.dtype %3473, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%3475 = torch.prim.ListConstruct %int2, %int32, %int40, %int256 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3476 = torch.aten.broadcast_to %3474, %3475 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,40,256],f32>
%3477 = torch.valsem.aten.copy %3476, %3472, %false : !torch.vtensor<[2,32,40,256],f32>, !torch.vtensor<[2,32,40,256],f16>, !torch.bool -> !torch.vtensor<[2,32,40,256],f32>
%3478 = torch.aten.to.dtype %3477, %int7, %false, %false, %none : !torch.vtensor<[2,32,40,256],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,40,256],f64>
%3479 = torch.aten.sum.dim_IntList %3478, %754, %true, %none : !torch.vtensor<[2,32,40,256],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%3480 = torch.aten.div.Scalar %3479, %int10240 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%3481 = torch.aten.sub.Tensor %3478, %3480, %float1.000000e00 : !torch.vtensor<[2,32,40,256],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,40,256],f64>
%3482 = torch.aten.mul.Tensor %3481, %3481 : !torch.vtensor<[2,32,40,256],f64>, !torch.vtensor<[2,32,40,256],f64> -> !torch.vtensor<[2,32,40,256],f64>
%3483 = torch.aten.sum.dim_IntList %3482, %754, %true, %none : !torch.vtensor<[2,32,40,256],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%3484 = torch.aten.div.Scalar %3483, %int10240 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%3485 = torch.aten.to.dtype %3484, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%3486 = torch.aten.sum.dim_IntList %3477, %754, %true, %none : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%3487 = torch.aten.div.Scalar %3486, %int10240 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%3488 = torch.aten.add.Tensor %3485, %6, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%3489 = torch.aten.rsqrt %3488 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%3490 = torch.aten.sub.Tensor %3472, %3487, %int1 : !torch.vtensor<[2,32,40,256],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,256],f32>
%3491 = torch.aten.mul.Tensor %3490, %3489 : !torch.vtensor<[2,32,40,256],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,256],f32>
%3492 = torch.aten.view %3491, %2048 : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f32>
%3493 = torch.aten.unsqueeze %352, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%3494 = torch.aten.unsqueeze %3493, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%3495 = torch.aten.mul.Tensor %3492, %3494 : !torch.vtensor<[2,1280,16,16],f32>, !torch.vtensor<[1280,1,1],f16> -> !torch.vtensor<[2,1280,16,16],f32>
%3496 = torch.aten.unsqueeze %353, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%3497 = torch.aten.unsqueeze %3496, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%3498 = torch.aten.add.Tensor %3495, %3497, %int1 : !torch.vtensor<[2,1280,16,16],f32>, !torch.vtensor<[1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f32>
%3499 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%3500 = torch.aten.to.dtype %3499, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%3501 = torch.prim.ListConstruct %int2, %int1280, %int16, %int16 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3502 = torch.aten.broadcast_to %3500, %3501 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f16>
%3503 = torch.valsem.aten.copy %3502, %3498, %false : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f32>, !torch.bool -> !torch.vtensor<[2,1280,16,16],f16>
%3504 = torch.aten.sigmoid %3503 : !torch.vtensor<[2,1280,16,16],f16> -> !torch.vtensor<[2,1280,16,16],f16>
%3505 = torch.aten.mul.Tensor %3504, %3503 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f16> -> !torch.vtensor<[2,1280,16,16],f16>
%3506 = torch.aten.convolution %3505, %354, %355, %744, %744, %744, %false, %745, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
%3507 = torch.aten.convolution %3422, %356, %357, %744, %745, %744, %false, %745, %int1 : !torch.vtensor<[2,2560,16,16],f16>, !torch.vtensor<[1280,2560,1,1],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
%3508 = torch.aten.add.Tensor %3507, %3506, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
%3509 = torch.aten.div.Tensor %3508, %5 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[2,1280,16,16],f16>
%3510 = torch.aten.clone %3509, %int0 : !torch.vtensor<[2,1280,16,16],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
%3511 = torch.aten.view %3510, %2027 : !torch.vtensor<[2,1280,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,32,40,256],f16>
%3512 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%3513 = torch.aten.to.dtype %3512, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%3514 = torch.prim.ListConstruct %int2, %int32, %int40, %int256 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3515 = torch.aten.broadcast_to %3513, %3514 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,40,256],f32>
%3516 = torch.valsem.aten.copy %3515, %3511, %false : !torch.vtensor<[2,32,40,256],f32>, !torch.vtensor<[2,32,40,256],f16>, !torch.bool -> !torch.vtensor<[2,32,40,256],f32>
%3517 = torch.aten.to.dtype %3516, %int7, %false, %false, %none : !torch.vtensor<[2,32,40,256],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,40,256],f64>
%3518 = torch.aten.sum.dim_IntList %3517, %754, %true, %none : !torch.vtensor<[2,32,40,256],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%3519 = torch.aten.div.Scalar %3518, %int10240 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%3520 = torch.aten.sub.Tensor %3517, %3519, %float1.000000e00 : !torch.vtensor<[2,32,40,256],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,40,256],f64>
%3521 = torch.aten.mul.Tensor %3520, %3520 : !torch.vtensor<[2,32,40,256],f64>, !torch.vtensor<[2,32,40,256],f64> -> !torch.vtensor<[2,32,40,256],f64>
%3522 = torch.aten.sum.dim_IntList %3521, %754, %true, %none : !torch.vtensor<[2,32,40,256],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%3523 = torch.aten.div.Scalar %3522, %int10240 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%3524 = torch.aten.to.dtype %3523, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%3525 = torch.aten.sum.dim_IntList %3516, %754, %true, %none : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%3526 = torch.aten.div.Scalar %3525, %int10240 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%3527 = torch.aten.add.Tensor %3524, %4, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%3528 = torch.aten.rsqrt %3527 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%3529 = torch.aten.sub.Tensor %3511, %3526, %int1 : !torch.vtensor<[2,32,40,256],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,256],f32>
%3530 = torch.aten.mul.Tensor %3529, %3528 : !torch.vtensor<[2,32,40,256],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,256],f32>
%3531 = torch.aten.view %3530, %2048 : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f32>
%3532 = torch.aten.unsqueeze %358, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%3533 = torch.aten.unsqueeze %3532, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%3534 = torch.aten.mul.Tensor %3531, %3533 : !torch.vtensor<[2,1280,16,16],f32>, !torch.vtensor<[1280,1,1],f16> -> !torch.vtensor<[2,1280,16,16],f32>
%3535 = torch.aten.unsqueeze %359, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%3536 = torch.aten.unsqueeze %3535, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%3537 = torch.aten.add.Tensor %3534, %3536, %int1 : !torch.vtensor<[2,1280,16,16],f32>, !torch.vtensor<[1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f32>
%3538 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%3539 = torch.aten.to.dtype %3538, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%3540 = torch.prim.ListConstruct %int2, %int1280, %int16, %int16 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3541 = torch.aten.broadcast_to %3539, %3540 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f16>
%3542 = torch.valsem.aten.copy %3541, %3537, %false : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f32>, !torch.bool -> !torch.vtensor<[2,1280,16,16],f16>
%3543 = torch.aten.convolution %3542, %360, %361, %744, %745, %744, %false, %745, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[1280,1280,1,1],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
%3544 = torch.aten.permute %3543, %866 : !torch.vtensor<[2,1280,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,16,16,1280],f16>
%3545 = torch.aten.view %3544, %2102 : !torch.vtensor<[2,16,16,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%3546 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%3547 = torch.aten.sum.dim_IntList %3545, %3546, %true, %none : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,256,1],f16>
%3548 = torch.aten.div.Scalar %3547, %int1280 : !torch.vtensor<[2,256,1],f16>, !torch.int -> !torch.vtensor<[2,256,1],f16>
%3549 = torch.prim.ListConstruct %int2, %int256, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3550 = torch.aten.broadcast_to %3548, %3549 : !torch.vtensor<[2,256,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%3551 = torch.aten.sub.Tensor %3545, %3550, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16>
%3552 = torch.aten.mul.Tensor %3551, %3551 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16>
%3553 = torch.aten.sum.dim_IntList %3552, %3546, %true, %none : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,256,1],f16>
%3554 = torch.aten.div.Scalar %3553, %int1280 : !torch.vtensor<[2,256,1],f16>, !torch.int -> !torch.vtensor<[2,256,1],f16>
%3555 = torch.aten.add.Scalar %3554, %float1.000000e-05, %int1 : !torch.vtensor<[2,256,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,256,1],f16>
%3556 = torch.aten.rsqrt %3555 : !torch.vtensor<[2,256,1],f16> -> !torch.vtensor<[2,256,1],f16>
%3557 = torch.prim.ListConstruct %int2, %int256, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3558 = torch.aten.broadcast_to %3556, %3557 : !torch.vtensor<[2,256,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%3559 = torch.aten.mul.Tensor %3551, %3558 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16>
%3560 = torch.aten.mul.Tensor %3559, %362 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[1280],f16> -> !torch.vtensor<[2,256,1280],f16>
%3561 = torch.aten.add.Tensor %3560, %363, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16>
%3562 = torch.aten.transpose.int %364, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%3563 = torch.aten.view %3561, %2121 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
%3564 = torch.aten.mm %3563, %3562 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16>
%3565 = torch.aten.view %3564, %2102 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%3566 = torch.aten.transpose.int %365, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%3567 = torch.aten.view %3561, %2121 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
%3568 = torch.aten.mm %3567, %3566 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16>
%3569 = torch.aten.view %3568, %2102 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%3570 = torch.aten.transpose.int %366, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%3571 = torch.aten.view %3561, %2121 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
%3572 = torch.aten.mm %3571, %3570 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16>
%3573 = torch.aten.view %3572, %2102 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%3574 = torch.aten.view %3565, %2133 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16>
%3575 = torch.aten.permute %3574, %901 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16>
%3576 = torch.aten.clone %3575, %int0 : !torch.vtensor<[2,8,256,160],f16>, !torch.int -> !torch.vtensor<[2,8,256,160],f16>
%3577 = torch.aten.view %3576, %2137 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%3578 = torch.aten.view %3569, %2133 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16>
%3579 = torch.aten.permute %3578, %901 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16>
%3580 = torch.aten.clone %3579, %int0 : !torch.vtensor<[2,8,256,160],f16>, !torch.int -> !torch.vtensor<[2,8,256,160],f16>
%3581 = torch.aten.view %3580, %2137 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%3582 = torch.aten.view %3573, %2133 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16>
%3583 = torch.aten.permute %3582, %901 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16>
%3584 = torch.aten.clone %3583, %int0 : !torch.vtensor<[2,8,256,160],f16>, !torch.int -> !torch.vtensor<[2,8,256,160],f16>
%3585 = torch.aten.view %3584, %2137 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%3586 = torch.aten.transpose.int %3581, %int-1, %int-2 : !torch.vtensor<[16,256,160],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,160,256],f16>
%3587 = torch.aten.broadcast_to %3577, %2137 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%3588 = torch.aten.view %3587, %2137 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%3589 = torch.aten.broadcast_to %3586, %2150 : !torch.vtensor<[16,160,256],f16>, !torch.list<int> -> !torch.vtensor<[16,160,256],f16>
%3590 = torch.aten.view %3589, %2150 : !torch.vtensor<[16,160,256],f16>, !torch.list<int> -> !torch.vtensor<[16,160,256],f16>
%3591 = torch.aten.bmm %3588, %3590 : !torch.vtensor<[16,256,160],f16>, !torch.vtensor<[16,160,256],f16> -> !torch.vtensor<[16,256,256],f16>
%3592 = torch.aten.view %3591, %2154 : !torch.vtensor<[16,256,256],f16>, !torch.list<int> -> !torch.vtensor<[16,256,256],f16>
%3593 = torch.aten.mul.Tensor %3592, %1 : !torch.vtensor<[16,256,256],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,256,256],f16>
%values_26, %indices_27 = torch.aten.max.dim %3593, %int-1, %true : !torch.vtensor<[16,256,256],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,256,1],f16>, !torch.vtensor<[16,256,1],si64>
%3594 = torch.aten.sub.Tensor %3593, %values_26, %float1.000000e00 : !torch.vtensor<[16,256,256],f16>, !torch.vtensor<[16,256,1],f16>, !torch.float -> !torch.vtensor<[16,256,256],f16>
%3595 = torch.aten.exp %3594 : !torch.vtensor<[16,256,256],f16> -> !torch.vtensor<[16,256,256],f16>
%3596 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%3597 = torch.aten.sum.dim_IntList %3595, %3596, %true, %none : !torch.vtensor<[16,256,256],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[16,256,1],f16>
%3598 = torch.aten.div.Tensor %3595, %3597 : !torch.vtensor<[16,256,256],f16>, !torch.vtensor<[16,256,1],f16> -> !torch.vtensor<[16,256,256],f16>
%3599 = torch.aten.broadcast_to %3598, %2154 : !torch.vtensor<[16,256,256],f16>, !torch.list<int> -> !torch.vtensor<[16,256,256],f16>
%3600 = torch.aten.view %3599, %2154 : !torch.vtensor<[16,256,256],f16>, !torch.list<int> -> !torch.vtensor<[16,256,256],f16>
%3601 = torch.aten.broadcast_to %3585, %2137 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%3602 = torch.aten.view %3601, %2137 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%3603 = torch.aten.bmm %3600, %3602 : !torch.vtensor<[16,256,256],f16>, !torch.vtensor<[16,256,160],f16> -> !torch.vtensor<[16,256,160],f16>
%3604 = torch.aten.view %3603, %2137 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%3605 = torch.aten.view %3604, %2168 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16>
%3606 = torch.aten.permute %3605, %901 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16>
%3607 = torch.aten.clone %3606, %int0 : !torch.vtensor<[2,256,8,160],f16>, !torch.int -> !torch.vtensor<[2,256,8,160],f16>
%3608 = torch.aten.view %3607, %2102 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%3609 = torch.aten.transpose.int %367, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%3610 = torch.aten.view %3608, %2121 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
%3611 = torch.aten.mm %3610, %3609 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16>
%3612 = torch.aten.mul.Scalar %368, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16>
%3613 = torch.aten.add.Tensor %3612, %3611, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[512,1280],f16>, !torch.int -> !torch.vtensor<[512,1280],f16>
%3614 = torch.aten.view %3613, %2102 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%3615 = torch.aten.add.Tensor %3614, %3545, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16>
%3616 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%3617 = torch.aten.sum.dim_IntList %3615, %3616, %true, %none : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,256,1],f16>
%3618 = torch.aten.div.Scalar %3617, %int1280 : !torch.vtensor<[2,256,1],f16>, !torch.int -> !torch.vtensor<[2,256,1],f16>
%3619 = torch.prim.ListConstruct %int2, %int256, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3620 = torch.aten.broadcast_to %3618, %3619 : !torch.vtensor<[2,256,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%3621 = torch.aten.sub.Tensor %3615, %3620, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16>
%3622 = torch.aten.mul.Tensor %3621, %3621 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16>
%3623 = torch.aten.sum.dim_IntList %3622, %3616, %true, %none : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,256,1],f16>
%3624 = torch.aten.div.Scalar %3623, %int1280 : !torch.vtensor<[2,256,1],f16>, !torch.int -> !torch.vtensor<[2,256,1],f16>
%3625 = torch.aten.add.Scalar %3624, %float1.000000e-05, %int1 : !torch.vtensor<[2,256,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,256,1],f16>
%3626 = torch.aten.rsqrt %3625 : !torch.vtensor<[2,256,1],f16> -> !torch.vtensor<[2,256,1],f16>
%3627 = torch.prim.ListConstruct %int2, %int256, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3628 = torch.aten.broadcast_to %3626, %3627 : !torch.vtensor<[2,256,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%3629 = torch.aten.mul.Tensor %3621, %3628 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16>
%3630 = torch.aten.mul.Tensor %3629, %369 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[1280],f16> -> !torch.vtensor<[2,256,1280],f16>
%3631 = torch.aten.add.Tensor %3630, %370, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16>
%3632 = torch.aten.transpose.int %371, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%3633 = torch.aten.view %3631, %2121 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
%3634 = torch.aten.mm %3633, %3632 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16>
%3635 = torch.aten.view %3634, %2102 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%3636 = torch.aten.transpose.int %372, %int0, %int1 : !torch.vtensor<[1280,768],f16>, !torch.int, !torch.int -> !torch.vtensor<[768,1280],f16>
%3637 = torch.aten.view %arg2, %968 : !torch.vtensor<[2,77,768],f16>, !torch.list<int> -> !torch.vtensor<[154,768],f16>
%3638 = torch.aten.mm %3637, %3636 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,1280],f16> -> !torch.vtensor<[154,1280],f16>
%3639 = torch.aten.view %3638, %2203 : !torch.vtensor<[154,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,77,1280],f16>
%3640 = torch.aten.transpose.int %373, %int0, %int1 : !torch.vtensor<[1280,768],f16>, !torch.int, !torch.int -> !torch.vtensor<[768,1280],f16>
%3641 = torch.aten.view %arg2, %968 : !torch.vtensor<[2,77,768],f16>, !torch.list<int> -> !torch.vtensor<[154,768],f16>
%3642 = torch.aten.mm %3641, %3640 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,1280],f16> -> !torch.vtensor<[154,1280],f16>
%3643 = torch.aten.view %3642, %2203 : !torch.vtensor<[154,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,77,1280],f16>
%3644 = torch.aten.view %3635, %2133 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16>
%3645 = torch.aten.permute %3644, %901 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16>
%3646 = torch.aten.clone %3645, %int0 : !torch.vtensor<[2,8,256,160],f16>, !torch.int -> !torch.vtensor<[2,8,256,160],f16>
%3647 = torch.aten.view %3646, %2137 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%3648 = torch.aten.view %3639, %2213 : !torch.vtensor<[2,77,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,77,8,160],f16>
%3649 = torch.aten.permute %3648, %901 : !torch.vtensor<[2,77,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,160],f16>
%3650 = torch.aten.clone %3649, %int0 : !torch.vtensor<[2,8,77,160],f16>, !torch.int -> !torch.vtensor<[2,8,77,160],f16>
%3651 = torch.aten.view %3650, %2217 : !torch.vtensor<[2,8,77,160],f16>, !torch.list<int> -> !torch.vtensor<[16,77,160],f16>
%3652 = torch.aten.view %3643, %2213 : !torch.vtensor<[2,77,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,77,8,160],f16>
%3653 = torch.aten.permute %3652, %901 : !torch.vtensor<[2,77,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,160],f16>
%3654 = torch.aten.clone %3653, %int0 : !torch.vtensor<[2,8,77,160],f16>, !torch.int -> !torch.vtensor<[2,8,77,160],f16>
%3655 = torch.aten.view %3654, %2217 : !torch.vtensor<[2,8,77,160],f16>, !torch.list<int> -> !torch.vtensor<[16,77,160],f16>
%3656 = torch.aten.transpose.int %3651, %int-1, %int-2 : !torch.vtensor<[16,77,160],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,160,77],f16>
%3657 = torch.aten.broadcast_to %3647, %2137 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%3658 = torch.aten.view %3657, %2137 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%3659 = torch.aten.broadcast_to %3656, %2226 : !torch.vtensor<[16,160,77],f16>, !torch.list<int> -> !torch.vtensor<[16,160,77],f16>
%3660 = torch.aten.view %3659, %2226 : !torch.vtensor<[16,160,77],f16>, !torch.list<int> -> !torch.vtensor<[16,160,77],f16>
%3661 = torch.aten.bmm %3658, %3660 : !torch.vtensor<[16,256,160],f16>, !torch.vtensor<[16,160,77],f16> -> !torch.vtensor<[16,256,77],f16>
%3662 = torch.aten.view %3661, %2230 : !torch.vtensor<[16,256,77],f16>, !torch.list<int> -> !torch.vtensor<[16,256,77],f16>
%3663 = torch.aten.mul.Tensor %3662, %1 : !torch.vtensor<[16,256,77],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,256,77],f16>
%values_28, %indices_29 = torch.aten.max.dim %3663, %int-1, %true : !torch.vtensor<[16,256,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,256,1],f16>, !torch.vtensor<[16,256,1],si64>
%3664 = torch.aten.sub.Tensor %3663, %values_28, %float1.000000e00 : !torch.vtensor<[16,256,77],f16>, !torch.vtensor<[16,256,1],f16>, !torch.float -> !torch.vtensor<[16,256,77],f16>
%3665 = torch.aten.exp %3664 : !torch.vtensor<[16,256,77],f16> -> !torch.vtensor<[16,256,77],f16>
%3666 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%3667 = torch.aten.sum.dim_IntList %3665, %3666, %true, %none : !torch.vtensor<[16,256,77],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[16,256,1],f16>
%3668 = torch.aten.div.Tensor %3665, %3667 : !torch.vtensor<[16,256,77],f16>, !torch.vtensor<[16,256,1],f16> -> !torch.vtensor<[16,256,77],f16>
%3669 = torch.aten.broadcast_to %3668, %2230 : !torch.vtensor<[16,256,77],f16>, !torch.list<int> -> !torch.vtensor<[16,256,77],f16>
%3670 = torch.aten.view %3669, %2230 : !torch.vtensor<[16,256,77],f16>, !torch.list<int> -> !torch.vtensor<[16,256,77],f16>
%3671 = torch.aten.broadcast_to %3655, %2217 : !torch.vtensor<[16,77,160],f16>, !torch.list<int> -> !torch.vtensor<[16,77,160],f16>
%3672 = torch.aten.view %3671, %2217 : !torch.vtensor<[16,77,160],f16>, !torch.list<int> -> !torch.vtensor<[16,77,160],f16>
%3673 = torch.aten.bmm %3670, %3672 : !torch.vtensor<[16,256,77],f16>, !torch.vtensor<[16,77,160],f16> -> !torch.vtensor<[16,256,160],f16>
%3674 = torch.aten.view %3673, %2137 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%3675 = torch.aten.view %3674, %2168 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16>
%3676 = torch.aten.permute %3675, %901 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16>
%3677 = torch.aten.clone %3676, %int0 : !torch.vtensor<[2,256,8,160],f16>, !torch.int -> !torch.vtensor<[2,256,8,160],f16>
%3678 = torch.aten.view %3677, %2102 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%3679 = torch.aten.transpose.int %374, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%3680 = torch.aten.view %3678, %2121 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
%3681 = torch.aten.mm %3680, %3679 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16>
%3682 = torch.aten.mul.Scalar %375, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16>
%3683 = torch.aten.add.Tensor %3682, %3681, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[512,1280],f16>, !torch.int -> !torch.vtensor<[512,1280],f16>
%3684 = torch.aten.view %3683, %2102 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%3685 = torch.aten.add.Tensor %3684, %3615, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16>
%3686 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%3687 = torch.aten.sum.dim_IntList %3685, %3686, %true, %none : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,256,1],f16>
%3688 = torch.aten.div.Scalar %3687, %int1280 : !torch.vtensor<[2,256,1],f16>, !torch.int -> !torch.vtensor<[2,256,1],f16>
%3689 = torch.prim.ListConstruct %int2, %int256, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3690 = torch.aten.broadcast_to %3688, %3689 : !torch.vtensor<[2,256,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%3691 = torch.aten.sub.Tensor %3685, %3690, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16>
%3692 = torch.aten.mul.Tensor %3691, %3691 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16>
%3693 = torch.aten.sum.dim_IntList %3692, %3686, %true, %none : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,256,1],f16>
%3694 = torch.aten.div.Scalar %3693, %int1280 : !torch.vtensor<[2,256,1],f16>, !torch.int -> !torch.vtensor<[2,256,1],f16>
%3695 = torch.aten.add.Scalar %3694, %float1.000000e-05, %int1 : !torch.vtensor<[2,256,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,256,1],f16>
%3696 = torch.aten.rsqrt %3695 : !torch.vtensor<[2,256,1],f16> -> !torch.vtensor<[2,256,1],f16>
%3697 = torch.prim.ListConstruct %int2, %int256, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3698 = torch.aten.broadcast_to %3696, %3697 : !torch.vtensor<[2,256,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%3699 = torch.aten.mul.Tensor %3691, %3698 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16>
%3700 = torch.aten.mul.Tensor %3699, %376 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[1280],f16> -> !torch.vtensor<[2,256,1280],f16>
%3701 = torch.aten.add.Tensor %3700, %377, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16>
%3702 = torch.aten.transpose.int %378, %int0, %int1 : !torch.vtensor<[10240,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,10240],f16>
%3703 = torch.aten.view %3701, %2121 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
%3704 = torch.aten.mm %3703, %3702 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,10240],f16> -> !torch.vtensor<[512,10240],f16>
%3705 = torch.aten.mul.Scalar %379, %int1 : !torch.vtensor<[10240],f16>, !torch.int -> !torch.vtensor<[10240],f16>
%3706 = torch.aten.add.Tensor %3705, %3704, %int1 : !torch.vtensor<[10240],f16>, !torch.vtensor<[512,10240],f16>, !torch.int -> !torch.vtensor<[512,10240],f16>
%3707 = torch.aten.view %3706, %2276 : !torch.vtensor<[512,10240],f16>, !torch.list<int> -> !torch.vtensor<[2,256,10240],f16>
%3708 = torch.aten.slice.Tensor %3707, %int-1, %int0, %int5120, %int1 : !torch.vtensor<[2,256,10240],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,256,5120],f16>
%3709 = torch.aten.slice.Tensor %3707, %int-1, %int5120, %int10240, %int1 : !torch.vtensor<[2,256,10240],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,256,5120],f16>
%3710 = torch.aten.gelu %3709, %str : !torch.vtensor<[2,256,5120],f16>, !torch.str -> !torch.vtensor<[2,256,5120],f16>
%3711 = torch.aten.mul.Tensor %3708, %3710 : !torch.vtensor<[2,256,5120],f16>, !torch.vtensor<[2,256,5120],f16> -> !torch.vtensor<[2,256,5120],f16>
%3712 = torch.aten.transpose.int %380, %int0, %int1 : !torch.vtensor<[1280,5120],f16>, !torch.int, !torch.int -> !torch.vtensor<[5120,1280],f16>
%3713 = torch.aten.view %3711, %2283 : !torch.vtensor<[2,256,5120],f16>, !torch.list<int> -> !torch.vtensor<[512,5120],f16>
%3714 = torch.aten.mm %3713, %3712 : !torch.vtensor<[512,5120],f16>, !torch.vtensor<[5120,1280],f16> -> !torch.vtensor<[512,1280],f16>
%3715 = torch.aten.mul.Scalar %381, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16>
%3716 = torch.aten.add.Tensor %3715, %3714, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[512,1280],f16>, !torch.int -> !torch.vtensor<[512,1280],f16>
%3717 = torch.aten.view %3716, %2102 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%3718 = torch.aten.add.Tensor %3717, %3685, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16>
%3719 = torch.aten.view %3718, %2290 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,16,16,1280],f16>
%3720 = torch.aten.permute %3719, %1060 : !torch.vtensor<[2,16,16,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f16>
%3721 = torch.aten.convolution %3720, %382, %383, %744, %745, %744, %false, %745, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[1280,1280,1,1],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
%3722 = torch.aten.add.Tensor %3721, %3509, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
%3723 = torch.prim.ListConstruct %3722, %2294 : (!torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f16>) -> !torch.list<vtensor>
%3724 = torch.aten.cat %3723, %int1 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[2,2560,16,16],f16>
%3725 = torch.aten.clone %3724, %int0 : !torch.vtensor<[2,2560,16,16],f16>, !torch.int -> !torch.vtensor<[2,2560,16,16],f16>
%3726 = torch.aten.view %3725, %3424 : !torch.vtensor<[2,2560,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,32,80,256],f16>
%3727 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%3728 = torch.aten.to.dtype %3727, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%3729 = torch.prim.ListConstruct %int2, %int32, %int80, %int256 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3730 = torch.aten.broadcast_to %3728, %3729 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,80,256],f32>
%3731 = torch.valsem.aten.copy %3730, %3726, %false : !torch.vtensor<[2,32,80,256],f32>, !torch.vtensor<[2,32,80,256],f16>, !torch.bool -> !torch.vtensor<[2,32,80,256],f32>
%3732 = torch.aten.to.dtype %3731, %int7, %false, %false, %none : !torch.vtensor<[2,32,80,256],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,80,256],f64>
%3733 = torch.aten.sum.dim_IntList %3732, %754, %true, %none : !torch.vtensor<[2,32,80,256],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%3734 = torch.aten.div.Scalar %3733, %int20480 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%3735 = torch.aten.sub.Tensor %3732, %3734, %float1.000000e00 : !torch.vtensor<[2,32,80,256],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,80,256],f64>
%3736 = torch.aten.mul.Tensor %3735, %3735 : !torch.vtensor<[2,32,80,256],f64>, !torch.vtensor<[2,32,80,256],f64> -> !torch.vtensor<[2,32,80,256],f64>
%3737 = torch.aten.sum.dim_IntList %3736, %754, %true, %none : !torch.vtensor<[2,32,80,256],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%3738 = torch.aten.div.Scalar %3737, %int20480 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%3739 = torch.aten.to.dtype %3738, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%3740 = torch.aten.sum.dim_IntList %3731, %754, %true, %none : !torch.vtensor<[2,32,80,256],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%3741 = torch.aten.div.Scalar %3740, %int20480 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%3742 = torch.aten.add.Tensor %3739, %6, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%3743 = torch.aten.rsqrt %3742 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%3744 = torch.aten.sub.Tensor %3726, %3741, %int1 : !torch.vtensor<[2,32,80,256],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,80,256],f32>
%3745 = torch.aten.mul.Tensor %3744, %3743 : !torch.vtensor<[2,32,80,256],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,80,256],f32>
%3746 = torch.aten.view %3745, %3445 : !torch.vtensor<[2,32,80,256],f32>, !torch.list<int> -> !torch.vtensor<[2,2560,16,16],f32>
%3747 = torch.aten.unsqueeze %384, %int-1 : !torch.vtensor<[2560],f16>, !torch.int -> !torch.vtensor<[2560,1],f16>
%3748 = torch.aten.unsqueeze %3747, %int-1 : !torch.vtensor<[2560,1],f16>, !torch.int -> !torch.vtensor<[2560,1,1],f16>
%3749 = torch.aten.mul.Tensor %3746, %3748 : !torch.vtensor<[2,2560,16,16],f32>, !torch.vtensor<[2560,1,1],f16> -> !torch.vtensor<[2,2560,16,16],f32>
%3750 = torch.aten.unsqueeze %385, %int-1 : !torch.vtensor<[2560],f16>, !torch.int -> !torch.vtensor<[2560,1],f16>
%3751 = torch.aten.unsqueeze %3750, %int-1 : !torch.vtensor<[2560,1],f16>, !torch.int -> !torch.vtensor<[2560,1,1],f16>
%3752 = torch.aten.add.Tensor %3749, %3751, %int1 : !torch.vtensor<[2,2560,16,16],f32>, !torch.vtensor<[2560,1,1],f16>, !torch.int -> !torch.vtensor<[2,2560,16,16],f32>
%3753 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%3754 = torch.aten.to.dtype %3753, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%3755 = torch.prim.ListConstruct %int2, %int2560, %int16, %int16 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3756 = torch.aten.broadcast_to %3754, %3755 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,2560,16,16],f16>
%3757 = torch.valsem.aten.copy %3756, %3752, %false : !torch.vtensor<[2,2560,16,16],f16>, !torch.vtensor<[2,2560,16,16],f32>, !torch.bool -> !torch.vtensor<[2,2560,16,16],f16>
%3758 = torch.aten.sigmoid %3757 : !torch.vtensor<[2,2560,16,16],f16> -> !torch.vtensor<[2,2560,16,16],f16>
%3759 = torch.aten.mul.Tensor %3758, %3757 : !torch.vtensor<[2,2560,16,16],f16>, !torch.vtensor<[2,2560,16,16],f16> -> !torch.vtensor<[2,2560,16,16],f16>
%3760 = torch.aten.convolution %3759, %386, %387, %744, %744, %744, %false, %745, %int1 : !torch.vtensor<[2,2560,16,16],f16>, !torch.vtensor<[1280,2560,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
%3761 = torch.aten.sigmoid %743 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%3762 = torch.aten.mul.Tensor %3761, %743 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%3763 = torch.aten.transpose.int %388, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%3764 = torch.aten.mm %3762, %3763 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[2,1280],f16>
%3765 = torch.aten.mul.Scalar %389, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16>
%3766 = torch.aten.add.Tensor %3765, %3764, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[2,1280],f16>, !torch.int -> !torch.vtensor<[2,1280],f16>
%3767 = torch.aten.slice.Tensor %3766, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,1280],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
%3768 = torch.aten.slice.Tensor %3767, %int1, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,1280],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
%3769 = torch.aten.unsqueeze %3768, %int2 : !torch.vtensor<[2,1280],f16>, !torch.int -> !torch.vtensor<[2,1280,1],f16>
%3770 = torch.aten.unsqueeze %3769, %int3 : !torch.vtensor<[2,1280,1],f16>, !torch.int -> !torch.vtensor<[2,1280,1,1],f16>
%3771 = torch.aten.add.Tensor %3760, %3770, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
%3772 = torch.aten.view %3771, %2027 : !torch.vtensor<[2,1280,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,32,40,256],f16>
%3773 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%3774 = torch.aten.to.dtype %3773, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%3775 = torch.prim.ListConstruct %int2, %int32, %int40, %int256 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3776 = torch.aten.broadcast_to %3774, %3775 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,40,256],f32>
%3777 = torch.valsem.aten.copy %3776, %3772, %false : !torch.vtensor<[2,32,40,256],f32>, !torch.vtensor<[2,32,40,256],f16>, !torch.bool -> !torch.vtensor<[2,32,40,256],f32>
%3778 = torch.aten.to.dtype %3777, %int7, %false, %false, %none : !torch.vtensor<[2,32,40,256],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,40,256],f64>
%3779 = torch.aten.sum.dim_IntList %3778, %754, %true, %none : !torch.vtensor<[2,32,40,256],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%3780 = torch.aten.div.Scalar %3779, %int10240 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%3781 = torch.aten.sub.Tensor %3778, %3780, %float1.000000e00 : !torch.vtensor<[2,32,40,256],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,40,256],f64>
%3782 = torch.aten.mul.Tensor %3781, %3781 : !torch.vtensor<[2,32,40,256],f64>, !torch.vtensor<[2,32,40,256],f64> -> !torch.vtensor<[2,32,40,256],f64>
%3783 = torch.aten.sum.dim_IntList %3782, %754, %true, %none : !torch.vtensor<[2,32,40,256],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%3784 = torch.aten.div.Scalar %3783, %int10240 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%3785 = torch.aten.to.dtype %3784, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%3786 = torch.aten.sum.dim_IntList %3777, %754, %true, %none : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%3787 = torch.aten.div.Scalar %3786, %int10240 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%3788 = torch.aten.add.Tensor %3785, %6, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%3789 = torch.aten.rsqrt %3788 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%3790 = torch.aten.sub.Tensor %3772, %3787, %int1 : !torch.vtensor<[2,32,40,256],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,256],f32>
%3791 = torch.aten.mul.Tensor %3790, %3789 : !torch.vtensor<[2,32,40,256],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,256],f32>
%3792 = torch.aten.view %3791, %2048 : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f32>
%3793 = torch.aten.unsqueeze %390, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%3794 = torch.aten.unsqueeze %3793, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%3795 = torch.aten.mul.Tensor %3792, %3794 : !torch.vtensor<[2,1280,16,16],f32>, !torch.vtensor<[1280,1,1],f16> -> !torch.vtensor<[2,1280,16,16],f32>
%3796 = torch.aten.unsqueeze %391, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%3797 = torch.aten.unsqueeze %3796, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%3798 = torch.aten.add.Tensor %3795, %3797, %int1 : !torch.vtensor<[2,1280,16,16],f32>, !torch.vtensor<[1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f32>
%3799 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%3800 = torch.aten.to.dtype %3799, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%3801 = torch.prim.ListConstruct %int2, %int1280, %int16, %int16 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3802 = torch.aten.broadcast_to %3800, %3801 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f16>
%3803 = torch.valsem.aten.copy %3802, %3798, %false : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f32>, !torch.bool -> !torch.vtensor<[2,1280,16,16],f16>
%3804 = torch.aten.sigmoid %3803 : !torch.vtensor<[2,1280,16,16],f16> -> !torch.vtensor<[2,1280,16,16],f16>
%3805 = torch.aten.mul.Tensor %3804, %3803 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f16> -> !torch.vtensor<[2,1280,16,16],f16>
%3806 = torch.aten.convolution %3805, %392, %393, %744, %744, %744, %false, %745, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
%3807 = torch.aten.convolution %3724, %394, %395, %744, %745, %744, %false, %745, %int1 : !torch.vtensor<[2,2560,16,16],f16>, !torch.vtensor<[1280,2560,1,1],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
%3808 = torch.aten.add.Tensor %3807, %3806, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
%3809 = torch.aten.div.Tensor %3808, %5 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[2,1280,16,16],f16>
%3810 = torch.aten.clone %3809, %int0 : !torch.vtensor<[2,1280,16,16],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
%3811 = torch.aten.view %3810, %2027 : !torch.vtensor<[2,1280,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,32,40,256],f16>
%3812 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%3813 = torch.aten.to.dtype %3812, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%3814 = torch.prim.ListConstruct %int2, %int32, %int40, %int256 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3815 = torch.aten.broadcast_to %3813, %3814 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,40,256],f32>
%3816 = torch.valsem.aten.copy %3815, %3811, %false : !torch.vtensor<[2,32,40,256],f32>, !torch.vtensor<[2,32,40,256],f16>, !torch.bool -> !torch.vtensor<[2,32,40,256],f32>
%3817 = torch.aten.to.dtype %3816, %int7, %false, %false, %none : !torch.vtensor<[2,32,40,256],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,40,256],f64>
%3818 = torch.aten.sum.dim_IntList %3817, %754, %true, %none : !torch.vtensor<[2,32,40,256],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%3819 = torch.aten.div.Scalar %3818, %int10240 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%3820 = torch.aten.sub.Tensor %3817, %3819, %float1.000000e00 : !torch.vtensor<[2,32,40,256],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,40,256],f64>
%3821 = torch.aten.mul.Tensor %3820, %3820 : !torch.vtensor<[2,32,40,256],f64>, !torch.vtensor<[2,32,40,256],f64> -> !torch.vtensor<[2,32,40,256],f64>
%3822 = torch.aten.sum.dim_IntList %3821, %754, %true, %none : !torch.vtensor<[2,32,40,256],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%3823 = torch.aten.div.Scalar %3822, %int10240 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%3824 = torch.aten.to.dtype %3823, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%3825 = torch.aten.sum.dim_IntList %3816, %754, %true, %none : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%3826 = torch.aten.div.Scalar %3825, %int10240 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%3827 = torch.aten.add.Tensor %3824, %4, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%3828 = torch.aten.rsqrt %3827 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%3829 = torch.aten.sub.Tensor %3811, %3826, %int1 : !torch.vtensor<[2,32,40,256],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,256],f32>
%3830 = torch.aten.mul.Tensor %3829, %3828 : !torch.vtensor<[2,32,40,256],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,256],f32>
%3831 = torch.aten.view %3830, %2048 : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f32>
%3832 = torch.aten.unsqueeze %396, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%3833 = torch.aten.unsqueeze %3832, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%3834 = torch.aten.mul.Tensor %3831, %3833 : !torch.vtensor<[2,1280,16,16],f32>, !torch.vtensor<[1280,1,1],f16> -> !torch.vtensor<[2,1280,16,16],f32>
%3835 = torch.aten.unsqueeze %397, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%3836 = torch.aten.unsqueeze %3835, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%3837 = torch.aten.add.Tensor %3834, %3836, %int1 : !torch.vtensor<[2,1280,16,16],f32>, !torch.vtensor<[1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f32>
%3838 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%3839 = torch.aten.to.dtype %3838, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%3840 = torch.prim.ListConstruct %int2, %int1280, %int16, %int16 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3841 = torch.aten.broadcast_to %3839, %3840 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f16>
%3842 = torch.valsem.aten.copy %3841, %3837, %false : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f32>, !torch.bool -> !torch.vtensor<[2,1280,16,16],f16>
%3843 = torch.aten.convolution %3842, %398, %399, %744, %745, %744, %false, %745, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[1280,1280,1,1],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
%3844 = torch.aten.permute %3843, %866 : !torch.vtensor<[2,1280,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,16,16,1280],f16>
%3845 = torch.aten.view %3844, %2102 : !torch.vtensor<[2,16,16,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%3846 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%3847 = torch.aten.sum.dim_IntList %3845, %3846, %true, %none : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,256,1],f16>
%3848 = torch.aten.div.Scalar %3847, %int1280 : !torch.vtensor<[2,256,1],f16>, !torch.int -> !torch.vtensor<[2,256,1],f16>
%3849 = torch.prim.ListConstruct %int2, %int256, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3850 = torch.aten.broadcast_to %3848, %3849 : !torch.vtensor<[2,256,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%3851 = torch.aten.sub.Tensor %3845, %3850, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16>
%3852 = torch.aten.mul.Tensor %3851, %3851 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16>
%3853 = torch.aten.sum.dim_IntList %3852, %3846, %true, %none : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,256,1],f16>
%3854 = torch.aten.div.Scalar %3853, %int1280 : !torch.vtensor<[2,256,1],f16>, !torch.int -> !torch.vtensor<[2,256,1],f16>
%3855 = torch.aten.add.Scalar %3854, %float1.000000e-05, %int1 : !torch.vtensor<[2,256,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,256,1],f16>
%3856 = torch.aten.rsqrt %3855 : !torch.vtensor<[2,256,1],f16> -> !torch.vtensor<[2,256,1],f16>
%3857 = torch.prim.ListConstruct %int2, %int256, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3858 = torch.aten.broadcast_to %3856, %3857 : !torch.vtensor<[2,256,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%3859 = torch.aten.mul.Tensor %3851, %3858 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16>
%3860 = torch.aten.mul.Tensor %3859, %400 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[1280],f16> -> !torch.vtensor<[2,256,1280],f16>
%3861 = torch.aten.add.Tensor %3860, %401, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16>
%3862 = torch.aten.transpose.int %402, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%3863 = torch.aten.view %3861, %2121 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
%3864 = torch.aten.mm %3863, %3862 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16>
%3865 = torch.aten.view %3864, %2102 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%3866 = torch.aten.transpose.int %403, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%3867 = torch.aten.view %3861, %2121 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
%3868 = torch.aten.mm %3867, %3866 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16>
%3869 = torch.aten.view %3868, %2102 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%3870 = torch.aten.transpose.int %404, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%3871 = torch.aten.view %3861, %2121 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
%3872 = torch.aten.mm %3871, %3870 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16>
%3873 = torch.aten.view %3872, %2102 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%3874 = torch.aten.view %3865, %2133 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16>
%3875 = torch.aten.permute %3874, %901 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16>
%3876 = torch.aten.clone %3875, %int0 : !torch.vtensor<[2,8,256,160],f16>, !torch.int -> !torch.vtensor<[2,8,256,160],f16>
%3877 = torch.aten.view %3876, %2137 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%3878 = torch.aten.view %3869, %2133 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16>
%3879 = torch.aten.permute %3878, %901 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16>
%3880 = torch.aten.clone %3879, %int0 : !torch.vtensor<[2,8,256,160],f16>, !torch.int -> !torch.vtensor<[2,8,256,160],f16>
%3881 = torch.aten.view %3880, %2137 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%3882 = torch.aten.view %3873, %2133 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16>
%3883 = torch.aten.permute %3882, %901 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16>
%3884 = torch.aten.clone %3883, %int0 : !torch.vtensor<[2,8,256,160],f16>, !torch.int -> !torch.vtensor<[2,8,256,160],f16>
%3885 = torch.aten.view %3884, %2137 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%3886 = torch.aten.transpose.int %3881, %int-1, %int-2 : !torch.vtensor<[16,256,160],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,160,256],f16>
%3887 = torch.aten.broadcast_to %3877, %2137 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%3888 = torch.aten.view %3887, %2137 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%3889 = torch.aten.broadcast_to %3886, %2150 : !torch.vtensor<[16,160,256],f16>, !torch.list<int> -> !torch.vtensor<[16,160,256],f16>
%3890 = torch.aten.view %3889, %2150 : !torch.vtensor<[16,160,256],f16>, !torch.list<int> -> !torch.vtensor<[16,160,256],f16>
%3891 = torch.aten.bmm %3888, %3890 : !torch.vtensor<[16,256,160],f16>, !torch.vtensor<[16,160,256],f16> -> !torch.vtensor<[16,256,256],f16>
%3892 = torch.aten.view %3891, %2154 : !torch.vtensor<[16,256,256],f16>, !torch.list<int> -> !torch.vtensor<[16,256,256],f16>
%3893 = torch.aten.mul.Tensor %3892, %1 : !torch.vtensor<[16,256,256],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,256,256],f16>
%values_30, %indices_31 = torch.aten.max.dim %3893, %int-1, %true : !torch.vtensor<[16,256,256],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,256,1],f16>, !torch.vtensor<[16,256,1],si64>
%3894 = torch.aten.sub.Tensor %3893, %values_30, %float1.000000e00 : !torch.vtensor<[16,256,256],f16>, !torch.vtensor<[16,256,1],f16>, !torch.float -> !torch.vtensor<[16,256,256],f16>
%3895 = torch.aten.exp %3894 : !torch.vtensor<[16,256,256],f16> -> !torch.vtensor<[16,256,256],f16>
%3896 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%3897 = torch.aten.sum.dim_IntList %3895, %3896, %true, %none : !torch.vtensor<[16,256,256],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[16,256,1],f16>
%3898 = torch.aten.div.Tensor %3895, %3897 : !torch.vtensor<[16,256,256],f16>, !torch.vtensor<[16,256,1],f16> -> !torch.vtensor<[16,256,256],f16>
%3899 = torch.aten.broadcast_to %3898, %2154 : !torch.vtensor<[16,256,256],f16>, !torch.list<int> -> !torch.vtensor<[16,256,256],f16>
%3900 = torch.aten.view %3899, %2154 : !torch.vtensor<[16,256,256],f16>, !torch.list<int> -> !torch.vtensor<[16,256,256],f16>
%3901 = torch.aten.broadcast_to %3885, %2137 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%3902 = torch.aten.view %3901, %2137 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%3903 = torch.aten.bmm %3900, %3902 : !torch.vtensor<[16,256,256],f16>, !torch.vtensor<[16,256,160],f16> -> !torch.vtensor<[16,256,160],f16>
%3904 = torch.aten.view %3903, %2137 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%3905 = torch.aten.view %3904, %2168 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16>
%3906 = torch.aten.permute %3905, %901 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16>
%3907 = torch.aten.clone %3906, %int0 : !torch.vtensor<[2,256,8,160],f16>, !torch.int -> !torch.vtensor<[2,256,8,160],f16>
%3908 = torch.aten.view %3907, %2102 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%3909 = torch.aten.transpose.int %405, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%3910 = torch.aten.view %3908, %2121 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
%3911 = torch.aten.mm %3910, %3909 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16>
%3912 = torch.aten.mul.Scalar %406, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16>
%3913 = torch.aten.add.Tensor %3912, %3911, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[512,1280],f16>, !torch.int -> !torch.vtensor<[512,1280],f16>
%3914 = torch.aten.view %3913, %2102 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%3915 = torch.aten.add.Tensor %3914, %3845, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16>
%3916 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%3917 = torch.aten.sum.dim_IntList %3915, %3916, %true, %none : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,256,1],f16>
%3918 = torch.aten.div.Scalar %3917, %int1280 : !torch.vtensor<[2,256,1],f16>, !torch.int -> !torch.vtensor<[2,256,1],f16>
%3919 = torch.prim.ListConstruct %int2, %int256, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3920 = torch.aten.broadcast_to %3918, %3919 : !torch.vtensor<[2,256,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%3921 = torch.aten.sub.Tensor %3915, %3920, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16>
%3922 = torch.aten.mul.Tensor %3921, %3921 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16>
%3923 = torch.aten.sum.dim_IntList %3922, %3916, %true, %none : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,256,1],f16>
%3924 = torch.aten.div.Scalar %3923, %int1280 : !torch.vtensor<[2,256,1],f16>, !torch.int -> !torch.vtensor<[2,256,1],f16>
%3925 = torch.aten.add.Scalar %3924, %float1.000000e-05, %int1 : !torch.vtensor<[2,256,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,256,1],f16>
%3926 = torch.aten.rsqrt %3925 : !torch.vtensor<[2,256,1],f16> -> !torch.vtensor<[2,256,1],f16>
%3927 = torch.prim.ListConstruct %int2, %int256, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3928 = torch.aten.broadcast_to %3926, %3927 : !torch.vtensor<[2,256,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%3929 = torch.aten.mul.Tensor %3921, %3928 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16>
%3930 = torch.aten.mul.Tensor %3929, %407 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[1280],f16> -> !torch.vtensor<[2,256,1280],f16>
%3931 = torch.aten.add.Tensor %3930, %408, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16>
%3932 = torch.aten.transpose.int %409, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%3933 = torch.aten.view %3931, %2121 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
%3934 = torch.aten.mm %3933, %3932 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16>
%3935 = torch.aten.view %3934, %2102 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%3936 = torch.aten.transpose.int %410, %int0, %int1 : !torch.vtensor<[1280,768],f16>, !torch.int, !torch.int -> !torch.vtensor<[768,1280],f16>
%3937 = torch.aten.view %arg2, %968 : !torch.vtensor<[2,77,768],f16>, !torch.list<int> -> !torch.vtensor<[154,768],f16>
%3938 = torch.aten.mm %3937, %3936 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,1280],f16> -> !torch.vtensor<[154,1280],f16>
%3939 = torch.aten.view %3938, %2203 : !torch.vtensor<[154,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,77,1280],f16>
%3940 = torch.aten.transpose.int %411, %int0, %int1 : !torch.vtensor<[1280,768],f16>, !torch.int, !torch.int -> !torch.vtensor<[768,1280],f16>
%3941 = torch.aten.view %arg2, %968 : !torch.vtensor<[2,77,768],f16>, !torch.list<int> -> !torch.vtensor<[154,768],f16>
%3942 = torch.aten.mm %3941, %3940 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,1280],f16> -> !torch.vtensor<[154,1280],f16>
%3943 = torch.aten.view %3942, %2203 : !torch.vtensor<[154,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,77,1280],f16>
%3944 = torch.aten.view %3935, %2133 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16>
%3945 = torch.aten.permute %3944, %901 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16>
%3946 = torch.aten.clone %3945, %int0 : !torch.vtensor<[2,8,256,160],f16>, !torch.int -> !torch.vtensor<[2,8,256,160],f16>
%3947 = torch.aten.view %3946, %2137 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%3948 = torch.aten.view %3939, %2213 : !torch.vtensor<[2,77,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,77,8,160],f16>
%3949 = torch.aten.permute %3948, %901 : !torch.vtensor<[2,77,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,160],f16>
%3950 = torch.aten.clone %3949, %int0 : !torch.vtensor<[2,8,77,160],f16>, !torch.int -> !torch.vtensor<[2,8,77,160],f16>
%3951 = torch.aten.view %3950, %2217 : !torch.vtensor<[2,8,77,160],f16>, !torch.list<int> -> !torch.vtensor<[16,77,160],f16>
%3952 = torch.aten.view %3943, %2213 : !torch.vtensor<[2,77,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,77,8,160],f16>
%3953 = torch.aten.permute %3952, %901 : !torch.vtensor<[2,77,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,160],f16>
%3954 = torch.aten.clone %3953, %int0 : !torch.vtensor<[2,8,77,160],f16>, !torch.int -> !torch.vtensor<[2,8,77,160],f16>
%3955 = torch.aten.view %3954, %2217 : !torch.vtensor<[2,8,77,160],f16>, !torch.list<int> -> !torch.vtensor<[16,77,160],f16>
%3956 = torch.aten.transpose.int %3951, %int-1, %int-2 : !torch.vtensor<[16,77,160],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,160,77],f16>
%3957 = torch.aten.broadcast_to %3947, %2137 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%3958 = torch.aten.view %3957, %2137 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%3959 = torch.aten.broadcast_to %3956, %2226 : !torch.vtensor<[16,160,77],f16>, !torch.list<int> -> !torch.vtensor<[16,160,77],f16>
%3960 = torch.aten.view %3959, %2226 : !torch.vtensor<[16,160,77],f16>, !torch.list<int> -> !torch.vtensor<[16,160,77],f16>
%3961 = torch.aten.bmm %3958, %3960 : !torch.vtensor<[16,256,160],f16>, !torch.vtensor<[16,160,77],f16> -> !torch.vtensor<[16,256,77],f16>
%3962 = torch.aten.view %3961, %2230 : !torch.vtensor<[16,256,77],f16>, !torch.list<int> -> !torch.vtensor<[16,256,77],f16>
%3963 = torch.aten.mul.Tensor %3962, %1 : !torch.vtensor<[16,256,77],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,256,77],f16>
%values_32, %indices_33 = torch.aten.max.dim %3963, %int-1, %true : !torch.vtensor<[16,256,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,256,1],f16>, !torch.vtensor<[16,256,1],si64>
%3964 = torch.aten.sub.Tensor %3963, %values_32, %float1.000000e00 : !torch.vtensor<[16,256,77],f16>, !torch.vtensor<[16,256,1],f16>, !torch.float -> !torch.vtensor<[16,256,77],f16>
%3965 = torch.aten.exp %3964 : !torch.vtensor<[16,256,77],f16> -> !torch.vtensor<[16,256,77],f16>
%3966 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%3967 = torch.aten.sum.dim_IntList %3965, %3966, %true, %none : !torch.vtensor<[16,256,77],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[16,256,1],f16>
%3968 = torch.aten.div.Tensor %3965, %3967 : !torch.vtensor<[16,256,77],f16>, !torch.vtensor<[16,256,1],f16> -> !torch.vtensor<[16,256,77],f16>
%3969 = torch.aten.broadcast_to %3968, %2230 : !torch.vtensor<[16,256,77],f16>, !torch.list<int> -> !torch.vtensor<[16,256,77],f16>
%3970 = torch.aten.view %3969, %2230 : !torch.vtensor<[16,256,77],f16>, !torch.list<int> -> !torch.vtensor<[16,256,77],f16>
%3971 = torch.aten.broadcast_to %3955, %2217 : !torch.vtensor<[16,77,160],f16>, !torch.list<int> -> !torch.vtensor<[16,77,160],f16>
%3972 = torch.aten.view %3971, %2217 : !torch.vtensor<[16,77,160],f16>, !torch.list<int> -> !torch.vtensor<[16,77,160],f16>
%3973 = torch.aten.bmm %3970, %3972 : !torch.vtensor<[16,256,77],f16>, !torch.vtensor<[16,77,160],f16> -> !torch.vtensor<[16,256,160],f16>
%3974 = torch.aten.view %3973, %2137 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%3975 = torch.aten.view %3974, %2168 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16>
%3976 = torch.aten.permute %3975, %901 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16>
%3977 = torch.aten.clone %3976, %int0 : !torch.vtensor<[2,256,8,160],f16>, !torch.int -> !torch.vtensor<[2,256,8,160],f16>
%3978 = torch.aten.view %3977, %2102 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%3979 = torch.aten.transpose.int %412, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%3980 = torch.aten.view %3978, %2121 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
%3981 = torch.aten.mm %3980, %3979 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16>
%3982 = torch.aten.mul.Scalar %413, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16>
%3983 = torch.aten.add.Tensor %3982, %3981, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[512,1280],f16>, !torch.int -> !torch.vtensor<[512,1280],f16>
%3984 = torch.aten.view %3983, %2102 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%3985 = torch.aten.add.Tensor %3984, %3915, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16>
%3986 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%3987 = torch.aten.sum.dim_IntList %3985, %3986, %true, %none : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,256,1],f16>
%3988 = torch.aten.div.Scalar %3987, %int1280 : !torch.vtensor<[2,256,1],f16>, !torch.int -> !torch.vtensor<[2,256,1],f16>
%3989 = torch.prim.ListConstruct %int2, %int256, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3990 = torch.aten.broadcast_to %3988, %3989 : !torch.vtensor<[2,256,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%3991 = torch.aten.sub.Tensor %3985, %3990, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16>
%3992 = torch.aten.mul.Tensor %3991, %3991 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16>
%3993 = torch.aten.sum.dim_IntList %3992, %3986, %true, %none : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,256,1],f16>
%3994 = torch.aten.div.Scalar %3993, %int1280 : !torch.vtensor<[2,256,1],f16>, !torch.int -> !torch.vtensor<[2,256,1],f16>
%3995 = torch.aten.add.Scalar %3994, %float1.000000e-05, %int1 : !torch.vtensor<[2,256,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,256,1],f16>
%3996 = torch.aten.rsqrt %3995 : !torch.vtensor<[2,256,1],f16> -> !torch.vtensor<[2,256,1],f16>
%3997 = torch.prim.ListConstruct %int2, %int256, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3998 = torch.aten.broadcast_to %3996, %3997 : !torch.vtensor<[2,256,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%3999 = torch.aten.mul.Tensor %3991, %3998 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16>
%4000 = torch.aten.mul.Tensor %3999, %414 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[1280],f16> -> !torch.vtensor<[2,256,1280],f16>
%4001 = torch.aten.add.Tensor %4000, %415, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16>
%4002 = torch.aten.transpose.int %416, %int0, %int1 : !torch.vtensor<[10240,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,10240],f16>
%4003 = torch.aten.view %4001, %2121 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
%4004 = torch.aten.mm %4003, %4002 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,10240],f16> -> !torch.vtensor<[512,10240],f16>
%4005 = torch.aten.mul.Scalar %417, %int1 : !torch.vtensor<[10240],f16>, !torch.int -> !torch.vtensor<[10240],f16>
%4006 = torch.aten.add.Tensor %4005, %4004, %int1 : !torch.vtensor<[10240],f16>, !torch.vtensor<[512,10240],f16>, !torch.int -> !torch.vtensor<[512,10240],f16>
%4007 = torch.aten.view %4006, %2276 : !torch.vtensor<[512,10240],f16>, !torch.list<int> -> !torch.vtensor<[2,256,10240],f16>
%4008 = torch.aten.slice.Tensor %4007, %int-1, %int0, %int5120, %int1 : !torch.vtensor<[2,256,10240],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,256,5120],f16>
%4009 = torch.aten.slice.Tensor %4007, %int-1, %int5120, %int10240, %int1 : !torch.vtensor<[2,256,10240],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,256,5120],f16>
%4010 = torch.aten.gelu %4009, %str : !torch.vtensor<[2,256,5120],f16>, !torch.str -> !torch.vtensor<[2,256,5120],f16>
%4011 = torch.aten.mul.Tensor %4008, %4010 : !torch.vtensor<[2,256,5120],f16>, !torch.vtensor<[2,256,5120],f16> -> !torch.vtensor<[2,256,5120],f16>
%4012 = torch.aten.transpose.int %418, %int0, %int1 : !torch.vtensor<[1280,5120],f16>, !torch.int, !torch.int -> !torch.vtensor<[5120,1280],f16>
%4013 = torch.aten.view %4011, %2283 : !torch.vtensor<[2,256,5120],f16>, !torch.list<int> -> !torch.vtensor<[512,5120],f16>
%4014 = torch.aten.mm %4013, %4012 : !torch.vtensor<[512,5120],f16>, !torch.vtensor<[5120,1280],f16> -> !torch.vtensor<[512,1280],f16>
%4015 = torch.aten.mul.Scalar %419, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16>
%4016 = torch.aten.add.Tensor %4015, %4014, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[512,1280],f16>, !torch.int -> !torch.vtensor<[512,1280],f16>
%4017 = torch.aten.view %4016, %2102 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%4018 = torch.aten.add.Tensor %4017, %3985, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16>
%4019 = torch.aten.view %4018, %2290 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,16,16,1280],f16>
%4020 = torch.aten.permute %4019, %1060 : !torch.vtensor<[2,16,16,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f16>
%4021 = torch.aten.convolution %4020, %420, %421, %744, %745, %744, %false, %745, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[1280,1280,1,1],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
%4022 = torch.aten.add.Tensor %4021, %3809, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
%4023 = torch.prim.ListConstruct %4022, %1977 : (!torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,640,16,16],f16>) -> !torch.list<vtensor>
%4024 = torch.aten.cat %4023, %int1 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[2,1920,16,16],f16>
%4025 = torch.aten.clone %4024, %int0 : !torch.vtensor<[2,1920,16,16],f16>, !torch.int -> !torch.vtensor<[2,1920,16,16],f16>
%4026 = torch.prim.ListConstruct %int2, %int32, %int60, %int256 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4027 = torch.aten.view %4025, %4026 : !torch.vtensor<[2,1920,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,32,60,256],f16>
%4028 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%4029 = torch.aten.to.dtype %4028, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%4030 = torch.prim.ListConstruct %int2, %int32, %int60, %int256 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4031 = torch.aten.broadcast_to %4029, %4030 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,60,256],f32>
%4032 = torch.valsem.aten.copy %4031, %4027, %false : !torch.vtensor<[2,32,60,256],f32>, !torch.vtensor<[2,32,60,256],f16>, !torch.bool -> !torch.vtensor<[2,32,60,256],f32>
%4033 = torch.aten.to.dtype %4032, %int7, %false, %false, %none : !torch.vtensor<[2,32,60,256],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,60,256],f64>
%4034 = torch.aten.sum.dim_IntList %4033, %754, %true, %none : !torch.vtensor<[2,32,60,256],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%4035 = torch.aten.div.Scalar %4034, %int15360 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%4036 = torch.aten.sub.Tensor %4033, %4035, %float1.000000e00 : !torch.vtensor<[2,32,60,256],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,60,256],f64>
%4037 = torch.aten.mul.Tensor %4036, %4036 : !torch.vtensor<[2,32,60,256],f64>, !torch.vtensor<[2,32,60,256],f64> -> !torch.vtensor<[2,32,60,256],f64>
%4038 = torch.aten.sum.dim_IntList %4037, %754, %true, %none : !torch.vtensor<[2,32,60,256],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%4039 = torch.aten.div.Scalar %4038, %int15360 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%4040 = torch.aten.to.dtype %4039, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%4041 = torch.aten.sum.dim_IntList %4032, %754, %true, %none : !torch.vtensor<[2,32,60,256],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%4042 = torch.aten.div.Scalar %4041, %int15360 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%4043 = torch.aten.add.Tensor %4040, %6, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%4044 = torch.aten.rsqrt %4043 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%4045 = torch.aten.sub.Tensor %4027, %4042, %int1 : !torch.vtensor<[2,32,60,256],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,60,256],f32>
%4046 = torch.aten.mul.Tensor %4045, %4044 : !torch.vtensor<[2,32,60,256],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,60,256],f32>
%4047 = torch.prim.ListConstruct %int2, %int1920, %int16, %int16 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4048 = torch.aten.view %4046, %4047 : !torch.vtensor<[2,32,60,256],f32>, !torch.list<int> -> !torch.vtensor<[2,1920,16,16],f32>
%4049 = torch.aten.unsqueeze %422, %int-1 : !torch.vtensor<[1920],f16>, !torch.int -> !torch.vtensor<[1920,1],f16>
%4050 = torch.aten.unsqueeze %4049, %int-1 : !torch.vtensor<[1920,1],f16>, !torch.int -> !torch.vtensor<[1920,1,1],f16>
%4051 = torch.aten.mul.Tensor %4048, %4050 : !torch.vtensor<[2,1920,16,16],f32>, !torch.vtensor<[1920,1,1],f16> -> !torch.vtensor<[2,1920,16,16],f32>
%4052 = torch.aten.unsqueeze %423, %int-1 : !torch.vtensor<[1920],f16>, !torch.int -> !torch.vtensor<[1920,1],f16>
%4053 = torch.aten.unsqueeze %4052, %int-1 : !torch.vtensor<[1920,1],f16>, !torch.int -> !torch.vtensor<[1920,1,1],f16>
%4054 = torch.aten.add.Tensor %4051, %4053, %int1 : !torch.vtensor<[2,1920,16,16],f32>, !torch.vtensor<[1920,1,1],f16>, !torch.int -> !torch.vtensor<[2,1920,16,16],f32>
%4055 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%4056 = torch.aten.to.dtype %4055, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%4057 = torch.prim.ListConstruct %int2, %int1920, %int16, %int16 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4058 = torch.aten.broadcast_to %4056, %4057 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,1920,16,16],f16>
%4059 = torch.valsem.aten.copy %4058, %4054, %false : !torch.vtensor<[2,1920,16,16],f16>, !torch.vtensor<[2,1920,16,16],f32>, !torch.bool -> !torch.vtensor<[2,1920,16,16],f16>
%4060 = torch.aten.sigmoid %4059 : !torch.vtensor<[2,1920,16,16],f16> -> !torch.vtensor<[2,1920,16,16],f16>
%4061 = torch.aten.mul.Tensor %4060, %4059 : !torch.vtensor<[2,1920,16,16],f16>, !torch.vtensor<[2,1920,16,16],f16> -> !torch.vtensor<[2,1920,16,16],f16>
%4062 = torch.aten.convolution %4061, %424, %425, %744, %744, %744, %false, %745, %int1 : !torch.vtensor<[2,1920,16,16],f16>, !torch.vtensor<[1280,1920,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
%4063 = torch.aten.sigmoid %743 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%4064 = torch.aten.mul.Tensor %4063, %743 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%4065 = torch.aten.transpose.int %426, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%4066 = torch.aten.mm %4064, %4065 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[2,1280],f16>
%4067 = torch.aten.mul.Scalar %427, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16>
%4068 = torch.aten.add.Tensor %4067, %4066, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[2,1280],f16>, !torch.int -> !torch.vtensor<[2,1280],f16>
%4069 = torch.aten.slice.Tensor %4068, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,1280],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
%4070 = torch.aten.slice.Tensor %4069, %int1, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,1280],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
%4071 = torch.aten.unsqueeze %4070, %int2 : !torch.vtensor<[2,1280],f16>, !torch.int -> !torch.vtensor<[2,1280,1],f16>
%4072 = torch.aten.unsqueeze %4071, %int3 : !torch.vtensor<[2,1280,1],f16>, !torch.int -> !torch.vtensor<[2,1280,1,1],f16>
%4073 = torch.aten.add.Tensor %4062, %4072, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
%4074 = torch.aten.view %4073, %2027 : !torch.vtensor<[2,1280,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,32,40,256],f16>
%4075 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%4076 = torch.aten.to.dtype %4075, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%4077 = torch.prim.ListConstruct %int2, %int32, %int40, %int256 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4078 = torch.aten.broadcast_to %4076, %4077 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,40,256],f32>
%4079 = torch.valsem.aten.copy %4078, %4074, %false : !torch.vtensor<[2,32,40,256],f32>, !torch.vtensor<[2,32,40,256],f16>, !torch.bool -> !torch.vtensor<[2,32,40,256],f32>
%4080 = torch.aten.to.dtype %4079, %int7, %false, %false, %none : !torch.vtensor<[2,32,40,256],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,40,256],f64>
%4081 = torch.aten.sum.dim_IntList %4080, %754, %true, %none : !torch.vtensor<[2,32,40,256],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%4082 = torch.aten.div.Scalar %4081, %int10240 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%4083 = torch.aten.sub.Tensor %4080, %4082, %float1.000000e00 : !torch.vtensor<[2,32,40,256],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,40,256],f64>
%4084 = torch.aten.mul.Tensor %4083, %4083 : !torch.vtensor<[2,32,40,256],f64>, !torch.vtensor<[2,32,40,256],f64> -> !torch.vtensor<[2,32,40,256],f64>
%4085 = torch.aten.sum.dim_IntList %4084, %754, %true, %none : !torch.vtensor<[2,32,40,256],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%4086 = torch.aten.div.Scalar %4085, %int10240 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%4087 = torch.aten.to.dtype %4086, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%4088 = torch.aten.sum.dim_IntList %4079, %754, %true, %none : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%4089 = torch.aten.div.Scalar %4088, %int10240 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%4090 = torch.aten.add.Tensor %4087, %6, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%4091 = torch.aten.rsqrt %4090 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%4092 = torch.aten.sub.Tensor %4074, %4089, %int1 : !torch.vtensor<[2,32,40,256],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,256],f32>
%4093 = torch.aten.mul.Tensor %4092, %4091 : !torch.vtensor<[2,32,40,256],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,256],f32>
%4094 = torch.aten.view %4093, %2048 : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f32>
%4095 = torch.aten.unsqueeze %428, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%4096 = torch.aten.unsqueeze %4095, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%4097 = torch.aten.mul.Tensor %4094, %4096 : !torch.vtensor<[2,1280,16,16],f32>, !torch.vtensor<[1280,1,1],f16> -> !torch.vtensor<[2,1280,16,16],f32>
%4098 = torch.aten.unsqueeze %429, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%4099 = torch.aten.unsqueeze %4098, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%4100 = torch.aten.add.Tensor %4097, %4099, %int1 : !torch.vtensor<[2,1280,16,16],f32>, !torch.vtensor<[1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f32>
%4101 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%4102 = torch.aten.to.dtype %4101, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%4103 = torch.prim.ListConstruct %int2, %int1280, %int16, %int16 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4104 = torch.aten.broadcast_to %4102, %4103 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f16>
%4105 = torch.valsem.aten.copy %4104, %4100, %false : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f32>, !torch.bool -> !torch.vtensor<[2,1280,16,16],f16>
%4106 = torch.aten.sigmoid %4105 : !torch.vtensor<[2,1280,16,16],f16> -> !torch.vtensor<[2,1280,16,16],f16>
%4107 = torch.aten.mul.Tensor %4106, %4105 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f16> -> !torch.vtensor<[2,1280,16,16],f16>
%4108 = torch.aten.convolution %4107, %430, %431, %744, %744, %744, %false, %745, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
%4109 = torch.aten.convolution %4024, %432, %433, %744, %745, %744, %false, %745, %int1 : !torch.vtensor<[2,1920,16,16],f16>, !torch.vtensor<[1280,1920,1,1],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
%4110 = torch.aten.add.Tensor %4109, %4108, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
%4111 = torch.aten.div.Tensor %4110, %5 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[2,1280,16,16],f16>
%4112 = torch.aten.clone %4111, %int0 : !torch.vtensor<[2,1280,16,16],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
%4113 = torch.aten.view %4112, %2027 : !torch.vtensor<[2,1280,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,32,40,256],f16>
%4114 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%4115 = torch.aten.to.dtype %4114, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%4116 = torch.prim.ListConstruct %int2, %int32, %int40, %int256 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4117 = torch.aten.broadcast_to %4115, %4116 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,40,256],f32>
%4118 = torch.valsem.aten.copy %4117, %4113, %false : !torch.vtensor<[2,32,40,256],f32>, !torch.vtensor<[2,32,40,256],f16>, !torch.bool -> !torch.vtensor<[2,32,40,256],f32>
%4119 = torch.aten.to.dtype %4118, %int7, %false, %false, %none : !torch.vtensor<[2,32,40,256],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,40,256],f64>
%4120 = torch.aten.sum.dim_IntList %4119, %754, %true, %none : !torch.vtensor<[2,32,40,256],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%4121 = torch.aten.div.Scalar %4120, %int10240 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%4122 = torch.aten.sub.Tensor %4119, %4121, %float1.000000e00 : !torch.vtensor<[2,32,40,256],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,40,256],f64>
%4123 = torch.aten.mul.Tensor %4122, %4122 : !torch.vtensor<[2,32,40,256],f64>, !torch.vtensor<[2,32,40,256],f64> -> !torch.vtensor<[2,32,40,256],f64>
%4124 = torch.aten.sum.dim_IntList %4123, %754, %true, %none : !torch.vtensor<[2,32,40,256],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%4125 = torch.aten.div.Scalar %4124, %int10240 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%4126 = torch.aten.to.dtype %4125, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%4127 = torch.aten.sum.dim_IntList %4118, %754, %true, %none : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%4128 = torch.aten.div.Scalar %4127, %int10240 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%4129 = torch.aten.add.Tensor %4126, %4, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%4130 = torch.aten.rsqrt %4129 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%4131 = torch.aten.sub.Tensor %4113, %4128, %int1 : !torch.vtensor<[2,32,40,256],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,256],f32>
%4132 = torch.aten.mul.Tensor %4131, %4130 : !torch.vtensor<[2,32,40,256],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,256],f32>
%4133 = torch.aten.view %4132, %2048 : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f32>
%4134 = torch.aten.unsqueeze %434, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%4135 = torch.aten.unsqueeze %4134, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%4136 = torch.aten.mul.Tensor %4133, %4135 : !torch.vtensor<[2,1280,16,16],f32>, !torch.vtensor<[1280,1,1],f16> -> !torch.vtensor<[2,1280,16,16],f32>
%4137 = torch.aten.unsqueeze %435, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%4138 = torch.aten.unsqueeze %4137, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%4139 = torch.aten.add.Tensor %4136, %4138, %int1 : !torch.vtensor<[2,1280,16,16],f32>, !torch.vtensor<[1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f32>
%4140 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%4141 = torch.aten.to.dtype %4140, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%4142 = torch.prim.ListConstruct %int2, %int1280, %int16, %int16 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4143 = torch.aten.broadcast_to %4141, %4142 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f16>
%4144 = torch.valsem.aten.copy %4143, %4139, %false : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f32>, !torch.bool -> !torch.vtensor<[2,1280,16,16],f16>
%4145 = torch.aten.convolution %4144, %436, %437, %744, %745, %744, %false, %745, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[1280,1280,1,1],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
%4146 = torch.aten.permute %4145, %866 : !torch.vtensor<[2,1280,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,16,16,1280],f16>
%4147 = torch.aten.view %4146, %2102 : !torch.vtensor<[2,16,16,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%4148 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%4149 = torch.aten.sum.dim_IntList %4147, %4148, %true, %none : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,256,1],f16>
%4150 = torch.aten.div.Scalar %4149, %int1280 : !torch.vtensor<[2,256,1],f16>, !torch.int -> !torch.vtensor<[2,256,1],f16>
%4151 = torch.prim.ListConstruct %int2, %int256, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4152 = torch.aten.broadcast_to %4150, %4151 : !torch.vtensor<[2,256,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%4153 = torch.aten.sub.Tensor %4147, %4152, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16>
%4154 = torch.aten.mul.Tensor %4153, %4153 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16>
%4155 = torch.aten.sum.dim_IntList %4154, %4148, %true, %none : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,256,1],f16>
%4156 = torch.aten.div.Scalar %4155, %int1280 : !torch.vtensor<[2,256,1],f16>, !torch.int -> !torch.vtensor<[2,256,1],f16>
%4157 = torch.aten.add.Scalar %4156, %float1.000000e-05, %int1 : !torch.vtensor<[2,256,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,256,1],f16>
%4158 = torch.aten.rsqrt %4157 : !torch.vtensor<[2,256,1],f16> -> !torch.vtensor<[2,256,1],f16>
%4159 = torch.prim.ListConstruct %int2, %int256, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4160 = torch.aten.broadcast_to %4158, %4159 : !torch.vtensor<[2,256,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%4161 = torch.aten.mul.Tensor %4153, %4160 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16>
%4162 = torch.aten.mul.Tensor %4161, %438 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[1280],f16> -> !torch.vtensor<[2,256,1280],f16>
%4163 = torch.aten.add.Tensor %4162, %439, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16>
%4164 = torch.aten.transpose.int %440, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%4165 = torch.aten.view %4163, %2121 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
%4166 = torch.aten.mm %4165, %4164 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16>
%4167 = torch.aten.view %4166, %2102 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%4168 = torch.aten.transpose.int %441, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%4169 = torch.aten.view %4163, %2121 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
%4170 = torch.aten.mm %4169, %4168 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16>
%4171 = torch.aten.view %4170, %2102 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%4172 = torch.aten.transpose.int %442, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%4173 = torch.aten.view %4163, %2121 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
%4174 = torch.aten.mm %4173, %4172 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16>
%4175 = torch.aten.view %4174, %2102 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%4176 = torch.aten.view %4167, %2133 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16>
%4177 = torch.aten.permute %4176, %901 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16>
%4178 = torch.aten.clone %4177, %int0 : !torch.vtensor<[2,8,256,160],f16>, !torch.int -> !torch.vtensor<[2,8,256,160],f16>
%4179 = torch.aten.view %4178, %2137 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%4180 = torch.aten.view %4171, %2133 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16>
%4181 = torch.aten.permute %4180, %901 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16>
%4182 = torch.aten.clone %4181, %int0 : !torch.vtensor<[2,8,256,160],f16>, !torch.int -> !torch.vtensor<[2,8,256,160],f16>
%4183 = torch.aten.view %4182, %2137 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%4184 = torch.aten.view %4175, %2133 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16>
%4185 = torch.aten.permute %4184, %901 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16>
%4186 = torch.aten.clone %4185, %int0 : !torch.vtensor<[2,8,256,160],f16>, !torch.int -> !torch.vtensor<[2,8,256,160],f16>
%4187 = torch.aten.view %4186, %2137 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%4188 = torch.aten.transpose.int %4183, %int-1, %int-2 : !torch.vtensor<[16,256,160],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,160,256],f16>
%4189 = torch.aten.broadcast_to %4179, %2137 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%4190 = torch.aten.view %4189, %2137 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%4191 = torch.aten.broadcast_to %4188, %2150 : !torch.vtensor<[16,160,256],f16>, !torch.list<int> -> !torch.vtensor<[16,160,256],f16>
%4192 = torch.aten.view %4191, %2150 : !torch.vtensor<[16,160,256],f16>, !torch.list<int> -> !torch.vtensor<[16,160,256],f16>
%4193 = torch.aten.bmm %4190, %4192 : !torch.vtensor<[16,256,160],f16>, !torch.vtensor<[16,160,256],f16> -> !torch.vtensor<[16,256,256],f16>
%4194 = torch.aten.view %4193, %2154 : !torch.vtensor<[16,256,256],f16>, !torch.list<int> -> !torch.vtensor<[16,256,256],f16>
%4195 = torch.aten.mul.Tensor %4194, %1 : !torch.vtensor<[16,256,256],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,256,256],f16>
%values_34, %indices_35 = torch.aten.max.dim %4195, %int-1, %true : !torch.vtensor<[16,256,256],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,256,1],f16>, !torch.vtensor<[16,256,1],si64>
%4196 = torch.aten.sub.Tensor %4195, %values_34, %float1.000000e00 : !torch.vtensor<[16,256,256],f16>, !torch.vtensor<[16,256,1],f16>, !torch.float -> !torch.vtensor<[16,256,256],f16>
%4197 = torch.aten.exp %4196 : !torch.vtensor<[16,256,256],f16> -> !torch.vtensor<[16,256,256],f16>
%4198 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%4199 = torch.aten.sum.dim_IntList %4197, %4198, %true, %none : !torch.vtensor<[16,256,256],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[16,256,1],f16>
%4200 = torch.aten.div.Tensor %4197, %4199 : !torch.vtensor<[16,256,256],f16>, !torch.vtensor<[16,256,1],f16> -> !torch.vtensor<[16,256,256],f16>
// NOTE(review): Auto-generated torch-mlir SSA IR — an excerpt of a much larger
// @forward body (header and terminator are outside this chunk). SSA value
// numbers, op order, and types must not be altered; the comments below only
// annotate structure. All shape/dtype statements are read directly from the
// !torch.vtensor types on each op. Structurally this looks like the tail of a
// spatial-transformer block followed by an upsampling resnet stage of a
// diffusion UNet — TODO confirm against the generating Python model.

// Apply attention probabilities (%4200, [16,256,256]) to the value tensor
// (%4187, [16,256,160]): attn @ V via bmm, then un-flatten the 16 = 2*8
// batched heads to [2,8,256,160], permute heads inward and merge to
// [2,256,1280]. The broadcast_to/view pairs are shape-preserving no-ops
// emitted by the exporter.
%4201 = torch.aten.broadcast_to %4200, %2154 : !torch.vtensor<[16,256,256],f16>, !torch.list<int> -> !torch.vtensor<[16,256,256],f16>
%4202 = torch.aten.view %4201, %2154 : !torch.vtensor<[16,256,256],f16>, !torch.list<int> -> !torch.vtensor<[16,256,256],f16>
%4203 = torch.aten.broadcast_to %4187, %2137 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%4204 = torch.aten.view %4203, %2137 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%4205 = torch.aten.bmm %4202, %4204 : !torch.vtensor<[16,256,256],f16>, !torch.vtensor<[16,256,160],f16> -> !torch.vtensor<[16,256,160],f16>
%4206 = torch.aten.view %4205, %2137 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%4207 = torch.aten.view %4206, %2168 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16>
%4208 = torch.aten.permute %4207, %901 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16>
%4209 = torch.aten.clone %4208, %int0 : !torch.vtensor<[2,256,8,160],f16>, !torch.int -> !torch.vtensor<[2,256,8,160],f16>
%4210 = torch.aten.view %4209, %2102 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
// Attention output projection: linear with weight %443 (transposed for mm)
// and bias %444, applied over tokens flattened to [512,1280]; then a
// residual add with %4147 (the block's pre-attention hidden states —
// presumably; %4147 is defined above this chunk).
%4211 = torch.aten.transpose.int %443, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%4212 = torch.aten.view %4210, %2121 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
%4213 = torch.aten.mm %4212, %4211 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16>
%4214 = torch.aten.mul.Scalar %444, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16>
%4215 = torch.aten.add.Tensor %4214, %4213, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[512,1280],f16>, !torch.int -> !torch.vtensor<[512,1280],f16>
%4216 = torch.aten.view %4215, %2102 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%4217 = torch.aten.add.Tensor %4216, %4147, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16>
// Manually-expanded LayerNorm over the last (1280) dim, entirely in f16:
// mean = sum/1280, var = sum((x-mean)^2)/1280, rsqrt(var + 1e-5), then
// affine scale %445 and shift %446.
%4218 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%4219 = torch.aten.sum.dim_IntList %4217, %4218, %true, %none : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,256,1],f16>
%4220 = torch.aten.div.Scalar %4219, %int1280 : !torch.vtensor<[2,256,1],f16>, !torch.int -> !torch.vtensor<[2,256,1],f16>
%4221 = torch.prim.ListConstruct %int2, %int256, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4222 = torch.aten.broadcast_to %4220, %4221 : !torch.vtensor<[2,256,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%4223 = torch.aten.sub.Tensor %4217, %4222, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16>
%4224 = torch.aten.mul.Tensor %4223, %4223 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16>
%4225 = torch.aten.sum.dim_IntList %4224, %4218, %true, %none : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,256,1],f16>
%4226 = torch.aten.div.Scalar %4225, %int1280 : !torch.vtensor<[2,256,1],f16>, !torch.int -> !torch.vtensor<[2,256,1],f16>
%4227 = torch.aten.add.Scalar %4226, %float1.000000e-05, %int1 : !torch.vtensor<[2,256,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,256,1],f16>
%4228 = torch.aten.rsqrt %4227 : !torch.vtensor<[2,256,1],f16> -> !torch.vtensor<[2,256,1],f16>
%4229 = torch.prim.ListConstruct %int2, %int256, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4230 = torch.aten.broadcast_to %4228, %4229 : !torch.vtensor<[2,256,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%4231 = torch.aten.mul.Tensor %4223, %4230 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16>
%4232 = torch.aten.mul.Tensor %4231, %445 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[1280],f16> -> !torch.vtensor<[2,256,1280],f16>
%4233 = torch.aten.add.Tensor %4232, %446, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16>
// Cross-attention projections: Q (weight %447, no bias) from the normalized
// hidden states; K (%448) and V (%449) projected 768 -> 1280 from %arg2, the
// [2,77,768] conditioning input (77 tokens — presumably CLIP text
// embeddings; verify against the caller).
%4234 = torch.aten.transpose.int %447, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%4235 = torch.aten.view %4233, %2121 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
%4236 = torch.aten.mm %4235, %4234 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16>
%4237 = torch.aten.view %4236, %2102 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%4238 = torch.aten.transpose.int %448, %int0, %int1 : !torch.vtensor<[1280,768],f16>, !torch.int, !torch.int -> !torch.vtensor<[768,1280],f16>
%4239 = torch.aten.view %arg2, %968 : !torch.vtensor<[2,77,768],f16>, !torch.list<int> -> !torch.vtensor<[154,768],f16>
%4240 = torch.aten.mm %4239, %4238 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,1280],f16> -> !torch.vtensor<[154,1280],f16>
%4241 = torch.aten.view %4240, %2203 : !torch.vtensor<[154,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,77,1280],f16>
%4242 = torch.aten.transpose.int %449, %int0, %int1 : !torch.vtensor<[1280,768],f16>, !torch.int, !torch.int -> !torch.vtensor<[768,1280],f16>
%4243 = torch.aten.view %arg2, %968 : !torch.vtensor<[2,77,768],f16>, !torch.list<int> -> !torch.vtensor<[154,768],f16>
%4244 = torch.aten.mm %4243, %4242 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,1280],f16> -> !torch.vtensor<[154,1280],f16>
%4245 = torch.aten.view %4244, %2203 : !torch.vtensor<[154,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,77,1280],f16>
// Split into 8 heads of dim 160 and fold heads into the batch dim:
// Q -> [16,256,160], K/V -> [16,77,160].
%4246 = torch.aten.view %4237, %2133 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16>
%4247 = torch.aten.permute %4246, %901 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16>
%4248 = torch.aten.clone %4247, %int0 : !torch.vtensor<[2,8,256,160],f16>, !torch.int -> !torch.vtensor<[2,8,256,160],f16>
%4249 = torch.aten.view %4248, %2137 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%4250 = torch.aten.view %4241, %2213 : !torch.vtensor<[2,77,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,77,8,160],f16>
%4251 = torch.aten.permute %4250, %901 : !torch.vtensor<[2,77,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,160],f16>
%4252 = torch.aten.clone %4251, %int0 : !torch.vtensor<[2,8,77,160],f16>, !torch.int -> !torch.vtensor<[2,8,77,160],f16>
%4253 = torch.aten.view %4252, %2217 : !torch.vtensor<[2,8,77,160],f16>, !torch.list<int> -> !torch.vtensor<[16,77,160],f16>
%4254 = torch.aten.view %4245, %2213 : !torch.vtensor<[2,77,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,77,8,160],f16>
%4255 = torch.aten.permute %4254, %901 : !torch.vtensor<[2,77,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,160],f16>
%4256 = torch.aten.clone %4255, %int0 : !torch.vtensor<[2,8,77,160],f16>, !torch.int -> !torch.vtensor<[2,8,77,160],f16>
%4257 = torch.aten.view %4256, %2217 : !torch.vtensor<[2,8,77,160],f16>, !torch.list<int> -> !torch.vtensor<[16,77,160],f16>
// Scores: Q @ K^T scaled by %1 (per HEAD, the f64 literal
// 0.079056941504209485 = 1/sqrt(160), i.e. 1/sqrt(head_dim)), followed by a
// numerically-stable softmax along the key dim: subtract the rowwise max,
// exp, normalize by the sum.
%4258 = torch.aten.transpose.int %4253, %int-1, %int-2 : !torch.vtensor<[16,77,160],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,160,77],f16>
%4259 = torch.aten.broadcast_to %4249, %2137 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%4260 = torch.aten.view %4259, %2137 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%4261 = torch.aten.broadcast_to %4258, %2226 : !torch.vtensor<[16,160,77],f16>, !torch.list<int> -> !torch.vtensor<[16,160,77],f16>
%4262 = torch.aten.view %4261, %2226 : !torch.vtensor<[16,160,77],f16>, !torch.list<int> -> !torch.vtensor<[16,160,77],f16>
%4263 = torch.aten.bmm %4260, %4262 : !torch.vtensor<[16,256,160],f16>, !torch.vtensor<[16,160,77],f16> -> !torch.vtensor<[16,256,77],f16>
%4264 = torch.aten.view %4263, %2230 : !torch.vtensor<[16,256,77],f16>, !torch.list<int> -> !torch.vtensor<[16,256,77],f16>
%4265 = torch.aten.mul.Tensor %4264, %1 : !torch.vtensor<[16,256,77],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,256,77],f16>
%values_36, %indices_37 = torch.aten.max.dim %4265, %int-1, %true : !torch.vtensor<[16,256,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,256,1],f16>, !torch.vtensor<[16,256,1],si64>
%4266 = torch.aten.sub.Tensor %4265, %values_36, %float1.000000e00 : !torch.vtensor<[16,256,77],f16>, !torch.vtensor<[16,256,1],f16>, !torch.float -> !torch.vtensor<[16,256,77],f16>
%4267 = torch.aten.exp %4266 : !torch.vtensor<[16,256,77],f16> -> !torch.vtensor<[16,256,77],f16>
%4268 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%4269 = torch.aten.sum.dim_IntList %4267, %4268, %true, %none : !torch.vtensor<[16,256,77],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[16,256,1],f16>
%4270 = torch.aten.div.Tensor %4267, %4269 : !torch.vtensor<[16,256,77],f16>, !torch.vtensor<[16,256,1],f16> -> !torch.vtensor<[16,256,77],f16>
// attn @ V, then merge the 8 heads back to [2,256,1280].
%4271 = torch.aten.broadcast_to %4270, %2230 : !torch.vtensor<[16,256,77],f16>, !torch.list<int> -> !torch.vtensor<[16,256,77],f16>
%4272 = torch.aten.view %4271, %2230 : !torch.vtensor<[16,256,77],f16>, !torch.list<int> -> !torch.vtensor<[16,256,77],f16>
%4273 = torch.aten.broadcast_to %4257, %2217 : !torch.vtensor<[16,77,160],f16>, !torch.list<int> -> !torch.vtensor<[16,77,160],f16>
%4274 = torch.aten.view %4273, %2217 : !torch.vtensor<[16,77,160],f16>, !torch.list<int> -> !torch.vtensor<[16,77,160],f16>
%4275 = torch.aten.bmm %4272, %4274 : !torch.vtensor<[16,256,77],f16>, !torch.vtensor<[16,77,160],f16> -> !torch.vtensor<[16,256,160],f16>
%4276 = torch.aten.view %4275, %2137 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%4277 = torch.aten.view %4276, %2168 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16>
%4278 = torch.aten.permute %4277, %901 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16>
%4279 = torch.aten.clone %4278, %int0 : !torch.vtensor<[2,256,8,160],f16>, !torch.int -> !torch.vtensor<[2,256,8,160],f16>
%4280 = torch.aten.view %4279, %2102 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
// Cross-attention output projection (weight %450, bias %451) plus residual
// with %4217 (the post-self-attention hidden states).
%4281 = torch.aten.transpose.int %450, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%4282 = torch.aten.view %4280, %2121 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
%4283 = torch.aten.mm %4282, %4281 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16>
%4284 = torch.aten.mul.Scalar %451, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16>
%4285 = torch.aten.add.Tensor %4284, %4283, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[512,1280],f16>, !torch.int -> !torch.vtensor<[512,1280],f16>
%4286 = torch.aten.view %4285, %2102 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%4287 = torch.aten.add.Tensor %4286, %4217, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16>
// Third LayerNorm of the transformer block (same expansion as above),
// affine params %452/%453.
%4288 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%4289 = torch.aten.sum.dim_IntList %4287, %4288, %true, %none : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,256,1],f16>
%4290 = torch.aten.div.Scalar %4289, %int1280 : !torch.vtensor<[2,256,1],f16>, !torch.int -> !torch.vtensor<[2,256,1],f16>
%4291 = torch.prim.ListConstruct %int2, %int256, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4292 = torch.aten.broadcast_to %4290, %4291 : !torch.vtensor<[2,256,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%4293 = torch.aten.sub.Tensor %4287, %4292, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16>
%4294 = torch.aten.mul.Tensor %4293, %4293 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16>
%4295 = torch.aten.sum.dim_IntList %4294, %4288, %true, %none : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,256,1],f16>
%4296 = torch.aten.div.Scalar %4295, %int1280 : !torch.vtensor<[2,256,1],f16>, !torch.int -> !torch.vtensor<[2,256,1],f16>
%4297 = torch.aten.add.Scalar %4296, %float1.000000e-05, %int1 : !torch.vtensor<[2,256,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,256,1],f16>
%4298 = torch.aten.rsqrt %4297 : !torch.vtensor<[2,256,1],f16> -> !torch.vtensor<[2,256,1],f16>
%4299 = torch.prim.ListConstruct %int2, %int256, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4300 = torch.aten.broadcast_to %4298, %4299 : !torch.vtensor<[2,256,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%4301 = torch.aten.mul.Tensor %4293, %4300 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16>
%4302 = torch.aten.mul.Tensor %4301, %452 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[1280],f16> -> !torch.vtensor<[2,256,1280],f16>
%4303 = torch.aten.add.Tensor %4302, %453, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16>
// GEGLU feed-forward: linear 1280 -> 10240 (%454/%455), slice into two
// 5120-wide halves, gelu the second half and use it to gate the first.
%4304 = torch.aten.transpose.int %454, %int0, %int1 : !torch.vtensor<[10240,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,10240],f16>
%4305 = torch.aten.view %4303, %2121 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
%4306 = torch.aten.mm %4305, %4304 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,10240],f16> -> !torch.vtensor<[512,10240],f16>
%4307 = torch.aten.mul.Scalar %455, %int1 : !torch.vtensor<[10240],f16>, !torch.int -> !torch.vtensor<[10240],f16>
%4308 = torch.aten.add.Tensor %4307, %4306, %int1 : !torch.vtensor<[10240],f16>, !torch.vtensor<[512,10240],f16>, !torch.int -> !torch.vtensor<[512,10240],f16>
%4309 = torch.aten.view %4308, %2276 : !torch.vtensor<[512,10240],f16>, !torch.list<int> -> !torch.vtensor<[2,256,10240],f16>
%4310 = torch.aten.slice.Tensor %4309, %int-1, %int0, %int5120, %int1 : !torch.vtensor<[2,256,10240],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,256,5120],f16>
%4311 = torch.aten.slice.Tensor %4309, %int-1, %int5120, %int10240, %int1 : !torch.vtensor<[2,256,10240],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,256,5120],f16>
%4312 = torch.aten.gelu %4311, %str : !torch.vtensor<[2,256,5120],f16>, !torch.str -> !torch.vtensor<[2,256,5120],f16>
%4313 = torch.aten.mul.Tensor %4310, %4312 : !torch.vtensor<[2,256,5120],f16>, !torch.vtensor<[2,256,5120],f16> -> !torch.vtensor<[2,256,5120],f16>
// FF down-projection 5120 -> 1280 (%456/%457), then the transformer block's
// final residual with %4287.
%4314 = torch.aten.transpose.int %456, %int0, %int1 : !torch.vtensor<[1280,5120],f16>, !torch.int, !torch.int -> !torch.vtensor<[5120,1280],f16>
%4315 = torch.aten.view %4313, %2283 : !torch.vtensor<[2,256,5120],f16>, !torch.list<int> -> !torch.vtensor<[512,5120],f16>
%4316 = torch.aten.mm %4315, %4314 : !torch.vtensor<[512,5120],f16>, !torch.vtensor<[5120,1280],f16> -> !torch.vtensor<[512,1280],f16>
%4317 = torch.aten.mul.Scalar %457, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16>
%4318 = torch.aten.add.Tensor %4317, %4316, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[512,1280],f16>, !torch.int -> !torch.vtensor<[512,1280],f16>
%4319 = torch.aten.view %4318, %2102 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%4320 = torch.aten.add.Tensor %4319, %4287, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16>
// Tokens back to NCHW: [2,256,1280] -> [2,16,16,1280] -> [2,1280,16,16],
// then a 1x1 conv (%458/%459) and a residual add with %4111 (defined above
// this chunk — presumably the features entering the transformer).
%4321 = torch.aten.view %4320, %2290 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,16,16,1280],f16>
%4322 = torch.aten.permute %4321, %1060 : !torch.vtensor<[2,16,16,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f16>
%4323 = torch.aten.convolution %4322, %458, %459, %744, %745, %744, %false, %745, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[1280,1280,1,1],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
%4324 = torch.aten.add.Tensor %4323, %4111, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
// Upsampler: nearest-neighbor resize (scale factors %3418) 16x16 -> 32x32,
// then a 3x3 conv (%460/%461).
%4325 = torch.aten.upsample_nearest2d.vec %4324, %none, %3418 : !torch.vtensor<[2,1280,16,16],f16>, !torch.none, !torch.list<float> -> !torch.vtensor<[2,1280,32,32],f16>
%4326 = torch.aten.convolution %4325, %460, %461, %744, %744, %744, %false, %745, %int1 : !torch.vtensor<[2,1280,32,32],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,1280,32,32],f16>
// UNet skip connection: concatenate with %1976 ([2,640,32,32], from the
// down path) along channels -> [2,1920,32,32].
%4327 = torch.prim.ListConstruct %4326, %1976 : (!torch.vtensor<[2,1280,32,32],f16>, !torch.vtensor<[2,640,32,32],f16>) -> !torch.list<vtensor>
%4328 = torch.aten.cat %4327, %int1 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[2,1920,32,32],f16>
// Resnet GroupNorm #1: 32 groups of 60 channels, viewed as [2,32,60,1024].
// Statistics are computed in f64 (the to.dtype upcasts) with divisor
// 61440 = 60*1024; the mean actually subtracted (%4346) comes from the f32
// copy. Normalized result gets affine %462/%463, then is cast back to f16.
%4329 = torch.aten.clone %4328, %int0 : !torch.vtensor<[2,1920,32,32],f16>, !torch.int -> !torch.vtensor<[2,1920,32,32],f16>
%4330 = torch.prim.ListConstruct %int2, %int32, %int60, %int1024 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4331 = torch.aten.view %4329, %4330 : !torch.vtensor<[2,1920,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,32,60,1024],f16>
%4332 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%4333 = torch.aten.to.dtype %4332, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%4334 = torch.prim.ListConstruct %int2, %int32, %int60, %int1024 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4335 = torch.aten.broadcast_to %4333, %4334 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,60,1024],f32>
%4336 = torch.valsem.aten.copy %4335, %4331, %false : !torch.vtensor<[2,32,60,1024],f32>, !torch.vtensor<[2,32,60,1024],f16>, !torch.bool -> !torch.vtensor<[2,32,60,1024],f32>
%4337 = torch.aten.to.dtype %4336, %int7, %false, %false, %none : !torch.vtensor<[2,32,60,1024],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,60,1024],f64>
%4338 = torch.aten.sum.dim_IntList %4337, %754, %true, %none : !torch.vtensor<[2,32,60,1024],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%4339 = torch.aten.div.Scalar %4338, %int61440 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%4340 = torch.aten.sub.Tensor %4337, %4339, %float1.000000e00 : !torch.vtensor<[2,32,60,1024],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,60,1024],f64>
%4341 = torch.aten.mul.Tensor %4340, %4340 : !torch.vtensor<[2,32,60,1024],f64>, !torch.vtensor<[2,32,60,1024],f64> -> !torch.vtensor<[2,32,60,1024],f64>
%4342 = torch.aten.sum.dim_IntList %4341, %754, %true, %none : !torch.vtensor<[2,32,60,1024],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%4343 = torch.aten.div.Scalar %4342, %int61440 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%4344 = torch.aten.to.dtype %4343, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%4345 = torch.aten.sum.dim_IntList %4336, %754, %true, %none : !torch.vtensor<[2,32,60,1024],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%4346 = torch.aten.div.Scalar %4345, %int61440 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%4347 = torch.aten.add.Tensor %4344, %6, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%4348 = torch.aten.rsqrt %4347 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%4349 = torch.aten.sub.Tensor %4331, %4346, %int1 : !torch.vtensor<[2,32,60,1024],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,60,1024],f32>
%4350 = torch.aten.mul.Tensor %4349, %4348 : !torch.vtensor<[2,32,60,1024],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,60,1024],f32>
%4351 = torch.prim.ListConstruct %int2, %int1920, %int32, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4352 = torch.aten.view %4350, %4351 : !torch.vtensor<[2,32,60,1024],f32>, !torch.list<int> -> !torch.vtensor<[2,1920,32,32],f32>
%4353 = torch.aten.unsqueeze %462, %int-1 : !torch.vtensor<[1920],f16>, !torch.int -> !torch.vtensor<[1920,1],f16>
%4354 = torch.aten.unsqueeze %4353, %int-1 : !torch.vtensor<[1920,1],f16>, !torch.int -> !torch.vtensor<[1920,1,1],f16>
%4355 = torch.aten.mul.Tensor %4352, %4354 : !torch.vtensor<[2,1920,32,32],f32>, !torch.vtensor<[1920,1,1],f16> -> !torch.vtensor<[2,1920,32,32],f32>
%4356 = torch.aten.unsqueeze %463, %int-1 : !torch.vtensor<[1920],f16>, !torch.int -> !torch.vtensor<[1920,1],f16>
%4357 = torch.aten.unsqueeze %4356, %int-1 : !torch.vtensor<[1920,1],f16>, !torch.int -> !torch.vtensor<[1920,1,1],f16>
%4358 = torch.aten.add.Tensor %4355, %4357, %int1 : !torch.vtensor<[2,1920,32,32],f32>, !torch.vtensor<[1920,1,1],f16>, !torch.int -> !torch.vtensor<[2,1920,32,32],f32>
%4359 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%4360 = torch.aten.to.dtype %4359, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%4361 = torch.prim.ListConstruct %int2, %int1920, %int32, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4362 = torch.aten.broadcast_to %4360, %4361 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,1920,32,32],f16>
%4363 = torch.valsem.aten.copy %4362, %4358, %false : !torch.vtensor<[2,1920,32,32],f16>, !torch.vtensor<[2,1920,32,32],f32>, !torch.bool -> !torch.vtensor<[2,1920,32,32],f16>
// SiLU (x * sigmoid(x)) then 3x3 conv 1920 -> 640 (%464/%465).
%4364 = torch.aten.sigmoid %4363 : !torch.vtensor<[2,1920,32,32],f16> -> !torch.vtensor<[2,1920,32,32],f16>
%4365 = torch.aten.mul.Tensor %4364, %4363 : !torch.vtensor<[2,1920,32,32],f16>, !torch.vtensor<[2,1920,32,32],f16> -> !torch.vtensor<[2,1920,32,32],f16>
%4366 = torch.aten.convolution %4365, %464, %465, %744, %744, %744, %false, %745, %int1 : !torch.vtensor<[2,1920,32,32],f16>, !torch.vtensor<[640,1920,3,3],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
// Time-embedding injection: SiLU on %743 ([2,1280], defined above this
// chunk — presumably the shared time embedding), linear 1280 -> 640
// (%466/%467), broadcast to [2,640,1,1], add to the conv output.
%4367 = torch.aten.sigmoid %743 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%4368 = torch.aten.mul.Tensor %4367, %743 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%4369 = torch.aten.transpose.int %466, %int0, %int1 : !torch.vtensor<[640,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,640],f16>
%4370 = torch.aten.mm %4368, %4369 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,640],f16> -> !torch.vtensor<[2,640],f16>
%4371 = torch.aten.mul.Scalar %467, %int1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640],f16>
%4372 = torch.aten.add.Tensor %4371, %4370, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2,640],f16>, !torch.int -> !torch.vtensor<[2,640],f16>
%4373 = torch.aten.slice.Tensor %4372, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,640],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,640],f16>
%4374 = torch.aten.slice.Tensor %4373, %int1, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,640],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,640],f16>
%4375 = torch.aten.unsqueeze %4374, %int2 : !torch.vtensor<[2,640],f16>, !torch.int -> !torch.vtensor<[2,640,1],f16>
%4376 = torch.aten.unsqueeze %4375, %int3 : !torch.vtensor<[2,640,1],f16>, !torch.int -> !torch.vtensor<[2,640,1,1],f16>
%4377 = torch.aten.add.Tensor %4366, %4376, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,1,1],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
// Resnet GroupNorm #2: 32 groups of 20 channels ([2,32,20,1024], divisor
// 20480 = 20*1024), same f64-stats pattern as above; eps %6, affine
// %468/%469.
%4378 = torch.aten.view %4377, %1412 : !torch.vtensor<[2,640,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,32,20,1024],f16>
%4379 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%4380 = torch.aten.to.dtype %4379, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%4381 = torch.prim.ListConstruct %int2, %int32, %int20, %int1024 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4382 = torch.aten.broadcast_to %4380, %4381 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,20,1024],f32>
%4383 = torch.valsem.aten.copy %4382, %4378, %false : !torch.vtensor<[2,32,20,1024],f32>, !torch.vtensor<[2,32,20,1024],f16>, !torch.bool -> !torch.vtensor<[2,32,20,1024],f32>
%4384 = torch.aten.to.dtype %4383, %int7, %false, %false, %none : !torch.vtensor<[2,32,20,1024],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,20,1024],f64>
%4385 = torch.aten.sum.dim_IntList %4384, %754, %true, %none : !torch.vtensor<[2,32,20,1024],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%4386 = torch.aten.div.Scalar %4385, %int20480 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%4387 = torch.aten.sub.Tensor %4384, %4386, %float1.000000e00 : !torch.vtensor<[2,32,20,1024],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,20,1024],f64>
%4388 = torch.aten.mul.Tensor %4387, %4387 : !torch.vtensor<[2,32,20,1024],f64>, !torch.vtensor<[2,32,20,1024],f64> -> !torch.vtensor<[2,32,20,1024],f64>
%4389 = torch.aten.sum.dim_IntList %4388, %754, %true, %none : !torch.vtensor<[2,32,20,1024],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%4390 = torch.aten.div.Scalar %4389, %int20480 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%4391 = torch.aten.to.dtype %4390, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%4392 = torch.aten.sum.dim_IntList %4383, %754, %true, %none : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%4393 = torch.aten.div.Scalar %4392, %int20480 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%4394 = torch.aten.add.Tensor %4391, %6, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%4395 = torch.aten.rsqrt %4394 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%4396 = torch.aten.sub.Tensor %4378, %4393, %int1 : !torch.vtensor<[2,32,20,1024],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,20,1024],f32>
%4397 = torch.aten.mul.Tensor %4396, %4395 : !torch.vtensor<[2,32,20,1024],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,20,1024],f32>
%4398 = torch.aten.view %4397, %1433 : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f32>
%4399 = torch.aten.unsqueeze %468, %int-1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640,1],f16>
%4400 = torch.aten.unsqueeze %4399, %int-1 : !torch.vtensor<[640,1],f16>, !torch.int -> !torch.vtensor<[640,1,1],f16>
%4401 = torch.aten.mul.Tensor %4398, %4400 : !torch.vtensor<[2,640,32,32],f32>, !torch.vtensor<[640,1,1],f16> -> !torch.vtensor<[2,640,32,32],f32>
%4402 = torch.aten.unsqueeze %469, %int-1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640,1],f16>
%4403 = torch.aten.unsqueeze %4402, %int-1 : !torch.vtensor<[640,1],f16>, !torch.int -> !torch.vtensor<[640,1,1],f16>
%4404 = torch.aten.add.Tensor %4401, %4403, %int1 : !torch.vtensor<[2,640,32,32],f32>, !torch.vtensor<[640,1,1],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f32>
%4405 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%4406 = torch.aten.to.dtype %4405, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%4407 = torch.prim.ListConstruct %int2, %int640, %int32, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4408 = torch.aten.broadcast_to %4406, %4407 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f16>
%4409 = torch.valsem.aten.copy %4408, %4404, %false : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f32>, !torch.bool -> !torch.vtensor<[2,640,32,32],f16>
// SiLU then 3x3 conv 640 -> 640 (%470/%471).
%4410 = torch.aten.sigmoid %4409 : !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16>
%4411 = torch.aten.mul.Tensor %4410, %4409 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16>
%4412 = torch.aten.convolution %4411, %470, %471, %744, %744, %744, %false, %745, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[640,640,3,3],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
// Resnet shortcut: 1x1 conv 1920 -> 640 (%472/%473) on the concatenated
// input %4328, add the main branch, then divide by %5 (an f64 scalar
// defined above — presumably the resnet output_scale_factor; TODO confirm
// its value).
%4413 = torch.aten.convolution %4328, %472, %473, %744, %745, %744, %false, %745, %int1 : !torch.vtensor<[2,1920,32,32],f16>, !torch.vtensor<[640,1920,1,1],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
%4414 = torch.aten.add.Tensor %4413, %4412, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
%4415 = torch.aten.div.Tensor %4414, %5 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[2,640,32,32],f16>
// GroupNorm ahead of the next spatial transformer (32 groups of 20
// channels, divisor 20480), affine %474/%475. NOTE(review): this instance
// adds eps tensor %4 where the two GroupNorms above add %6 — both are
// defined outside this chunk; confirm the differing constants are
// intentional.
%4416 = torch.aten.clone %4415, %int0 : !torch.vtensor<[2,640,32,32],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
%4417 = torch.aten.view %4416, %1412 : !torch.vtensor<[2,640,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,32,20,1024],f16>
%4418 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%4419 = torch.aten.to.dtype %4418, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%4420 = torch.prim.ListConstruct %int2, %int32, %int20, %int1024 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4421 = torch.aten.broadcast_to %4419, %4420 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,20,1024],f32>
%4422 = torch.valsem.aten.copy %4421, %4417, %false : !torch.vtensor<[2,32,20,1024],f32>, !torch.vtensor<[2,32,20,1024],f16>, !torch.bool -> !torch.vtensor<[2,32,20,1024],f32>
%4423 = torch.aten.to.dtype %4422, %int7, %false, %false, %none : !torch.vtensor<[2,32,20,1024],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,20,1024],f64>
%4424 = torch.aten.sum.dim_IntList %4423, %754, %true, %none : !torch.vtensor<[2,32,20,1024],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%4425 = torch.aten.div.Scalar %4424, %int20480 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%4426 = torch.aten.sub.Tensor %4423, %4425, %float1.000000e00 : !torch.vtensor<[2,32,20,1024],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,20,1024],f64>
%4427 = torch.aten.mul.Tensor %4426, %4426 : !torch.vtensor<[2,32,20,1024],f64>, !torch.vtensor<[2,32,20,1024],f64> -> !torch.vtensor<[2,32,20,1024],f64>
%4428 = torch.aten.sum.dim_IntList %4427, %754, %true, %none : !torch.vtensor<[2,32,20,1024],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%4429 = torch.aten.div.Scalar %4428, %int20480 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%4430 = torch.aten.to.dtype %4429, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%4431 = torch.aten.sum.dim_IntList %4422, %754, %true, %none : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%4432 = torch.aten.div.Scalar %4431, %int20480 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%4433 = torch.aten.add.Tensor %4430, %4, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%4434 = torch.aten.rsqrt %4433 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%4435 = torch.aten.sub.Tensor %4417, %4432, %int1 : !torch.vtensor<[2,32,20,1024],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,20,1024],f32>
%4436 = torch.aten.mul.Tensor %4435, %4434 : !torch.vtensor<[2,32,20,1024],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,20,1024],f32>
%4437 = torch.aten.view %4436, %1433 : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f32>
%4438 = torch.aten.unsqueeze %474, %int-1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640,1],f16>
%4439 = torch.aten.unsqueeze %4438, %int-1 : !torch.vtensor<[640,1],f16>, !torch.int -> !torch.vtensor<[640,1,1],f16>
%4440 = torch.aten.mul.Tensor %4437, %4439 : !torch.vtensor<[2,640,32,32],f32>, !torch.vtensor<[640,1,1],f16> -> !torch.vtensor<[2,640,32,32],f32>
%4441 = torch.aten.unsqueeze %475, %int-1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640,1],f16>
%4442 = torch.aten.unsqueeze %4441, %int-1 : !torch.vtensor<[640,1],f16>, !torch.int -> !torch.vtensor<[640,1,1],f16>
%4443 = torch.aten.add.Tensor %4440, %4442, %int1 : !torch.vtensor<[2,640,32,32],f32>, !torch.vtensor<[640,1,1],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f32>
%4444 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%4445 = torch.aten.to.dtype %4444, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%4446 = torch.prim.ListConstruct %int2, %int640, %int32, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4447 = torch.aten.broadcast_to %4445, %4446 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f16>
%4448 = torch.valsem.aten.copy %4447, %4443, %false : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f32>, !torch.bool -> !torch.vtensor<[2,640,32,32],f16>
// Transformer input projection: 1x1 conv (%476/%477), NCHW -> NHWC permute,
// flatten spatial dims to tokens: [2,640,32,32] -> [2,1024,640].
%4449 = torch.aten.convolution %4448, %476, %477, %744, %745, %744, %false, %745, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[640,640,1,1],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
%4450 = torch.aten.permute %4449, %866 : !torch.vtensor<[2,640,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,32,32,640],f16>
%4451 = torch.aten.view %4450, %1487 : !torch.vtensor<[2,32,32,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
// Start of the next LayerNorm's reduction-dims list; its consumers are
// below this chunk.
%4452 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%4453 = torch.aten.sum.dim_IntList %4451, %4452, %true, %none : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,1024,1],f16>
%4454 = torch.aten.div.Scalar %4453, %int640 : !torch.vtensor<[2,1024,1],f16>, !torch.int -> !torch.vtensor<[2,1024,1],f16>
%4455 = torch.prim.ListConstruct %int2, %int1024, %int640 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4456 = torch.aten.broadcast_to %4454, %4455 : !torch.vtensor<[2,1024,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%4457 = torch.aten.sub.Tensor %4451, %4456, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16>
%4458 = torch.aten.mul.Tensor %4457, %4457 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16>
%4459 = torch.aten.sum.dim_IntList %4458, %4452, %true, %none : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,1024,1],f16>
%4460 = torch.aten.div.Scalar %4459, %int640 : !torch.vtensor<[2,1024,1],f16>, !torch.int -> !torch.vtensor<[2,1024,1],f16>
%4461 = torch.aten.add.Scalar %4460, %float1.000000e-05, %int1 : !torch.vtensor<[2,1024,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,1024,1],f16>
%4462 = torch.aten.rsqrt %4461 : !torch.vtensor<[2,1024,1],f16> -> !torch.vtensor<[2,1024,1],f16>
%4463 = torch.prim.ListConstruct %int2, %int1024, %int640 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4464 = torch.aten.broadcast_to %4462, %4463 : !torch.vtensor<[2,1024,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%4465 = torch.aten.mul.Tensor %4457, %4464 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16>
%4466 = torch.aten.mul.Tensor %4465, %478 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[640],f16> -> !torch.vtensor<[2,1024,640],f16>
%4467 = torch.aten.add.Tensor %4466, %479, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16>
%4468 = torch.aten.transpose.int %480, %int0, %int1 : !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,640],f16>
%4469 = torch.aten.view %4467, %1506 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
%4470 = torch.aten.mm %4469, %4468 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16>
%4471 = torch.aten.view %4470, %1487 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%4472 = torch.aten.transpose.int %481, %int0, %int1 : !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,640],f16>
%4473 = torch.aten.view %4467, %1506 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
%4474 = torch.aten.mm %4473, %4472 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16>
%4475 = torch.aten.view %4474, %1487 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%4476 = torch.aten.transpose.int %482, %int0, %int1 : !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,640],f16>
%4477 = torch.aten.view %4467, %1506 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
%4478 = torch.aten.mm %4477, %4476 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16>
%4479 = torch.aten.view %4478, %1487 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%4480 = torch.aten.view %4471, %1518 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16>
%4481 = torch.aten.permute %4480, %901 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16>
%4482 = torch.aten.clone %4481, %int0 : !torch.vtensor<[2,8,1024,80],f16>, !torch.int -> !torch.vtensor<[2,8,1024,80],f16>
%4483 = torch.aten.view %4482, %1522 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%4484 = torch.aten.view %4475, %1518 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16>
%4485 = torch.aten.permute %4484, %901 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16>
%4486 = torch.aten.clone %4485, %int0 : !torch.vtensor<[2,8,1024,80],f16>, !torch.int -> !torch.vtensor<[2,8,1024,80],f16>
%4487 = torch.aten.view %4486, %1522 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%4488 = torch.aten.view %4479, %1518 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16>
%4489 = torch.aten.permute %4488, %901 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16>
%4490 = torch.aten.clone %4489, %int0 : !torch.vtensor<[2,8,1024,80],f16>, !torch.int -> !torch.vtensor<[2,8,1024,80],f16>
%4491 = torch.aten.view %4490, %1522 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%4492 = torch.aten.transpose.int %4487, %int-1, %int-2 : !torch.vtensor<[16,1024,80],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,80,1024],f16>
%4493 = torch.aten.broadcast_to %4483, %1522 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%4494 = torch.aten.view %4493, %1522 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%4495 = torch.aten.broadcast_to %4492, %1535 : !torch.vtensor<[16,80,1024],f16>, !torch.list<int> -> !torch.vtensor<[16,80,1024],f16>
%4496 = torch.aten.view %4495, %1535 : !torch.vtensor<[16,80,1024],f16>, !torch.list<int> -> !torch.vtensor<[16,80,1024],f16>
%4497 = torch.aten.bmm %4494, %4496 : !torch.vtensor<[16,1024,80],f16>, !torch.vtensor<[16,80,1024],f16> -> !torch.vtensor<[16,1024,1024],f16>
%4498 = torch.aten.view %4497, %1539 : !torch.vtensor<[16,1024,1024],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,1024],f16>
%4499 = torch.aten.mul.Tensor %4498, %2 : !torch.vtensor<[16,1024,1024],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,1024,1024],f16>
%values_38, %indices_39 = torch.aten.max.dim %4499, %int-1, %true : !torch.vtensor<[16,1024,1024],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,1024,1],f16>, !torch.vtensor<[16,1024,1],si64>
%4500 = torch.aten.sub.Tensor %4499, %values_38, %float1.000000e00 : !torch.vtensor<[16,1024,1024],f16>, !torch.vtensor<[16,1024,1],f16>, !torch.float -> !torch.vtensor<[16,1024,1024],f16>
%4501 = torch.aten.exp %4500 : !torch.vtensor<[16,1024,1024],f16> -> !torch.vtensor<[16,1024,1024],f16>
%4502 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%4503 = torch.aten.sum.dim_IntList %4501, %4502, %true, %none : !torch.vtensor<[16,1024,1024],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[16,1024,1],f16>
%4504 = torch.aten.div.Tensor %4501, %4503 : !torch.vtensor<[16,1024,1024],f16>, !torch.vtensor<[16,1024,1],f16> -> !torch.vtensor<[16,1024,1024],f16>
%4505 = torch.aten.broadcast_to %4504, %1539 : !torch.vtensor<[16,1024,1024],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,1024],f16>
%4506 = torch.aten.view %4505, %1539 : !torch.vtensor<[16,1024,1024],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,1024],f16>
%4507 = torch.aten.broadcast_to %4491, %1522 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%4508 = torch.aten.view %4507, %1522 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%4509 = torch.aten.bmm %4506, %4508 : !torch.vtensor<[16,1024,1024],f16>, !torch.vtensor<[16,1024,80],f16> -> !torch.vtensor<[16,1024,80],f16>
%4510 = torch.aten.view %4509, %1522 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%4511 = torch.aten.view %4510, %1553 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16>
%4512 = torch.aten.permute %4511, %901 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16>
%4513 = torch.aten.clone %4512, %int0 : !torch.vtensor<[2,1024,8,80],f16>, !torch.int -> !torch.vtensor<[2,1024,8,80],f16>
%4514 = torch.aten.view %4513, %1487 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%4515 = torch.aten.transpose.int %483, %int0, %int1 : !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,640],f16>
%4516 = torch.aten.view %4514, %1506 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
%4517 = torch.aten.mm %4516, %4515 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16>
%4518 = torch.aten.mul.Scalar %484, %int1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640],f16>
%4519 = torch.aten.add.Tensor %4518, %4517, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2048,640],f16>, !torch.int -> !torch.vtensor<[2048,640],f16>
%4520 = torch.aten.view %4519, %1487 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%4521 = torch.aten.add.Tensor %4520, %4451, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16>
%4522 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%4523 = torch.aten.sum.dim_IntList %4521, %4522, %true, %none : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,1024,1],f16>
%4524 = torch.aten.div.Scalar %4523, %int640 : !torch.vtensor<[2,1024,1],f16>, !torch.int -> !torch.vtensor<[2,1024,1],f16>
%4525 = torch.prim.ListConstruct %int2, %int1024, %int640 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4526 = torch.aten.broadcast_to %4524, %4525 : !torch.vtensor<[2,1024,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%4527 = torch.aten.sub.Tensor %4521, %4526, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16>
%4528 = torch.aten.mul.Tensor %4527, %4527 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16>
%4529 = torch.aten.sum.dim_IntList %4528, %4522, %true, %none : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,1024,1],f16>
%4530 = torch.aten.div.Scalar %4529, %int640 : !torch.vtensor<[2,1024,1],f16>, !torch.int -> !torch.vtensor<[2,1024,1],f16>
%4531 = torch.aten.add.Scalar %4530, %float1.000000e-05, %int1 : !torch.vtensor<[2,1024,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,1024,1],f16>
%4532 = torch.aten.rsqrt %4531 : !torch.vtensor<[2,1024,1],f16> -> !torch.vtensor<[2,1024,1],f16>
%4533 = torch.prim.ListConstruct %int2, %int1024, %int640 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4534 = torch.aten.broadcast_to %4532, %4533 : !torch.vtensor<[2,1024,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%4535 = torch.aten.mul.Tensor %4527, %4534 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16>
%4536 = torch.aten.mul.Tensor %4535, %485 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[640],f16> -> !torch.vtensor<[2,1024,640],f16>
%4537 = torch.aten.add.Tensor %4536, %486, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16>
%4538 = torch.aten.transpose.int %487, %int0, %int1 : !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,640],f16>
%4539 = torch.aten.view %4537, %1506 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
%4540 = torch.aten.mm %4539, %4538 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16>
%4541 = torch.aten.view %4540, %1487 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%4542 = torch.aten.transpose.int %488, %int0, %int1 : !torch.vtensor<[640,768],f16>, !torch.int, !torch.int -> !torch.vtensor<[768,640],f16>
%4543 = torch.aten.view %arg2, %968 : !torch.vtensor<[2,77,768],f16>, !torch.list<int> -> !torch.vtensor<[154,768],f16>
%4544 = torch.aten.mm %4543, %4542 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,640],f16> -> !torch.vtensor<[154,640],f16>
%4545 = torch.aten.view %4544, %1588 : !torch.vtensor<[154,640],f16>, !torch.list<int> -> !torch.vtensor<[2,77,640],f16>
%4546 = torch.aten.transpose.int %489, %int0, %int1 : !torch.vtensor<[640,768],f16>, !torch.int, !torch.int -> !torch.vtensor<[768,640],f16>
%4547 = torch.aten.view %arg2, %968 : !torch.vtensor<[2,77,768],f16>, !torch.list<int> -> !torch.vtensor<[154,768],f16>
%4548 = torch.aten.mm %4547, %4546 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,640],f16> -> !torch.vtensor<[154,640],f16>
%4549 = torch.aten.view %4548, %1588 : !torch.vtensor<[154,640],f16>, !torch.list<int> -> !torch.vtensor<[2,77,640],f16>
%4550 = torch.aten.view %4541, %1518 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16>
%4551 = torch.aten.permute %4550, %901 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16>
%4552 = torch.aten.clone %4551, %int0 : !torch.vtensor<[2,8,1024,80],f16>, !torch.int -> !torch.vtensor<[2,8,1024,80],f16>
%4553 = torch.aten.view %4552, %1522 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%4554 = torch.aten.view %4545, %1598 : !torch.vtensor<[2,77,640],f16>, !torch.list<int> -> !torch.vtensor<[2,77,8,80],f16>
%4555 = torch.aten.permute %4554, %901 : !torch.vtensor<[2,77,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,80],f16>
%4556 = torch.aten.clone %4555, %int0 : !torch.vtensor<[2,8,77,80],f16>, !torch.int -> !torch.vtensor<[2,8,77,80],f16>
%4557 = torch.aten.view %4556, %1602 : !torch.vtensor<[2,8,77,80],f16>, !torch.list<int> -> !torch.vtensor<[16,77,80],f16>
%4558 = torch.aten.view %4549, %1598 : !torch.vtensor<[2,77,640],f16>, !torch.list<int> -> !torch.vtensor<[2,77,8,80],f16>
%4559 = torch.aten.permute %4558, %901 : !torch.vtensor<[2,77,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,80],f16>
%4560 = torch.aten.clone %4559, %int0 : !torch.vtensor<[2,8,77,80],f16>, !torch.int -> !torch.vtensor<[2,8,77,80],f16>
%4561 = torch.aten.view %4560, %1602 : !torch.vtensor<[2,8,77,80],f16>, !torch.list<int> -> !torch.vtensor<[16,77,80],f16>
%4562 = torch.aten.transpose.int %4557, %int-1, %int-2 : !torch.vtensor<[16,77,80],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,80,77],f16>
%4563 = torch.aten.broadcast_to %4553, %1522 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%4564 = torch.aten.view %4563, %1522 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%4565 = torch.aten.broadcast_to %4562, %1611 : !torch.vtensor<[16,80,77],f16>, !torch.list<int> -> !torch.vtensor<[16,80,77],f16>
%4566 = torch.aten.view %4565, %1611 : !torch.vtensor<[16,80,77],f16>, !torch.list<int> -> !torch.vtensor<[16,80,77],f16>
%4567 = torch.aten.bmm %4564, %4566 : !torch.vtensor<[16,1024,80],f16>, !torch.vtensor<[16,80,77],f16> -> !torch.vtensor<[16,1024,77],f16>
%4568 = torch.aten.view %4567, %1615 : !torch.vtensor<[16,1024,77],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,77],f16>
%4569 = torch.aten.mul.Tensor %4568, %2 : !torch.vtensor<[16,1024,77],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,1024,77],f16>
%values_40, %indices_41 = torch.aten.max.dim %4569, %int-1, %true : !torch.vtensor<[16,1024,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,1024,1],f16>, !torch.vtensor<[16,1024,1],si64>
%4570 = torch.aten.sub.Tensor %4569, %values_40, %float1.000000e00 : !torch.vtensor<[16,1024,77],f16>, !torch.vtensor<[16,1024,1],f16>, !torch.float -> !torch.vtensor<[16,1024,77],f16>
%4571 = torch.aten.exp %4570 : !torch.vtensor<[16,1024,77],f16> -> !torch.vtensor<[16,1024,77],f16>
%4572 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%4573 = torch.aten.sum.dim_IntList %4571, %4572, %true, %none : !torch.vtensor<[16,1024,77],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[16,1024,1],f16>
%4574 = torch.aten.div.Tensor %4571, %4573 : !torch.vtensor<[16,1024,77],f16>, !torch.vtensor<[16,1024,1],f16> -> !torch.vtensor<[16,1024,77],f16>
%4575 = torch.aten.broadcast_to %4574, %1615 : !torch.vtensor<[16,1024,77],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,77],f16>
%4576 = torch.aten.view %4575, %1615 : !torch.vtensor<[16,1024,77],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,77],f16>
%4577 = torch.aten.broadcast_to %4561, %1602 : !torch.vtensor<[16,77,80],f16>, !torch.list<int> -> !torch.vtensor<[16,77,80],f16>
%4578 = torch.aten.view %4577, %1602 : !torch.vtensor<[16,77,80],f16>, !torch.list<int> -> !torch.vtensor<[16,77,80],f16>
%4579 = torch.aten.bmm %4576, %4578 : !torch.vtensor<[16,1024,77],f16>, !torch.vtensor<[16,77,80],f16> -> !torch.vtensor<[16,1024,80],f16>
%4580 = torch.aten.view %4579, %1522 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%4581 = torch.aten.view %4580, %1553 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16>
%4582 = torch.aten.permute %4581, %901 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16>
%4583 = torch.aten.clone %4582, %int0 : !torch.vtensor<[2,1024,8,80],f16>, !torch.int -> !torch.vtensor<[2,1024,8,80],f16>
%4584 = torch.aten.view %4583, %1487 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%4585 = torch.aten.transpose.int %490, %int0, %int1 : !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,640],f16>
%4586 = torch.aten.view %4584, %1506 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
%4587 = torch.aten.mm %4586, %4585 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16>
%4588 = torch.aten.mul.Scalar %491, %int1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640],f16>
%4589 = torch.aten.add.Tensor %4588, %4587, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2048,640],f16>, !torch.int -> !torch.vtensor<[2048,640],f16>
%4590 = torch.aten.view %4589, %1487 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%4591 = torch.aten.add.Tensor %4590, %4521, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16>
%4592 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%4593 = torch.aten.sum.dim_IntList %4591, %4592, %true, %none : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,1024,1],f16>
%4594 = torch.aten.div.Scalar %4593, %int640 : !torch.vtensor<[2,1024,1],f16>, !torch.int -> !torch.vtensor<[2,1024,1],f16>
%4595 = torch.prim.ListConstruct %int2, %int1024, %int640 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4596 = torch.aten.broadcast_to %4594, %4595 : !torch.vtensor<[2,1024,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%4597 = torch.aten.sub.Tensor %4591, %4596, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16>
%4598 = torch.aten.mul.Tensor %4597, %4597 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16>
%4599 = torch.aten.sum.dim_IntList %4598, %4592, %true, %none : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,1024,1],f16>
%4600 = torch.aten.div.Scalar %4599, %int640 : !torch.vtensor<[2,1024,1],f16>, !torch.int -> !torch.vtensor<[2,1024,1],f16>
%4601 = torch.aten.add.Scalar %4600, %float1.000000e-05, %int1 : !torch.vtensor<[2,1024,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,1024,1],f16>
%4602 = torch.aten.rsqrt %4601 : !torch.vtensor<[2,1024,1],f16> -> !torch.vtensor<[2,1024,1],f16>
%4603 = torch.prim.ListConstruct %int2, %int1024, %int640 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4604 = torch.aten.broadcast_to %4602, %4603 : !torch.vtensor<[2,1024,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%4605 = torch.aten.mul.Tensor %4597, %4604 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16>
%4606 = torch.aten.mul.Tensor %4605, %492 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[640],f16> -> !torch.vtensor<[2,1024,640],f16>
%4607 = torch.aten.add.Tensor %4606, %493, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16>
%4608 = torch.aten.transpose.int %494, %int0, %int1 : !torch.vtensor<[5120,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,5120],f16>
%4609 = torch.aten.view %4607, %1506 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
%4610 = torch.aten.mm %4609, %4608 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,5120],f16> -> !torch.vtensor<[2048,5120],f16>
%4611 = torch.aten.mul.Scalar %495, %int1 : !torch.vtensor<[5120],f16>, !torch.int -> !torch.vtensor<[5120],f16>
%4612 = torch.aten.add.Tensor %4611, %4610, %int1 : !torch.vtensor<[5120],f16>, !torch.vtensor<[2048,5120],f16>, !torch.int -> !torch.vtensor<[2048,5120],f16>
%4613 = torch.aten.view %4612, %1661 : !torch.vtensor<[2048,5120],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,5120],f16>
%4614 = torch.aten.slice.Tensor %4613, %int-1, %int0, %int2560, %int1 : !torch.vtensor<[2,1024,5120],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1024,2560],f16>
%4615 = torch.aten.slice.Tensor %4613, %int-1, %int2560, %int5120, %int1 : !torch.vtensor<[2,1024,5120],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1024,2560],f16>
%4616 = torch.aten.gelu %4615, %str : !torch.vtensor<[2,1024,2560],f16>, !torch.str -> !torch.vtensor<[2,1024,2560],f16>
%4617 = torch.aten.mul.Tensor %4614, %4616 : !torch.vtensor<[2,1024,2560],f16>, !torch.vtensor<[2,1024,2560],f16> -> !torch.vtensor<[2,1024,2560],f16>
%4618 = torch.aten.transpose.int %496, %int0, %int1 : !torch.vtensor<[640,2560],f16>, !torch.int, !torch.int -> !torch.vtensor<[2560,640],f16>
%4619 = torch.aten.view %4617, %1668 : !torch.vtensor<[2,1024,2560],f16>, !torch.list<int> -> !torch.vtensor<[2048,2560],f16>
%4620 = torch.aten.mm %4619, %4618 : !torch.vtensor<[2048,2560],f16>, !torch.vtensor<[2560,640],f16> -> !torch.vtensor<[2048,640],f16>
%4621 = torch.aten.mul.Scalar %497, %int1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640],f16>
%4622 = torch.aten.add.Tensor %4621, %4620, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2048,640],f16>, !torch.int -> !torch.vtensor<[2048,640],f16>
%4623 = torch.aten.view %4622, %1487 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%4624 = torch.aten.add.Tensor %4623, %4591, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16>
%4625 = torch.aten.view %4624, %1675 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2,32,32,640],f16>
%4626 = torch.aten.permute %4625, %1060 : !torch.vtensor<[2,32,32,640],f16>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f16>
%4627 = torch.aten.convolution %4626, %498, %499, %744, %745, %744, %false, %745, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[640,640,1,1],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
%4628 = torch.aten.add.Tensor %4627, %4415, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
%4629 = torch.prim.ListConstruct %4628, %1679 : (!torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f16>) -> !torch.list<vtensor>
%4630 = torch.aten.cat %4629, %int1 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[2,1280,32,32],f16>
%4631 = torch.aten.clone %4630, %int0 : !torch.vtensor<[2,1280,32,32],f16>, !torch.int -> !torch.vtensor<[2,1280,32,32],f16>
%4632 = torch.prim.ListConstruct %int2, %int32, %int40, %int1024 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4633 = torch.aten.view %4631, %4632 : !torch.vtensor<[2,1280,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,32,40,1024],f16>
%4634 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%4635 = torch.aten.to.dtype %4634, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%4636 = torch.prim.ListConstruct %int2, %int32, %int40, %int1024 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4637 = torch.aten.broadcast_to %4635, %4636 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,40,1024],f32>
%4638 = torch.valsem.aten.copy %4637, %4633, %false : !torch.vtensor<[2,32,40,1024],f32>, !torch.vtensor<[2,32,40,1024],f16>, !torch.bool -> !torch.vtensor<[2,32,40,1024],f32>
%4639 = torch.aten.to.dtype %4638, %int7, %false, %false, %none : !torch.vtensor<[2,32,40,1024],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,40,1024],f64>
%4640 = torch.aten.sum.dim_IntList %4639, %754, %true, %none : !torch.vtensor<[2,32,40,1024],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%4641 = torch.aten.div.Scalar %4640, %int40960 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%4642 = torch.aten.sub.Tensor %4639, %4641, %float1.000000e00 : !torch.vtensor<[2,32,40,1024],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,40,1024],f64>
%4643 = torch.aten.mul.Tensor %4642, %4642 : !torch.vtensor<[2,32,40,1024],f64>, !torch.vtensor<[2,32,40,1024],f64> -> !torch.vtensor<[2,32,40,1024],f64>
%4644 = torch.aten.sum.dim_IntList %4643, %754, %true, %none : !torch.vtensor<[2,32,40,1024],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%4645 = torch.aten.div.Scalar %4644, %int40960 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%4646 = torch.aten.to.dtype %4645, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%4647 = torch.aten.sum.dim_IntList %4638, %754, %true, %none : !torch.vtensor<[2,32,40,1024],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%4648 = torch.aten.div.Scalar %4647, %int40960 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%4649 = torch.aten.add.Tensor %4646, %6, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%4650 = torch.aten.rsqrt %4649 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%4651 = torch.aten.sub.Tensor %4633, %4648, %int1 : !torch.vtensor<[2,32,40,1024],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,1024],f32>
%4652 = torch.aten.mul.Tensor %4651, %4650 : !torch.vtensor<[2,32,40,1024],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,1024],f32>
%4653 = torch.prim.ListConstruct %int2, %int1280, %int32, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4654 = torch.aten.view %4652, %4653 : !torch.vtensor<[2,32,40,1024],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,32,32],f32>
%4655 = torch.aten.unsqueeze %500, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%4656 = torch.aten.unsqueeze %4655, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%4657 = torch.aten.mul.Tensor %4654, %4656 : !torch.vtensor<[2,1280,32,32],f32>, !torch.vtensor<[1280,1,1],f16> -> !torch.vtensor<[2,1280,32,32],f32>
%4658 = torch.aten.unsqueeze %501, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%4659 = torch.aten.unsqueeze %4658, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%4660 = torch.aten.add.Tensor %4657, %4659, %int1 : !torch.vtensor<[2,1280,32,32],f32>, !torch.vtensor<[1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,32,32],f32>
%4661 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%4662 = torch.aten.to.dtype %4661, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%4663 = torch.prim.ListConstruct %int2, %int1280, %int32, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4664 = torch.aten.broadcast_to %4662, %4663 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,32,32],f16>
%4665 = torch.valsem.aten.copy %4664, %4660, %false : !torch.vtensor<[2,1280,32,32],f16>, !torch.vtensor<[2,1280,32,32],f32>, !torch.bool -> !torch.vtensor<[2,1280,32,32],f16>
%4666 = torch.aten.sigmoid %4665 : !torch.vtensor<[2,1280,32,32],f16> -> !torch.vtensor<[2,1280,32,32],f16>
%4667 = torch.aten.mul.Tensor %4666, %4665 : !torch.vtensor<[2,1280,32,32],f16>, !torch.vtensor<[2,1280,32,32],f16> -> !torch.vtensor<[2,1280,32,32],f16>
%4668 = torch.aten.convolution %4667, %502, %503, %744, %744, %744, %false, %745, %int1 : !torch.vtensor<[2,1280,32,32],f16>, !torch.vtensor<[640,1280,3,3],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
%4669 = torch.aten.sigmoid %743 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%4670 = torch.aten.mul.Tensor %4669, %743 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%4671 = torch.aten.transpose.int %504, %int0, %int1 : !torch.vtensor<[640,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,640],f16>
%4672 = torch.aten.mm %4670, %4671 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,640],f16> -> !torch.vtensor<[2,640],f16>
%4673 = torch.aten.mul.Scalar %505, %int1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640],f16>
%4674 = torch.aten.add.Tensor %4673, %4672, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2,640],f16>, !torch.int -> !torch.vtensor<[2,640],f16>
%4675 = torch.aten.slice.Tensor %4674, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,640],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,640],f16>
%4676 = torch.aten.slice.Tensor %4675, %int1, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,640],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,640],f16>
%4677 = torch.aten.unsqueeze %4676, %int2 : !torch.vtensor<[2,640],f16>, !torch.int -> !torch.vtensor<[2,640,1],f16>
%4678 = torch.aten.unsqueeze %4677, %int3 : !torch.vtensor<[2,640,1],f16>, !torch.int -> !torch.vtensor<[2,640,1,1],f16>
%4679 = torch.aten.add.Tensor %4668, %4678, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,1,1],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
%4680 = torch.aten.view %4679, %1412 : !torch.vtensor<[2,640,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,32,20,1024],f16>
%4681 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%4682 = torch.aten.to.dtype %4681, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%4683 = torch.prim.ListConstruct %int2, %int32, %int20, %int1024 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4684 = torch.aten.broadcast_to %4682, %4683 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,20,1024],f32>
%4685 = torch.valsem.aten.copy %4684, %4680, %false : !torch.vtensor<[2,32,20,1024],f32>, !torch.vtensor<[2,32,20,1024],f16>, !torch.bool -> !torch.vtensor<[2,32,20,1024],f32>
%4686 = torch.aten.to.dtype %4685, %int7, %false, %false, %none : !torch.vtensor<[2,32,20,1024],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,20,1024],f64>
%4687 = torch.aten.sum.dim_IntList %4686, %754, %true, %none : !torch.vtensor<[2,32,20,1024],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%4688 = torch.aten.div.Scalar %4687, %int20480 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%4689 = torch.aten.sub.Tensor %4686, %4688, %float1.000000e00 : !torch.vtensor<[2,32,20,1024],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,20,1024],f64>
%4690 = torch.aten.mul.Tensor %4689, %4689 : !torch.vtensor<[2,32,20,1024],f64>, !torch.vtensor<[2,32,20,1024],f64> -> !torch.vtensor<[2,32,20,1024],f64>
%4691 = torch.aten.sum.dim_IntList %4690, %754, %true, %none : !torch.vtensor<[2,32,20,1024],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%4692 = torch.aten.div.Scalar %4691, %int20480 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%4693 = torch.aten.to.dtype %4692, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%4694 = torch.aten.sum.dim_IntList %4685, %754, %true, %none : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%4695 = torch.aten.div.Scalar %4694, %int20480 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%4696 = torch.aten.add.Tensor %4693, %6, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%4697 = torch.aten.rsqrt %4696 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%4698 = torch.aten.sub.Tensor %4680, %4695, %int1 : !torch.vtensor<[2,32,20,1024],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,20,1024],f32>
%4699 = torch.aten.mul.Tensor %4698, %4697 : !torch.vtensor<[2,32,20,1024],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,20,1024],f32>
%4700 = torch.aten.view %4699, %1433 : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f32>
%4701 = torch.aten.unsqueeze %506, %int-1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640,1],f16>
%4702 = torch.aten.unsqueeze %4701, %int-1 : !torch.vtensor<[640,1],f16>, !torch.int -> !torch.vtensor<[640,1,1],f16>
%4703 = torch.aten.mul.Tensor %4700, %4702 : !torch.vtensor<[2,640,32,32],f32>, !torch.vtensor<[640,1,1],f16> -> !torch.vtensor<[2,640,32,32],f32>
%4704 = torch.aten.unsqueeze %507, %int-1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640,1],f16>
%4705 = torch.aten.unsqueeze %4704, %int-1 : !torch.vtensor<[640,1],f16>, !torch.int -> !torch.vtensor<[640,1,1],f16>
%4706 = torch.aten.add.Tensor %4703, %4705, %int1 : !torch.vtensor<[2,640,32,32],f32>, !torch.vtensor<[640,1,1],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f32>
%4707 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%4708 = torch.aten.to.dtype %4707, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%4709 = torch.prim.ListConstruct %int2, %int640, %int32, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4710 = torch.aten.broadcast_to %4708, %4709 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f16>
%4711 = torch.valsem.aten.copy %4710, %4706, %false : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f32>, !torch.bool -> !torch.vtensor<[2,640,32,32],f16>
%4712 = torch.aten.sigmoid %4711 : !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16>
%4713 = torch.aten.mul.Tensor %4712, %4711 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16>
%4714 = torch.aten.convolution %4713, %508, %509, %744, %744, %744, %false, %745, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[640,640,3,3],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
%4715 = torch.aten.convolution %4630, %510, %511, %744, %745, %744, %false, %745, %int1 : !torch.vtensor<[2,1280,32,32],f16>, !torch.vtensor<[640,1280,1,1],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
%4716 = torch.aten.add.Tensor %4715, %4714, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
%4717 = torch.aten.div.Tensor %4716, %5 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[2,640,32,32],f16>
%4718 = torch.aten.clone %4717, %int0 : !torch.vtensor<[2,640,32,32],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
%4719 = torch.aten.view %4718, %1412 : !torch.vtensor<[2,640,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,32,20,1024],f16>
%4720 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%4721 = torch.aten.to.dtype %4720, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%4722 = torch.prim.ListConstruct %int2, %int32, %int20, %int1024 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4723 = torch.aten.broadcast_to %4721, %4722 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,20,1024],f32>
%4724 = torch.valsem.aten.copy %4723, %4719, %false : !torch.vtensor<[2,32,20,1024],f32>, !torch.vtensor<[2,32,20,1024],f16>, !torch.bool -> !torch.vtensor<[2,32,20,1024],f32>
%4725 = torch.aten.to.dtype %4724, %int7, %false, %false, %none : !torch.vtensor<[2,32,20,1024],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,20,1024],f64>
%4726 = torch.aten.sum.dim_IntList %4725, %754, %true, %none : !torch.vtensor<[2,32,20,1024],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%4727 = torch.aten.div.Scalar %4726, %int20480 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%4728 = torch.aten.sub.Tensor %4725, %4727, %float1.000000e00 : !torch.vtensor<[2,32,20,1024],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,20,1024],f64>
%4729 = torch.aten.mul.Tensor %4728, %4728 : !torch.vtensor<[2,32,20,1024],f64>, !torch.vtensor<[2,32,20,1024],f64> -> !torch.vtensor<[2,32,20,1024],f64>
%4730 = torch.aten.sum.dim_IntList %4729, %754, %true, %none : !torch.vtensor<[2,32,20,1024],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%4731 = torch.aten.div.Scalar %4730, %int20480 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%4732 = torch.aten.to.dtype %4731, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%4733 = torch.aten.sum.dim_IntList %4724, %754, %true, %none : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%4734 = torch.aten.div.Scalar %4733, %int20480 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%4735 = torch.aten.add.Tensor %4732, %4, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%4736 = torch.aten.rsqrt %4735 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%4737 = torch.aten.sub.Tensor %4719, %4734, %int1 : !torch.vtensor<[2,32,20,1024],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,20,1024],f32>
%4738 = torch.aten.mul.Tensor %4737, %4736 : !torch.vtensor<[2,32,20,1024],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,20,1024],f32>
%4739 = torch.aten.view %4738, %1433 : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f32>
%4740 = torch.aten.unsqueeze %512, %int-1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640,1],f16>
%4741 = torch.aten.unsqueeze %4740, %int-1 : !torch.vtensor<[640,1],f16>, !torch.int -> !torch.vtensor<[640,1,1],f16>
%4742 = torch.aten.mul.Tensor %4739, %4741 : !torch.vtensor<[2,640,32,32],f32>, !torch.vtensor<[640,1,1],f16> -> !torch.vtensor<[2,640,32,32],f32>
%4743 = torch.aten.unsqueeze %513, %int-1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640,1],f16>
%4744 = torch.aten.unsqueeze %4743, %int-1 : !torch.vtensor<[640,1],f16>, !torch.int -> !torch.vtensor<[640,1,1],f16>
%4745 = torch.aten.add.Tensor %4742, %4744, %int1 : !torch.vtensor<[2,640,32,32],f32>, !torch.vtensor<[640,1,1],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f32>
%4746 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%4747 = torch.aten.to.dtype %4746, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%4748 = torch.prim.ListConstruct %int2, %int640, %int32, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4749 = torch.aten.broadcast_to %4747, %4748 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f16>
%4750 = torch.valsem.aten.copy %4749, %4745, %false : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f32>, !torch.bool -> !torch.vtensor<[2,640,32,32],f16>
%4751 = torch.aten.convolution %4750, %514, %515, %744, %745, %744, %false, %745, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[640,640,1,1],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
%4752 = torch.aten.permute %4751, %866 : !torch.vtensor<[2,640,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,32,32,640],f16>
%4753 = torch.aten.view %4752, %1487 : !torch.vtensor<[2,32,32,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%4754 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%4755 = torch.aten.sum.dim_IntList %4753, %4754, %true, %none : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,1024,1],f16>
%4756 = torch.aten.div.Scalar %4755, %int640 : !torch.vtensor<[2,1024,1],f16>, !torch.int -> !torch.vtensor<[2,1024,1],f16>
%4757 = torch.prim.ListConstruct %int2, %int1024, %int640 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4758 = torch.aten.broadcast_to %4756, %4757 : !torch.vtensor<[2,1024,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%4759 = torch.aten.sub.Tensor %4753, %4758, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16>
%4760 = torch.aten.mul.Tensor %4759, %4759 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16>
%4761 = torch.aten.sum.dim_IntList %4760, %4754, %true, %none : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,1024,1],f16>
%4762 = torch.aten.div.Scalar %4761, %int640 : !torch.vtensor<[2,1024,1],f16>, !torch.int -> !torch.vtensor<[2,1024,1],f16>
%4763 = torch.aten.add.Scalar %4762, %float1.000000e-05, %int1 : !torch.vtensor<[2,1024,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,1024,1],f16>
%4764 = torch.aten.rsqrt %4763 : !torch.vtensor<[2,1024,1],f16> -> !torch.vtensor<[2,1024,1],f16>
%4765 = torch.prim.ListConstruct %int2, %int1024, %int640 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4766 = torch.aten.broadcast_to %4764, %4765 : !torch.vtensor<[2,1024,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%4767 = torch.aten.mul.Tensor %4759, %4766 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16>
%4768 = torch.aten.mul.Tensor %4767, %516 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[640],f16> -> !torch.vtensor<[2,1024,640],f16>
%4769 = torch.aten.add.Tensor %4768, %517, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16>
%4770 = torch.aten.transpose.int %518, %int0, %int1 : !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,640],f16>
%4771 = torch.aten.view %4769, %1506 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
%4772 = torch.aten.mm %4771, %4770 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16>
%4773 = torch.aten.view %4772, %1487 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%4774 = torch.aten.transpose.int %519, %int0, %int1 : !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,640],f16>
%4775 = torch.aten.view %4769, %1506 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
%4776 = torch.aten.mm %4775, %4774 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16>
%4777 = torch.aten.view %4776, %1487 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%4778 = torch.aten.transpose.int %520, %int0, %int1 : !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,640],f16>
%4779 = torch.aten.view %4769, %1506 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
%4780 = torch.aten.mm %4779, %4778 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16>
%4781 = torch.aten.view %4780, %1487 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%4782 = torch.aten.view %4773, %1518 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16>
%4783 = torch.aten.permute %4782, %901 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16>
%4784 = torch.aten.clone %4783, %int0 : !torch.vtensor<[2,8,1024,80],f16>, !torch.int -> !torch.vtensor<[2,8,1024,80],f16>
%4785 = torch.aten.view %4784, %1522 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%4786 = torch.aten.view %4777, %1518 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16>
%4787 = torch.aten.permute %4786, %901 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16>
%4788 = torch.aten.clone %4787, %int0 : !torch.vtensor<[2,8,1024,80],f16>, !torch.int -> !torch.vtensor<[2,8,1024,80],f16>
%4789 = torch.aten.view %4788, %1522 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%4790 = torch.aten.view %4781, %1518 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16>
%4791 = torch.aten.permute %4790, %901 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16>
%4792 = torch.aten.clone %4791, %int0 : !torch.vtensor<[2,8,1024,80],f16>, !torch.int -> !torch.vtensor<[2,8,1024,80],f16>
%4793 = torch.aten.view %4792, %1522 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%4794 = torch.aten.transpose.int %4789, %int-1, %int-2 : !torch.vtensor<[16,1024,80],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,80,1024],f16>
%4795 = torch.aten.broadcast_to %4785, %1522 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%4796 = torch.aten.view %4795, %1522 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%4797 = torch.aten.broadcast_to %4794, %1535 : !torch.vtensor<[16,80,1024],f16>, !torch.list<int> -> !torch.vtensor<[16,80,1024],f16>
%4798 = torch.aten.view %4797, %1535 : !torch.vtensor<[16,80,1024],f16>, !torch.list<int> -> !torch.vtensor<[16,80,1024],f16>
%4799 = torch.aten.bmm %4796, %4798 : !torch.vtensor<[16,1024,80],f16>, !torch.vtensor<[16,80,1024],f16> -> !torch.vtensor<[16,1024,1024],f16>
%4800 = torch.aten.view %4799, %1539 : !torch.vtensor<[16,1024,1024],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,1024],f16>
%4801 = torch.aten.mul.Tensor %4800, %2 : !torch.vtensor<[16,1024,1024],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,1024,1024],f16>
%values_42, %indices_43 = torch.aten.max.dim %4801, %int-1, %true : !torch.vtensor<[16,1024,1024],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,1024,1],f16>, !torch.vtensor<[16,1024,1],si64>
%4802 = torch.aten.sub.Tensor %4801, %values_42, %float1.000000e00 : !torch.vtensor<[16,1024,1024],f16>, !torch.vtensor<[16,1024,1],f16>, !torch.float -> !torch.vtensor<[16,1024,1024],f16>
%4803 = torch.aten.exp %4802 : !torch.vtensor<[16,1024,1024],f16> -> !torch.vtensor<[16,1024,1024],f16>
%4804 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%4805 = torch.aten.sum.dim_IntList %4803, %4804, %true, %none : !torch.vtensor<[16,1024,1024],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[16,1024,1],f16>
%4806 = torch.aten.div.Tensor %4803, %4805 : !torch.vtensor<[16,1024,1024],f16>, !torch.vtensor<[16,1024,1],f16> -> !torch.vtensor<[16,1024,1024],f16>
%4807 = torch.aten.broadcast_to %4806, %1539 : !torch.vtensor<[16,1024,1024],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,1024],f16>
%4808 = torch.aten.view %4807, %1539 : !torch.vtensor<[16,1024,1024],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,1024],f16>
%4809 = torch.aten.broadcast_to %4793, %1522 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%4810 = torch.aten.view %4809, %1522 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%4811 = torch.aten.bmm %4808, %4810 : !torch.vtensor<[16,1024,1024],f16>, !torch.vtensor<[16,1024,80],f16> -> !torch.vtensor<[16,1024,80],f16>
%4812 = torch.aten.view %4811, %1522 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%4813 = torch.aten.view %4812, %1553 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16>
%4814 = torch.aten.permute %4813, %901 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16>
%4815 = torch.aten.clone %4814, %int0 : !torch.vtensor<[2,1024,8,80],f16>, !torch.int -> !torch.vtensor<[2,1024,8,80],f16>
%4816 = torch.aten.view %4815, %1487 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%4817 = torch.aten.transpose.int %521, %int0, %int1 : !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,640],f16>
%4818 = torch.aten.view %4816, %1506 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
%4819 = torch.aten.mm %4818, %4817 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16>
%4820 = torch.aten.mul.Scalar %522, %int1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640],f16>
%4821 = torch.aten.add.Tensor %4820, %4819, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2048,640],f16>, !torch.int -> !torch.vtensor<[2048,640],f16>
%4822 = torch.aten.view %4821, %1487 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%4823 = torch.aten.add.Tensor %4822, %4753, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16>
%4824 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%4825 = torch.aten.sum.dim_IntList %4823, %4824, %true, %none : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,1024,1],f16>
%4826 = torch.aten.div.Scalar %4825, %int640 : !torch.vtensor<[2,1024,1],f16>, !torch.int -> !torch.vtensor<[2,1024,1],f16>
%4827 = torch.prim.ListConstruct %int2, %int1024, %int640 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4828 = torch.aten.broadcast_to %4826, %4827 : !torch.vtensor<[2,1024,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%4829 = torch.aten.sub.Tensor %4823, %4828, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16>
%4830 = torch.aten.mul.Tensor %4829, %4829 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16>
%4831 = torch.aten.sum.dim_IntList %4830, %4824, %true, %none : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,1024,1],f16>
%4832 = torch.aten.div.Scalar %4831, %int640 : !torch.vtensor<[2,1024,1],f16>, !torch.int -> !torch.vtensor<[2,1024,1],f16>
%4833 = torch.aten.add.Scalar %4832, %float1.000000e-05, %int1 : !torch.vtensor<[2,1024,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,1024,1],f16>
%4834 = torch.aten.rsqrt %4833 : !torch.vtensor<[2,1024,1],f16> -> !torch.vtensor<[2,1024,1],f16>
%4835 = torch.prim.ListConstruct %int2, %int1024, %int640 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4836 = torch.aten.broadcast_to %4834, %4835 : !torch.vtensor<[2,1024,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%4837 = torch.aten.mul.Tensor %4829, %4836 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16>
%4838 = torch.aten.mul.Tensor %4837, %523 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[640],f16> -> !torch.vtensor<[2,1024,640],f16>
%4839 = torch.aten.add.Tensor %4838, %524, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16>
%4840 = torch.aten.transpose.int %525, %int0, %int1 : !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,640],f16>
%4841 = torch.aten.view %4839, %1506 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
%4842 = torch.aten.mm %4841, %4840 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16>
%4843 = torch.aten.view %4842, %1487 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%4844 = torch.aten.transpose.int %526, %int0, %int1 : !torch.vtensor<[640,768],f16>, !torch.int, !torch.int -> !torch.vtensor<[768,640],f16>
%4845 = torch.aten.view %arg2, %968 : !torch.vtensor<[2,77,768],f16>, !torch.list<int> -> !torch.vtensor<[154,768],f16>
%4846 = torch.aten.mm %4845, %4844 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,640],f16> -> !torch.vtensor<[154,640],f16>
%4847 = torch.aten.view %4846, %1588 : !torch.vtensor<[154,640],f16>, !torch.list<int> -> !torch.vtensor<[2,77,640],f16>
%4848 = torch.aten.transpose.int %527, %int0, %int1 : !torch.vtensor<[640,768],f16>, !torch.int, !torch.int -> !torch.vtensor<[768,640],f16>
%4849 = torch.aten.view %arg2, %968 : !torch.vtensor<[2,77,768],f16>, !torch.list<int> -> !torch.vtensor<[154,768],f16>
%4850 = torch.aten.mm %4849, %4848 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,640],f16> -> !torch.vtensor<[154,640],f16>
%4851 = torch.aten.view %4850, %1588 : !torch.vtensor<[154,640],f16>, !torch.list<int> -> !torch.vtensor<[2,77,640],f16>
%4852 = torch.aten.view %4843, %1518 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16>
%4853 = torch.aten.permute %4852, %901 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16>
%4854 = torch.aten.clone %4853, %int0 : !torch.vtensor<[2,8,1024,80],f16>, !torch.int -> !torch.vtensor<[2,8,1024,80],f16>
%4855 = torch.aten.view %4854, %1522 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%4856 = torch.aten.view %4847, %1598 : !torch.vtensor<[2,77,640],f16>, !torch.list<int> -> !torch.vtensor<[2,77,8,80],f16>
%4857 = torch.aten.permute %4856, %901 : !torch.vtensor<[2,77,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,80],f16>
%4858 = torch.aten.clone %4857, %int0 : !torch.vtensor<[2,8,77,80],f16>, !torch.int -> !torch.vtensor<[2,8,77,80],f16>
%4859 = torch.aten.view %4858, %1602 : !torch.vtensor<[2,8,77,80],f16>, !torch.list<int> -> !torch.vtensor<[16,77,80],f16>
%4860 = torch.aten.view %4851, %1598 : !torch.vtensor<[2,77,640],f16>, !torch.list<int> -> !torch.vtensor<[2,77,8,80],f16>
%4861 = torch.aten.permute %4860, %901 : !torch.vtensor<[2,77,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,80],f16>
%4862 = torch.aten.clone %4861, %int0 : !torch.vtensor<[2,8,77,80],f16>, !torch.int -> !torch.vtensor<[2,8,77,80],f16>
%4863 = torch.aten.view %4862, %1602 : !torch.vtensor<[2,8,77,80],f16>, !torch.list<int> -> !torch.vtensor<[16,77,80],f16>
%4864 = torch.aten.transpose.int %4859, %int-1, %int-2 : !torch.vtensor<[16,77,80],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,80,77],f16>
%4865 = torch.aten.broadcast_to %4855, %1522 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%4866 = torch.aten.view %4865, %1522 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%4867 = torch.aten.broadcast_to %4864, %1611 : !torch.vtensor<[16,80,77],f16>, !torch.list<int> -> !torch.vtensor<[16,80,77],f16>
%4868 = torch.aten.view %4867, %1611 : !torch.vtensor<[16,80,77],f16>, !torch.list<int> -> !torch.vtensor<[16,80,77],f16>
%4869 = torch.aten.bmm %4866, %4868 : !torch.vtensor<[16,1024,80],f16>, !torch.vtensor<[16,80,77],f16> -> !torch.vtensor<[16,1024,77],f16>
%4870 = torch.aten.view %4869, %1615 : !torch.vtensor<[16,1024,77],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,77],f16>
%4871 = torch.aten.mul.Tensor %4870, %2 : !torch.vtensor<[16,1024,77],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,1024,77],f16>
%values_44, %indices_45 = torch.aten.max.dim %4871, %int-1, %true : !torch.vtensor<[16,1024,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,1024,1],f16>, !torch.vtensor<[16,1024,1],si64>
%4872 = torch.aten.sub.Tensor %4871, %values_44, %float1.000000e00 : !torch.vtensor<[16,1024,77],f16>, !torch.vtensor<[16,1024,1],f16>, !torch.float -> !torch.vtensor<[16,1024,77],f16>
%4873 = torch.aten.exp %4872 : !torch.vtensor<[16,1024,77],f16> -> !torch.vtensor<[16,1024,77],f16>
%4874 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%4875 = torch.aten.sum.dim_IntList %4873, %4874, %true, %none : !torch.vtensor<[16,1024,77],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[16,1024,1],f16>
%4876 = torch.aten.div.Tensor %4873, %4875 : !torch.vtensor<[16,1024,77],f16>, !torch.vtensor<[16,1024,1],f16> -> !torch.vtensor<[16,1024,77],f16>
%4877 = torch.aten.broadcast_to %4876, %1615 : !torch.vtensor<[16,1024,77],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,77],f16>
%4878 = torch.aten.view %4877, %1615 : !torch.vtensor<[16,1024,77],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,77],f16>
%4879 = torch.aten.broadcast_to %4863, %1602 : !torch.vtensor<[16,77,80],f16>, !torch.list<int> -> !torch.vtensor<[16,77,80],f16>
%4880 = torch.aten.view %4879, %1602 : !torch.vtensor<[16,77,80],f16>, !torch.list<int> -> !torch.vtensor<[16,77,80],f16>
%4881 = torch.aten.bmm %4878, %4880 : !torch.vtensor<[16,1024,77],f16>, !torch.vtensor<[16,77,80],f16> -> !torch.vtensor<[16,1024,80],f16>
%4882 = torch.aten.view %4881, %1522 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%4883 = torch.aten.view %4882, %1553 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16>
%4884 = torch.aten.permute %4883, %901 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16>
%4885 = torch.aten.clone %4884, %int0 : !torch.vtensor<[2,1024,8,80],f16>, !torch.int -> !torch.vtensor<[2,1024,8,80],f16>
%4886 = torch.aten.view %4885, %1487 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%4887 = torch.aten.transpose.int %528, %int0, %int1 : !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,640],f16>
%4888 = torch.aten.view %4886, %1506 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
%4889 = torch.aten.mm %4888, %4887 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16>
%4890 = torch.aten.mul.Scalar %529, %int1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640],f16>
%4891 = torch.aten.add.Tensor %4890, %4889, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2048,640],f16>, !torch.int -> !torch.vtensor<[2048,640],f16>
%4892 = torch.aten.view %4891, %1487 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%4893 = torch.aten.add.Tensor %4892, %4823, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16>
%4894 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%4895 = torch.aten.sum.dim_IntList %4893, %4894, %true, %none : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,1024,1],f16>
%4896 = torch.aten.div.Scalar %4895, %int640 : !torch.vtensor<[2,1024,1],f16>, !torch.int -> !torch.vtensor<[2,1024,1],f16>
%4897 = torch.prim.ListConstruct %int2, %int1024, %int640 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4898 = torch.aten.broadcast_to %4896, %4897 : !torch.vtensor<[2,1024,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%4899 = torch.aten.sub.Tensor %4893, %4898, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16>
%4900 = torch.aten.mul.Tensor %4899, %4899 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16>
%4901 = torch.aten.sum.dim_IntList %4900, %4894, %true, %none : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,1024,1],f16>
%4902 = torch.aten.div.Scalar %4901, %int640 : !torch.vtensor<[2,1024,1],f16>, !torch.int -> !torch.vtensor<[2,1024,1],f16>
%4903 = torch.aten.add.Scalar %4902, %float1.000000e-05, %int1 : !torch.vtensor<[2,1024,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,1024,1],f16>
%4904 = torch.aten.rsqrt %4903 : !torch.vtensor<[2,1024,1],f16> -> !torch.vtensor<[2,1024,1],f16>
%4905 = torch.prim.ListConstruct %int2, %int1024, %int640 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4906 = torch.aten.broadcast_to %4904, %4905 : !torch.vtensor<[2,1024,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%4907 = torch.aten.mul.Tensor %4899, %4906 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16>
%4908 = torch.aten.mul.Tensor %4907, %530 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[640],f16> -> !torch.vtensor<[2,1024,640],f16>
%4909 = torch.aten.add.Tensor %4908, %531, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16>
%4910 = torch.aten.transpose.int %532, %int0, %int1 : !torch.vtensor<[5120,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,5120],f16>
%4911 = torch.aten.view %4909, %1506 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
%4912 = torch.aten.mm %4911, %4910 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,5120],f16> -> !torch.vtensor<[2048,5120],f16>
%4913 = torch.aten.mul.Scalar %533, %int1 : !torch.vtensor<[5120],f16>, !torch.int -> !torch.vtensor<[5120],f16>
%4914 = torch.aten.add.Tensor %4913, %4912, %int1 : !torch.vtensor<[5120],f16>, !torch.vtensor<[2048,5120],f16>, !torch.int -> !torch.vtensor<[2048,5120],f16>
%4915 = torch.aten.view %4914, %1661 : !torch.vtensor<[2048,5120],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,5120],f16>
%4916 = torch.aten.slice.Tensor %4915, %int-1, %int0, %int2560, %int1 : !torch.vtensor<[2,1024,5120],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1024,2560],f16>
%4917 = torch.aten.slice.Tensor %4915, %int-1, %int2560, %int5120, %int1 : !torch.vtensor<[2,1024,5120],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1024,2560],f16>
%4918 = torch.aten.gelu %4917, %str : !torch.vtensor<[2,1024,2560],f16>, !torch.str -> !torch.vtensor<[2,1024,2560],f16>
%4919 = torch.aten.mul.Tensor %4916, %4918 : !torch.vtensor<[2,1024,2560],f16>, !torch.vtensor<[2,1024,2560],f16> -> !torch.vtensor<[2,1024,2560],f16>
%4920 = torch.aten.transpose.int %534, %int0, %int1 : !torch.vtensor<[640,2560],f16>, !torch.int, !torch.int -> !torch.vtensor<[2560,640],f16>
%4921 = torch.aten.view %4919, %1668 : !torch.vtensor<[2,1024,2560],f16>, !torch.list<int> -> !torch.vtensor<[2048,2560],f16>
%4922 = torch.aten.mm %4921, %4920 : !torch.vtensor<[2048,2560],f16>, !torch.vtensor<[2560,640],f16> -> !torch.vtensor<[2048,640],f16>
%4923 = torch.aten.mul.Scalar %535, %int1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640],f16>
%4924 = torch.aten.add.Tensor %4923, %4922, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2048,640],f16>, !torch.int -> !torch.vtensor<[2048,640],f16>
%4925 = torch.aten.view %4924, %1487 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%4926 = torch.aten.add.Tensor %4925, %4893, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16>
%4927 = torch.aten.view %4926, %1675 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2,32,32,640],f16>
%4928 = torch.aten.permute %4927, %1060 : !torch.vtensor<[2,32,32,640],f16>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f16>
%4929 = torch.aten.convolution %4928, %536, %537, %744, %745, %744, %false, %745, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[640,640,1,1],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
%4930 = torch.aten.add.Tensor %4929, %4717, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
%4931 = torch.prim.ListConstruct %4930, %1362 : (!torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,320,32,32],f16>) -> !torch.list<vtensor>
%4932 = torch.aten.cat %4931, %int1 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[2,960,32,32],f16>
%4933 = torch.aten.clone %4932, %int0 : !torch.vtensor<[2,960,32,32],f16>, !torch.int -> !torch.vtensor<[2,960,32,32],f16>
%4934 = torch.prim.ListConstruct %int2, %int32, %int30, %int1024 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4935 = torch.aten.view %4933, %4934 : !torch.vtensor<[2,960,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,32,30,1024],f16>
%4936 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%4937 = torch.aten.to.dtype %4936, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%4938 = torch.prim.ListConstruct %int2, %int32, %int30, %int1024 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4939 = torch.aten.broadcast_to %4937, %4938 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,30,1024],f32>
%4940 = torch.valsem.aten.copy %4939, %4935, %false : !torch.vtensor<[2,32,30,1024],f32>, !torch.vtensor<[2,32,30,1024],f16>, !torch.bool -> !torch.vtensor<[2,32,30,1024],f32>
%4941 = torch.aten.to.dtype %4940, %int7, %false, %false, %none : !torch.vtensor<[2,32,30,1024],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,30,1024],f64>
%4942 = torch.aten.sum.dim_IntList %4941, %754, %true, %none : !torch.vtensor<[2,32,30,1024],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%4943 = torch.aten.div.Scalar %4942, %int30720 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%4944 = torch.aten.sub.Tensor %4941, %4943, %float1.000000e00 : !torch.vtensor<[2,32,30,1024],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,30,1024],f64>
%4945 = torch.aten.mul.Tensor %4944, %4944 : !torch.vtensor<[2,32,30,1024],f64>, !torch.vtensor<[2,32,30,1024],f64> -> !torch.vtensor<[2,32,30,1024],f64>
%4946 = torch.aten.sum.dim_IntList %4945, %754, %true, %none : !torch.vtensor<[2,32,30,1024],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%4947 = torch.aten.div.Scalar %4946, %int30720 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%4948 = torch.aten.to.dtype %4947, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%4949 = torch.aten.sum.dim_IntList %4940, %754, %true, %none : !torch.vtensor<[2,32,30,1024],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%4950 = torch.aten.div.Scalar %4949, %int30720 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%4951 = torch.aten.add.Tensor %4948, %6, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%4952 = torch.aten.rsqrt %4951 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%4953 = torch.aten.sub.Tensor %4935, %4950, %int1 : !torch.vtensor<[2,32,30,1024],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,30,1024],f32>
%4954 = torch.aten.mul.Tensor %4953, %4952 : !torch.vtensor<[2,32,30,1024],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,30,1024],f32>
%4955 = torch.prim.ListConstruct %int2, %int960, %int32, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4956 = torch.aten.view %4954, %4955 : !torch.vtensor<[2,32,30,1024],f32>, !torch.list<int> -> !torch.vtensor<[2,960,32,32],f32>
%4957 = torch.aten.unsqueeze %538, %int-1 : !torch.vtensor<[960],f16>, !torch.int -> !torch.vtensor<[960,1],f16>
%4958 = torch.aten.unsqueeze %4957, %int-1 : !torch.vtensor<[960,1],f16>, !torch.int -> !torch.vtensor<[960,1,1],f16>
%4959 = torch.aten.mul.Tensor %4956, %4958 : !torch.vtensor<[2,960,32,32],f32>, !torch.vtensor<[960,1,1],f16> -> !torch.vtensor<[2,960,32,32],f32>
%4960 = torch.aten.unsqueeze %539, %int-1 : !torch.vtensor<[960],f16>, !torch.int -> !torch.vtensor<[960,1],f16>
%4961 = torch.aten.unsqueeze %4960, %int-1 : !torch.vtensor<[960,1],f16>, !torch.int -> !torch.vtensor<[960,1,1],f16>
%4962 = torch.aten.add.Tensor %4959, %4961, %int1 : !torch.vtensor<[2,960,32,32],f32>, !torch.vtensor<[960,1,1],f16>, !torch.int -> !torch.vtensor<[2,960,32,32],f32>
%4963 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%4964 = torch.aten.to.dtype %4963, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%4965 = torch.prim.ListConstruct %int2, %int960, %int32, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4966 = torch.aten.broadcast_to %4964, %4965 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,960,32,32],f16>
%4967 = torch.valsem.aten.copy %4966, %4962, %false : !torch.vtensor<[2,960,32,32],f16>, !torch.vtensor<[2,960,32,32],f32>, !torch.bool -> !torch.vtensor<[2,960,32,32],f16>
%4968 = torch.aten.sigmoid %4967 : !torch.vtensor<[2,960,32,32],f16> -> !torch.vtensor<[2,960,32,32],f16>
%4969 = torch.aten.mul.Tensor %4968, %4967 : !torch.vtensor<[2,960,32,32],f16>, !torch.vtensor<[2,960,32,32],f16> -> !torch.vtensor<[2,960,32,32],f16>
%4970 = torch.aten.convolution %4969, %540, %541, %744, %744, %744, %false, %745, %int1 : !torch.vtensor<[2,960,32,32],f16>, !torch.vtensor<[640,960,3,3],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
%4971 = torch.aten.sigmoid %743 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%4972 = torch.aten.mul.Tensor %4971, %743 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%4973 = torch.aten.transpose.int %542, %int0, %int1 : !torch.vtensor<[640,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,640],f16>
%4974 = torch.aten.mm %4972, %4973 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,640],f16> -> !torch.vtensor<[2,640],f16>
%4975 = torch.aten.mul.Scalar %543, %int1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640],f16>
%4976 = torch.aten.add.Tensor %4975, %4974, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2,640],f16>, !torch.int -> !torch.vtensor<[2,640],f16>
%4977 = torch.aten.slice.Tensor %4976, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,640],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,640],f16>
%4978 = torch.aten.slice.Tensor %4977, %int1, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,640],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,640],f16>
%4979 = torch.aten.unsqueeze %4978, %int2 : !torch.vtensor<[2,640],f16>, !torch.int -> !torch.vtensor<[2,640,1],f16>
%4980 = torch.aten.unsqueeze %4979, %int3 : !torch.vtensor<[2,640,1],f16>, !torch.int -> !torch.vtensor<[2,640,1,1],f16>
%4981 = torch.aten.add.Tensor %4970, %4980, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,1,1],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
%4982 = torch.aten.view %4981, %1412 : !torch.vtensor<[2,640,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,32,20,1024],f16>
%4983 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%4984 = torch.aten.to.dtype %4983, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%4985 = torch.prim.ListConstruct %int2, %int32, %int20, %int1024 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4986 = torch.aten.broadcast_to %4984, %4985 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,20,1024],f32>
%4987 = torch.valsem.aten.copy %4986, %4982, %false : !torch.vtensor<[2,32,20,1024],f32>, !torch.vtensor<[2,32,20,1024],f16>, !torch.bool -> !torch.vtensor<[2,32,20,1024],f32>
%4988 = torch.aten.to.dtype %4987, %int7, %false, %false, %none : !torch.vtensor<[2,32,20,1024],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,20,1024],f64>
%4989 = torch.aten.sum.dim_IntList %4988, %754, %true, %none : !torch.vtensor<[2,32,20,1024],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%4990 = torch.aten.div.Scalar %4989, %int20480 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%4991 = torch.aten.sub.Tensor %4988, %4990, %float1.000000e00 : !torch.vtensor<[2,32,20,1024],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,20,1024],f64>
%4992 = torch.aten.mul.Tensor %4991, %4991 : !torch.vtensor<[2,32,20,1024],f64>, !torch.vtensor<[2,32,20,1024],f64> -> !torch.vtensor<[2,32,20,1024],f64>
%4993 = torch.aten.sum.dim_IntList %4992, %754, %true, %none : !torch.vtensor<[2,32,20,1024],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%4994 = torch.aten.div.Scalar %4993, %int20480 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%4995 = torch.aten.to.dtype %4994, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%4996 = torch.aten.sum.dim_IntList %4987, %754, %true, %none : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%4997 = torch.aten.div.Scalar %4996, %int20480 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%4998 = torch.aten.add.Tensor %4995, %6, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%4999 = torch.aten.rsqrt %4998 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%5000 = torch.aten.sub.Tensor %4982, %4997, %int1 : !torch.vtensor<[2,32,20,1024],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,20,1024],f32>
%5001 = torch.aten.mul.Tensor %5000, %4999 : !torch.vtensor<[2,32,20,1024],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,20,1024],f32>
%5002 = torch.aten.view %5001, %1433 : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f32>
%5003 = torch.aten.unsqueeze %544, %int-1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640,1],f16>
%5004 = torch.aten.unsqueeze %5003, %int-1 : !torch.vtensor<[640,1],f16>, !torch.int -> !torch.vtensor<[640,1,1],f16>
%5005 = torch.aten.mul.Tensor %5002, %5004 : !torch.vtensor<[2,640,32,32],f32>, !torch.vtensor<[640,1,1],f16> -> !torch.vtensor<[2,640,32,32],f32>
%5006 = torch.aten.unsqueeze %545, %int-1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640,1],f16>
%5007 = torch.aten.unsqueeze %5006, %int-1 : !torch.vtensor<[640,1],f16>, !torch.int -> !torch.vtensor<[640,1,1],f16>
%5008 = torch.aten.add.Tensor %5005, %5007, %int1 : !torch.vtensor<[2,640,32,32],f32>, !torch.vtensor<[640,1,1],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f32>
%5009 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%5010 = torch.aten.to.dtype %5009, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%5011 = torch.prim.ListConstruct %int2, %int640, %int32, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5012 = torch.aten.broadcast_to %5010, %5011 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f16>
%5013 = torch.valsem.aten.copy %5012, %5008, %false : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f32>, !torch.bool -> !torch.vtensor<[2,640,32,32],f16>
%5014 = torch.aten.sigmoid %5013 : !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16>
%5015 = torch.aten.mul.Tensor %5014, %5013 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16>
%5016 = torch.aten.convolution %5015, %546, %547, %744, %744, %744, %false, %745, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[640,640,3,3],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
%5017 = torch.aten.convolution %4932, %548, %549, %744, %745, %744, %false, %745, %int1 : !torch.vtensor<[2,960,32,32],f16>, !torch.vtensor<[640,960,1,1],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
%5018 = torch.aten.add.Tensor %5017, %5016, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
%5019 = torch.aten.div.Tensor %5018, %5 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[2,640,32,32],f16>
%5020 = torch.aten.clone %5019, %int0 : !torch.vtensor<[2,640,32,32],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
%5021 = torch.aten.view %5020, %1412 : !torch.vtensor<[2,640,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,32,20,1024],f16>
%5022 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%5023 = torch.aten.to.dtype %5022, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%5024 = torch.prim.ListConstruct %int2, %int32, %int20, %int1024 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5025 = torch.aten.broadcast_to %5023, %5024 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,20,1024],f32>
%5026 = torch.valsem.aten.copy %5025, %5021, %false : !torch.vtensor<[2,32,20,1024],f32>, !torch.vtensor<[2,32,20,1024],f16>, !torch.bool -> !torch.vtensor<[2,32,20,1024],f32>
%5027 = torch.aten.to.dtype %5026, %int7, %false, %false, %none : !torch.vtensor<[2,32,20,1024],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,20,1024],f64>
%5028 = torch.aten.sum.dim_IntList %5027, %754, %true, %none : !torch.vtensor<[2,32,20,1024],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%5029 = torch.aten.div.Scalar %5028, %int20480 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%5030 = torch.aten.sub.Tensor %5027, %5029, %float1.000000e00 : !torch.vtensor<[2,32,20,1024],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,20,1024],f64>
%5031 = torch.aten.mul.Tensor %5030, %5030 : !torch.vtensor<[2,32,20,1024],f64>, !torch.vtensor<[2,32,20,1024],f64> -> !torch.vtensor<[2,32,20,1024],f64>
%5032 = torch.aten.sum.dim_IntList %5031, %754, %true, %none : !torch.vtensor<[2,32,20,1024],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%5033 = torch.aten.div.Scalar %5032, %int20480 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%5034 = torch.aten.to.dtype %5033, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%5035 = torch.aten.sum.dim_IntList %5026, %754, %true, %none : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%5036 = torch.aten.div.Scalar %5035, %int20480 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%5037 = torch.aten.add.Tensor %5034, %4, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%5038 = torch.aten.rsqrt %5037 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%5039 = torch.aten.sub.Tensor %5021, %5036, %int1 : !torch.vtensor<[2,32,20,1024],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,20,1024],f32>
%5040 = torch.aten.mul.Tensor %5039, %5038 : !torch.vtensor<[2,32,20,1024],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,20,1024],f32>
%5041 = torch.aten.view %5040, %1433 : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f32>
%5042 = torch.aten.unsqueeze %550, %int-1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640,1],f16>
%5043 = torch.aten.unsqueeze %5042, %int-1 : !torch.vtensor<[640,1],f16>, !torch.int -> !torch.vtensor<[640,1,1],f16>
%5044 = torch.aten.mul.Tensor %5041, %5043 : !torch.vtensor<[2,640,32,32],f32>, !torch.vtensor<[640,1,1],f16> -> !torch.vtensor<[2,640,32,32],f32>
%5045 = torch.aten.unsqueeze %551, %int-1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640,1],f16>
%5046 = torch.aten.unsqueeze %5045, %int-1 : !torch.vtensor<[640,1],f16>, !torch.int -> !torch.vtensor<[640,1,1],f16>
%5047 = torch.aten.add.Tensor %5044, %5046, %int1 : !torch.vtensor<[2,640,32,32],f32>, !torch.vtensor<[640,1,1],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f32>
%5048 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%5049 = torch.aten.to.dtype %5048, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%5050 = torch.prim.ListConstruct %int2, %int640, %int32, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5051 = torch.aten.broadcast_to %5049, %5050 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f16>
%5052 = torch.valsem.aten.copy %5051, %5047, %false : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f32>, !torch.bool -> !torch.vtensor<[2,640,32,32],f16>
%5053 = torch.aten.convolution %5052, %552, %553, %744, %745, %744, %false, %745, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[640,640,1,1],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
%5054 = torch.aten.permute %5053, %866 : !torch.vtensor<[2,640,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,32,32,640],f16>
%5055 = torch.aten.view %5054, %1487 : !torch.vtensor<[2,32,32,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%5056 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%5057 = torch.aten.sum.dim_IntList %5055, %5056, %true, %none : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,1024,1],f16>
%5058 = torch.aten.div.Scalar %5057, %int640 : !torch.vtensor<[2,1024,1],f16>, !torch.int -> !torch.vtensor<[2,1024,1],f16>
%5059 = torch.prim.ListConstruct %int2, %int1024, %int640 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5060 = torch.aten.broadcast_to %5058, %5059 : !torch.vtensor<[2,1024,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%5061 = torch.aten.sub.Tensor %5055, %5060, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16>
%5062 = torch.aten.mul.Tensor %5061, %5061 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16>
%5063 = torch.aten.sum.dim_IntList %5062, %5056, %true, %none : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,1024,1],f16>
%5064 = torch.aten.div.Scalar %5063, %int640 : !torch.vtensor<[2,1024,1],f16>, !torch.int -> !torch.vtensor<[2,1024,1],f16>
%5065 = torch.aten.add.Scalar %5064, %float1.000000e-05, %int1 : !torch.vtensor<[2,1024,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,1024,1],f16>
%5066 = torch.aten.rsqrt %5065 : !torch.vtensor<[2,1024,1],f16> -> !torch.vtensor<[2,1024,1],f16>
%5067 = torch.prim.ListConstruct %int2, %int1024, %int640 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5068 = torch.aten.broadcast_to %5066, %5067 : !torch.vtensor<[2,1024,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%5069 = torch.aten.mul.Tensor %5061, %5068 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16>
%5070 = torch.aten.mul.Tensor %5069, %554 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[640],f16> -> !torch.vtensor<[2,1024,640],f16>
%5071 = torch.aten.add.Tensor %5070, %555, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16>
%5072 = torch.aten.transpose.int %556, %int0, %int1 : !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,640],f16>
%5073 = torch.aten.view %5071, %1506 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
%5074 = torch.aten.mm %5073, %5072 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16>
%5075 = torch.aten.view %5074, %1487 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%5076 = torch.aten.transpose.int %557, %int0, %int1 : !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,640],f16>
%5077 = torch.aten.view %5071, %1506 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
%5078 = torch.aten.mm %5077, %5076 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16>
%5079 = torch.aten.view %5078, %1487 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%5080 = torch.aten.transpose.int %558, %int0, %int1 : !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,640],f16>
%5081 = torch.aten.view %5071, %1506 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
%5082 = torch.aten.mm %5081, %5080 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16>
%5083 = torch.aten.view %5082, %1487 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%5084 = torch.aten.view %5075, %1518 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16>
%5085 = torch.aten.permute %5084, %901 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16>
%5086 = torch.aten.clone %5085, %int0 : !torch.vtensor<[2,8,1024,80],f16>, !torch.int -> !torch.vtensor<[2,8,1024,80],f16>
%5087 = torch.aten.view %5086, %1522 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%5088 = torch.aten.view %5079, %1518 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16>
%5089 = torch.aten.permute %5088, %901 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16>
%5090 = torch.aten.clone %5089, %int0 : !torch.vtensor<[2,8,1024,80],f16>, !torch.int -> !torch.vtensor<[2,8,1024,80],f16>
%5091 = torch.aten.view %5090, %1522 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%5092 = torch.aten.view %5083, %1518 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16>
%5093 = torch.aten.permute %5092, %901 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16>
%5094 = torch.aten.clone %5093, %int0 : !torch.vtensor<[2,8,1024,80],f16>, !torch.int -> !torch.vtensor<[2,8,1024,80],f16>
%5095 = torch.aten.view %5094, %1522 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%5096 = torch.aten.transpose.int %5091, %int-1, %int-2 : !torch.vtensor<[16,1024,80],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,80,1024],f16>
%5097 = torch.aten.broadcast_to %5087, %1522 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%5098 = torch.aten.view %5097, %1522 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%5099 = torch.aten.broadcast_to %5096, %1535 : !torch.vtensor<[16,80,1024],f16>, !torch.list<int> -> !torch.vtensor<[16,80,1024],f16>
%5100 = torch.aten.view %5099, %1535 : !torch.vtensor<[16,80,1024],f16>, !torch.list<int> -> !torch.vtensor<[16,80,1024],f16>
%5101 = torch.aten.bmm %5098, %5100 : !torch.vtensor<[16,1024,80],f16>, !torch.vtensor<[16,80,1024],f16> -> !torch.vtensor<[16,1024,1024],f16>
%5102 = torch.aten.view %5101, %1539 : !torch.vtensor<[16,1024,1024],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,1024],f16>
%5103 = torch.aten.mul.Tensor %5102, %2 : !torch.vtensor<[16,1024,1024],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,1024,1024],f16>
%values_46, %indices_47 = torch.aten.max.dim %5103, %int-1, %true : !torch.vtensor<[16,1024,1024],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,1024,1],f16>, !torch.vtensor<[16,1024,1],si64>
%5104 = torch.aten.sub.Tensor %5103, %values_46, %float1.000000e00 : !torch.vtensor<[16,1024,1024],f16>, !torch.vtensor<[16,1024,1],f16>, !torch.float -> !torch.vtensor<[16,1024,1024],f16>
%5105 = torch.aten.exp %5104 : !torch.vtensor<[16,1024,1024],f16> -> !torch.vtensor<[16,1024,1024],f16>
%5106 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%5107 = torch.aten.sum.dim_IntList %5105, %5106, %true, %none : !torch.vtensor<[16,1024,1024],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[16,1024,1],f16>
%5108 = torch.aten.div.Tensor %5105, %5107 : !torch.vtensor<[16,1024,1024],f16>, !torch.vtensor<[16,1024,1],f16> -> !torch.vtensor<[16,1024,1024],f16>
%5109 = torch.aten.broadcast_to %5108, %1539 : !torch.vtensor<[16,1024,1024],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,1024],f16>
%5110 = torch.aten.view %5109, %1539 : !torch.vtensor<[16,1024,1024],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,1024],f16>
%5111 = torch.aten.broadcast_to %5095, %1522 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%5112 = torch.aten.view %5111, %1522 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%5113 = torch.aten.bmm %5110, %5112 : !torch.vtensor<[16,1024,1024],f16>, !torch.vtensor<[16,1024,80],f16> -> !torch.vtensor<[16,1024,80],f16>
%5114 = torch.aten.view %5113, %1522 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%5115 = torch.aten.view %5114, %1553 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16>
%5116 = torch.aten.permute %5115, %901 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16>
%5117 = torch.aten.clone %5116, %int0 : !torch.vtensor<[2,1024,8,80],f16>, !torch.int -> !torch.vtensor<[2,1024,8,80],f16>
%5118 = torch.aten.view %5117, %1487 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%5119 = torch.aten.transpose.int %559, %int0, %int1 : !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,640],f16>
%5120 = torch.aten.view %5118, %1506 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
%5121 = torch.aten.mm %5120, %5119 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16>
%5122 = torch.aten.mul.Scalar %560, %int1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640],f16>
%5123 = torch.aten.add.Tensor %5122, %5121, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2048,640],f16>, !torch.int -> !torch.vtensor<[2048,640],f16>
%5124 = torch.aten.view %5123, %1487 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%5125 = torch.aten.add.Tensor %5124, %5055, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16>
%5126 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%5127 = torch.aten.sum.dim_IntList %5125, %5126, %true, %none : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,1024,1],f16>
%5128 = torch.aten.div.Scalar %5127, %int640 : !torch.vtensor<[2,1024,1],f16>, !torch.int -> !torch.vtensor<[2,1024,1],f16>
%5129 = torch.prim.ListConstruct %int2, %int1024, %int640 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5130 = torch.aten.broadcast_to %5128, %5129 : !torch.vtensor<[2,1024,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%5131 = torch.aten.sub.Tensor %5125, %5130, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16>
%5132 = torch.aten.mul.Tensor %5131, %5131 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16>
%5133 = torch.aten.sum.dim_IntList %5132, %5126, %true, %none : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,1024,1],f16>
%5134 = torch.aten.div.Scalar %5133, %int640 : !torch.vtensor<[2,1024,1],f16>, !torch.int -> !torch.vtensor<[2,1024,1],f16>
%5135 = torch.aten.add.Scalar %5134, %float1.000000e-05, %int1 : !torch.vtensor<[2,1024,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,1024,1],f16>
%5136 = torch.aten.rsqrt %5135 : !torch.vtensor<[2,1024,1],f16> -> !torch.vtensor<[2,1024,1],f16>
%5137 = torch.prim.ListConstruct %int2, %int1024, %int640 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5138 = torch.aten.broadcast_to %5136, %5137 : !torch.vtensor<[2,1024,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%5139 = torch.aten.mul.Tensor %5131, %5138 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16>
%5140 = torch.aten.mul.Tensor %5139, %561 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[640],f16> -> !torch.vtensor<[2,1024,640],f16>
%5141 = torch.aten.add.Tensor %5140, %562, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16>
%5142 = torch.aten.transpose.int %563, %int0, %int1 : !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,640],f16>
%5143 = torch.aten.view %5141, %1506 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
%5144 = torch.aten.mm %5143, %5142 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16>
%5145 = torch.aten.view %5144, %1487 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%5146 = torch.aten.transpose.int %564, %int0, %int1 : !torch.vtensor<[640,768],f16>, !torch.int, !torch.int -> !torch.vtensor<[768,640],f16>
%5147 = torch.aten.view %arg2, %968 : !torch.vtensor<[2,77,768],f16>, !torch.list<int> -> !torch.vtensor<[154,768],f16>
%5148 = torch.aten.mm %5147, %5146 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,640],f16> -> !torch.vtensor<[154,640],f16>
%5149 = torch.aten.view %5148, %1588 : !torch.vtensor<[154,640],f16>, !torch.list<int> -> !torch.vtensor<[2,77,640],f16>
%5150 = torch.aten.transpose.int %565, %int0, %int1 : !torch.vtensor<[640,768],f16>, !torch.int, !torch.int -> !torch.vtensor<[768,640],f16>
%5151 = torch.aten.view %arg2, %968 : !torch.vtensor<[2,77,768],f16>, !torch.list<int> -> !torch.vtensor<[154,768],f16>
%5152 = torch.aten.mm %5151, %5150 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,640],f16> -> !torch.vtensor<[154,640],f16>
%5153 = torch.aten.view %5152, %1588 : !torch.vtensor<[154,640],f16>, !torch.list<int> -> !torch.vtensor<[2,77,640],f16>
%5154 = torch.aten.view %5145, %1518 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16>
%5155 = torch.aten.permute %5154, %901 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16>
%5156 = torch.aten.clone %5155, %int0 : !torch.vtensor<[2,8,1024,80],f16>, !torch.int -> !torch.vtensor<[2,8,1024,80],f16>
%5157 = torch.aten.view %5156, %1522 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%5158 = torch.aten.view %5149, %1598 : !torch.vtensor<[2,77,640],f16>, !torch.list<int> -> !torch.vtensor<[2,77,8,80],f16>
%5159 = torch.aten.permute %5158, %901 : !torch.vtensor<[2,77,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,80],f16>
%5160 = torch.aten.clone %5159, %int0 : !torch.vtensor<[2,8,77,80],f16>, !torch.int -> !torch.vtensor<[2,8,77,80],f16>
%5161 = torch.aten.view %5160, %1602 : !torch.vtensor<[2,8,77,80],f16>, !torch.list<int> -> !torch.vtensor<[16,77,80],f16>
%5162 = torch.aten.view %5153, %1598 : !torch.vtensor<[2,77,640],f16>, !torch.list<int> -> !torch.vtensor<[2,77,8,80],f16>
%5163 = torch.aten.permute %5162, %901 : !torch.vtensor<[2,77,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,80],f16>
%5164 = torch.aten.clone %5163, %int0 : !torch.vtensor<[2,8,77,80],f16>, !torch.int -> !torch.vtensor<[2,8,77,80],f16>
%5165 = torch.aten.view %5164, %1602 : !torch.vtensor<[2,8,77,80],f16>, !torch.list<int> -> !torch.vtensor<[16,77,80],f16>
%5166 = torch.aten.transpose.int %5161, %int-1, %int-2 : !torch.vtensor<[16,77,80],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,80,77],f16>
%5167 = torch.aten.broadcast_to %5157, %1522 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%5168 = torch.aten.view %5167, %1522 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%5169 = torch.aten.broadcast_to %5166, %1611 : !torch.vtensor<[16,80,77],f16>, !torch.list<int> -> !torch.vtensor<[16,80,77],f16>
%5170 = torch.aten.view %5169, %1611 : !torch.vtensor<[16,80,77],f16>, !torch.list<int> -> !torch.vtensor<[16,80,77],f16>
%5171 = torch.aten.bmm %5168, %5170 : !torch.vtensor<[16,1024,80],f16>, !torch.vtensor<[16,80,77],f16> -> !torch.vtensor<[16,1024,77],f16>
%5172 = torch.aten.view %5171, %1615 : !torch.vtensor<[16,1024,77],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,77],f16>
%5173 = torch.aten.mul.Tensor %5172, %2 : !torch.vtensor<[16,1024,77],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,1024,77],f16>
%values_48, %indices_49 = torch.aten.max.dim %5173, %int-1, %true : !torch.vtensor<[16,1024,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,1024,1],f16>, !torch.vtensor<[16,1024,1],si64>
%5174 = torch.aten.sub.Tensor %5173, %values_48, %float1.000000e00 : !torch.vtensor<[16,1024,77],f16>, !torch.vtensor<[16,1024,1],f16>, !torch.float -> !torch.vtensor<[16,1024,77],f16>
%5175 = torch.aten.exp %5174 : !torch.vtensor<[16,1024,77],f16> -> !torch.vtensor<[16,1024,77],f16>
%5176 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%5177 = torch.aten.sum.dim_IntList %5175, %5176, %true, %none : !torch.vtensor<[16,1024,77],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[16,1024,1],f16>
%5178 = torch.aten.div.Tensor %5175, %5177 : !torch.vtensor<[16,1024,77],f16>, !torch.vtensor<[16,1024,1],f16> -> !torch.vtensor<[16,1024,77],f16>
%5179 = torch.aten.broadcast_to %5178, %1615 : !torch.vtensor<[16,1024,77],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,77],f16>
%5180 = torch.aten.view %5179, %1615 : !torch.vtensor<[16,1024,77],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,77],f16>
%5181 = torch.aten.broadcast_to %5165, %1602 : !torch.vtensor<[16,77,80],f16>, !torch.list<int> -> !torch.vtensor<[16,77,80],f16>
%5182 = torch.aten.view %5181, %1602 : !torch.vtensor<[16,77,80],f16>, !torch.list<int> -> !torch.vtensor<[16,77,80],f16>
%5183 = torch.aten.bmm %5180, %5182 : !torch.vtensor<[16,1024,77],f16>, !torch.vtensor<[16,77,80],f16> -> !torch.vtensor<[16,1024,80],f16>
%5184 = torch.aten.view %5183, %1522 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%5185 = torch.aten.view %5184, %1553 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16>
%5186 = torch.aten.permute %5185, %901 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16>
%5187 = torch.aten.clone %5186, %int0 : !torch.vtensor<[2,1024,8,80],f16>, !torch.int -> !torch.vtensor<[2,1024,8,80],f16>
%5188 = torch.aten.view %5187, %1487 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%5189 = torch.aten.transpose.int %566, %int0, %int1 : !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,640],f16>
%5190 = torch.aten.view %5188, %1506 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
%5191 = torch.aten.mm %5190, %5189 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16>
%5192 = torch.aten.mul.Scalar %567, %int1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640],f16>
%5193 = torch.aten.add.Tensor %5192, %5191, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2048,640],f16>, !torch.int -> !torch.vtensor<[2048,640],f16>
%5194 = torch.aten.view %5193, %1487 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%5195 = torch.aten.add.Tensor %5194, %5125, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16>
%5196 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%5197 = torch.aten.sum.dim_IntList %5195, %5196, %true, %none : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,1024,1],f16>
%5198 = torch.aten.div.Scalar %5197, %int640 : !torch.vtensor<[2,1024,1],f16>, !torch.int -> !torch.vtensor<[2,1024,1],f16>
%5199 = torch.prim.ListConstruct %int2, %int1024, %int640 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5200 = torch.aten.broadcast_to %5198, %5199 : !torch.vtensor<[2,1024,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%5201 = torch.aten.sub.Tensor %5195, %5200, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16>
%5202 = torch.aten.mul.Tensor %5201, %5201 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16>
%5203 = torch.aten.sum.dim_IntList %5202, %5196, %true, %none : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,1024,1],f16>
%5204 = torch.aten.div.Scalar %5203, %int640 : !torch.vtensor<[2,1024,1],f16>, !torch.int -> !torch.vtensor<[2,1024,1],f16>
%5205 = torch.aten.add.Scalar %5204, %float1.000000e-05, %int1 : !torch.vtensor<[2,1024,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,1024,1],f16>
%5206 = torch.aten.rsqrt %5205 : !torch.vtensor<[2,1024,1],f16> -> !torch.vtensor<[2,1024,1],f16>
%5207 = torch.prim.ListConstruct %int2, %int1024, %int640 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5208 = torch.aten.broadcast_to %5206, %5207 : !torch.vtensor<[2,1024,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%5209 = torch.aten.mul.Tensor %5201, %5208 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16>
%5210 = torch.aten.mul.Tensor %5209, %568 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[640],f16> -> !torch.vtensor<[2,1024,640],f16>
%5211 = torch.aten.add.Tensor %5210, %569, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16>
%5212 = torch.aten.transpose.int %570, %int0, %int1 : !torch.vtensor<[5120,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,5120],f16>
%5213 = torch.aten.view %5211, %1506 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
%5214 = torch.aten.mm %5213, %5212 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,5120],f16> -> !torch.vtensor<[2048,5120],f16>
%5215 = torch.aten.mul.Scalar %571, %int1 : !torch.vtensor<[5120],f16>, !torch.int -> !torch.vtensor<[5120],f16>
%5216 = torch.aten.add.Tensor %5215, %5214, %int1 : !torch.vtensor<[5120],f16>, !torch.vtensor<[2048,5120],f16>, !torch.int -> !torch.vtensor<[2048,5120],f16>
%5217 = torch.aten.view %5216, %1661 : !torch.vtensor<[2048,5120],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,5120],f16>
%5218 = torch.aten.slice.Tensor %5217, %int-1, %int0, %int2560, %int1 : !torch.vtensor<[2,1024,5120],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1024,2560],f16>
%5219 = torch.aten.slice.Tensor %5217, %int-1, %int2560, %int5120, %int1 : !torch.vtensor<[2,1024,5120],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1024,2560],f16>
%5220 = torch.aten.gelu %5219, %str : !torch.vtensor<[2,1024,2560],f16>, !torch.str -> !torch.vtensor<[2,1024,2560],f16>
%5221 = torch.aten.mul.Tensor %5218, %5220 : !torch.vtensor<[2,1024,2560],f16>, !torch.vtensor<[2,1024,2560],f16> -> !torch.vtensor<[2,1024,2560],f16>
%5222 = torch.aten.transpose.int %572, %int0, %int1 : !torch.vtensor<[640,2560],f16>, !torch.int, !torch.int -> !torch.vtensor<[2560,640],f16>
%5223 = torch.aten.view %5221, %1668 : !torch.vtensor<[2,1024,2560],f16>, !torch.list<int> -> !torch.vtensor<[2048,2560],f16>
%5224 = torch.aten.mm %5223, %5222 : !torch.vtensor<[2048,2560],f16>, !torch.vtensor<[2560,640],f16> -> !torch.vtensor<[2048,640],f16>
%5225 = torch.aten.mul.Scalar %573, %int1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640],f16>
%5226 = torch.aten.add.Tensor %5225, %5224, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2048,640],f16>, !torch.int -> !torch.vtensor<[2048,640],f16>
%5227 = torch.aten.view %5226, %1487 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%5228 = torch.aten.add.Tensor %5227, %5195, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16>
%5229 = torch.aten.view %5228, %1675 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2,32,32,640],f16>
%5230 = torch.aten.permute %5229, %1060 : !torch.vtensor<[2,32,32,640],f16>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f16>
%5231 = torch.aten.convolution %5230, %574, %575, %744, %745, %744, %false, %745, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[640,640,1,1],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
%5232 = torch.aten.add.Tensor %5231, %5019, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
%5233 = torch.aten.upsample_nearest2d.vec %5232, %none, %3418 : !torch.vtensor<[2,640,32,32],f16>, !torch.none, !torch.list<float> -> !torch.vtensor<[2,640,64,64],f16>
%5234 = torch.aten.convolution %5233, %576, %577, %744, %744, %744, %false, %745, %int1 : !torch.vtensor<[2,640,64,64],f16>, !torch.vtensor<[640,640,3,3],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,640,64,64],f16>
%5235 = torch.prim.ListConstruct %5234, %1360 : (!torch.vtensor<[2,640,64,64],f16>, !torch.vtensor<[2,320,64,64],f16>) -> !torch.list<vtensor>
%5236 = torch.aten.cat %5235, %int1 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[2,960,64,64],f16>
%5237 = torch.aten.clone %5236, %int0 : !torch.vtensor<[2,960,64,64],f16>, !torch.int -> !torch.vtensor<[2,960,64,64],f16>
%5238 = torch.prim.ListConstruct %int2, %int32, %int30, %int4096 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5239 = torch.aten.view %5237, %5238 : !torch.vtensor<[2,960,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,32,30,4096],f16>
%5240 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%5241 = torch.aten.to.dtype %5240, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%5242 = torch.prim.ListConstruct %int2, %int32, %int30, %int4096 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5243 = torch.aten.broadcast_to %5241, %5242 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,30,4096],f32>
%5244 = torch.valsem.aten.copy %5243, %5239, %false : !torch.vtensor<[2,32,30,4096],f32>, !torch.vtensor<[2,32,30,4096],f16>, !torch.bool -> !torch.vtensor<[2,32,30,4096],f32>
%5245 = torch.aten.to.dtype %5244, %int7, %false, %false, %none : !torch.vtensor<[2,32,30,4096],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,30,4096],f64>
%5246 = torch.aten.sum.dim_IntList %5245, %754, %true, %none : !torch.vtensor<[2,32,30,4096],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%5247 = torch.aten.div.Scalar %5246, %int122880 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%5248 = torch.aten.sub.Tensor %5245, %5247, %float1.000000e00 : !torch.vtensor<[2,32,30,4096],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,30,4096],f64>
%5249 = torch.aten.mul.Tensor %5248, %5248 : !torch.vtensor<[2,32,30,4096],f64>, !torch.vtensor<[2,32,30,4096],f64> -> !torch.vtensor<[2,32,30,4096],f64>
%5250 = torch.aten.sum.dim_IntList %5249, %754, %true, %none : !torch.vtensor<[2,32,30,4096],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%5251 = torch.aten.div.Scalar %5250, %int122880 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%5252 = torch.aten.to.dtype %5251, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%5253 = torch.aten.sum.dim_IntList %5244, %754, %true, %none : !torch.vtensor<[2,32,30,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%5254 = torch.aten.div.Scalar %5253, %int122880 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%5255 = torch.aten.add.Tensor %5252, %6, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%5256 = torch.aten.rsqrt %5255 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%5257 = torch.aten.sub.Tensor %5239, %5254, %int1 : !torch.vtensor<[2,32,30,4096],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,30,4096],f32>
%5258 = torch.aten.mul.Tensor %5257, %5256 : !torch.vtensor<[2,32,30,4096],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,30,4096],f32>
%5259 = torch.prim.ListConstruct %int2, %int960, %int64, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5260 = torch.aten.view %5258, %5259 : !torch.vtensor<[2,32,30,4096],f32>, !torch.list<int> -> !torch.vtensor<[2,960,64,64],f32>
%5261 = torch.aten.unsqueeze %578, %int-1 : !torch.vtensor<[960],f16>, !torch.int -> !torch.vtensor<[960,1],f16>
%5262 = torch.aten.unsqueeze %5261, %int-1 : !torch.vtensor<[960,1],f16>, !torch.int -> !torch.vtensor<[960,1,1],f16>
%5263 = torch.aten.mul.Tensor %5260, %5262 : !torch.vtensor<[2,960,64,64],f32>, !torch.vtensor<[960,1,1],f16> -> !torch.vtensor<[2,960,64,64],f32>
%5264 = torch.aten.unsqueeze %579, %int-1 : !torch.vtensor<[960],f16>, !torch.int -> !torch.vtensor<[960,1],f16>
%5265 = torch.aten.unsqueeze %5264, %int-1 : !torch.vtensor<[960,1],f16>, !torch.int -> !torch.vtensor<[960,1,1],f16>
%5266 = torch.aten.add.Tensor %5263, %5265, %int1 : !torch.vtensor<[2,960,64,64],f32>, !torch.vtensor<[960,1,1],f16>, !torch.int -> !torch.vtensor<[2,960,64,64],f32>
%5267 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%5268 = torch.aten.to.dtype %5267, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%5269 = torch.prim.ListConstruct %int2, %int960, %int64, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5270 = torch.aten.broadcast_to %5268, %5269 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,960,64,64],f16>
%5271 = torch.valsem.aten.copy %5270, %5266, %false : !torch.vtensor<[2,960,64,64],f16>, !torch.vtensor<[2,960,64,64],f32>, !torch.bool -> !torch.vtensor<[2,960,64,64],f16>
%5272 = torch.aten.sigmoid %5271 : !torch.vtensor<[2,960,64,64],f16> -> !torch.vtensor<[2,960,64,64],f16>
%5273 = torch.aten.mul.Tensor %5272, %5271 : !torch.vtensor<[2,960,64,64],f16>, !torch.vtensor<[2,960,64,64],f16> -> !torch.vtensor<[2,960,64,64],f16>
%5274 = torch.aten.convolution %5273, %580, %581, %744, %744, %744, %false, %745, %int1 : !torch.vtensor<[2,960,64,64],f16>, !torch.vtensor<[320,960,3,3],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,320,64,64],f16>
%5275 = torch.aten.sigmoid %743 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%5276 = torch.aten.mul.Tensor %5275, %743 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%5277 = torch.aten.transpose.int %582, %int0, %int1 : !torch.vtensor<[320,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,320],f16>
%5278 = torch.aten.mm %5276, %5277 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,320],f16> -> !torch.vtensor<[2,320],f16>
%5279 = torch.aten.mul.Scalar %583, %int1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320],f16>
%5280 = torch.aten.add.Tensor %5279, %5278, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[2,320],f16>, !torch.int -> !torch.vtensor<[2,320],f16>
%5281 = torch.aten.slice.Tensor %5280, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,320],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,320],f16>
%5282 = torch.aten.slice.Tensor %5281, %int1, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,320],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,320],f16>
%5283 = torch.aten.unsqueeze %5282, %int2 : !torch.vtensor<[2,320],f16>, !torch.int -> !torch.vtensor<[2,320,1],f16>
%5284 = torch.aten.unsqueeze %5283, %int3 : !torch.vtensor<[2,320,1],f16>, !torch.int -> !torch.vtensor<[2,320,1,1],f16>
%5285 = torch.aten.add.Tensor %5274, %5284, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,1,1],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f16>
%5286 = torch.aten.view %5285, %747 : !torch.vtensor<[2,320,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,32,10,4096],f16>
%5287 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%5288 = torch.aten.to.dtype %5287, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%5289 = torch.prim.ListConstruct %int2, %int32, %int10, %int4096 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5290 = torch.aten.broadcast_to %5288, %5289 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,10,4096],f32>
%5291 = torch.valsem.aten.copy %5290, %5286, %false : !torch.vtensor<[2,32,10,4096],f32>, !torch.vtensor<[2,32,10,4096],f16>, !torch.bool -> !torch.vtensor<[2,32,10,4096],f32>
%5292 = torch.aten.to.dtype %5291, %int7, %false, %false, %none : !torch.vtensor<[2,32,10,4096],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,10,4096],f64>
%5293 = torch.aten.sum.dim_IntList %5292, %754, %true, %none : !torch.vtensor<[2,32,10,4096],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%5294 = torch.aten.div.Scalar %5293, %int40960 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%5295 = torch.aten.sub.Tensor %5292, %5294, %float1.000000e00 : !torch.vtensor<[2,32,10,4096],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,10,4096],f64>
%5296 = torch.aten.mul.Tensor %5295, %5295 : !torch.vtensor<[2,32,10,4096],f64>, !torch.vtensor<[2,32,10,4096],f64> -> !torch.vtensor<[2,32,10,4096],f64>
%5297 = torch.aten.sum.dim_IntList %5296, %754, %true, %none : !torch.vtensor<[2,32,10,4096],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%5298 = torch.aten.div.Scalar %5297, %int40960 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%5299 = torch.aten.to.dtype %5298, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%5300 = torch.aten.sum.dim_IntList %5291, %754, %true, %none : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%5301 = torch.aten.div.Scalar %5300, %int40960 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%5302 = torch.aten.add.Tensor %5299, %6, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%5303 = torch.aten.rsqrt %5302 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%5304 = torch.aten.sub.Tensor %5286, %5301, %int1 : !torch.vtensor<[2,32,10,4096],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,10,4096],f32>
%5305 = torch.aten.mul.Tensor %5304, %5303 : !torch.vtensor<[2,32,10,4096],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,10,4096],f32>
%5306 = torch.aten.view %5305, %769 : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f32>
%5307 = torch.aten.unsqueeze %584, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16>
%5308 = torch.aten.unsqueeze %5307, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16>
%5309 = torch.aten.mul.Tensor %5306, %5308 : !torch.vtensor<[2,320,64,64],f32>, !torch.vtensor<[320,1,1],f16> -> !torch.vtensor<[2,320,64,64],f32>
%5310 = torch.aten.unsqueeze %585, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16>
%5311 = torch.aten.unsqueeze %5310, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16>
%5312 = torch.aten.add.Tensor %5309, %5311, %int1 : !torch.vtensor<[2,320,64,64],f32>, !torch.vtensor<[320,1,1],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f32>
%5313 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%5314 = torch.aten.to.dtype %5313, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%5315 = torch.prim.ListConstruct %int2, %int320, %int64, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5316 = torch.aten.broadcast_to %5314, %5315 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f16>
%5317 = torch.valsem.aten.copy %5316, %5312, %false : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f32>, !torch.bool -> !torch.vtensor<[2,320,64,64],f16>
%5318 = torch.aten.sigmoid %5317 : !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16>
%5319 = torch.aten.mul.Tensor %5318, %5317 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16>
%5320 = torch.aten.convolution %5319, %586, %587, %744, %744, %744, %false, %745, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[320,320,3,3],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,320,64,64],f16>
%5321 = torch.aten.convolution %5236, %588, %589, %744, %745, %744, %false, %745, %int1 : !torch.vtensor<[2,960,64,64],f16>, !torch.vtensor<[320,960,1,1],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,320,64,64],f16>
%5322 = torch.aten.add.Tensor %5321, %5320, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f16>
%5323 = torch.aten.div.Tensor %5322, %5 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[2,320,64,64],f16>
%5324 = torch.aten.clone %5323, %int0 : !torch.vtensor<[2,320,64,64],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f16>
%5325 = torch.aten.view %5324, %747 : !torch.vtensor<[2,320,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,32,10,4096],f16>
%5326 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%5327 = torch.aten.to.dtype %5326, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%5328 = torch.prim.ListConstruct %int2, %int32, %int10, %int4096 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5329 = torch.aten.broadcast_to %5327, %5328 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,10,4096],f32>
%5330 = torch.valsem.aten.copy %5329, %5325, %false : !torch.vtensor<[2,32,10,4096],f32>, !torch.vtensor<[2,32,10,4096],f16>, !torch.bool -> !torch.vtensor<[2,32,10,4096],f32>
%5331 = torch.aten.to.dtype %5330, %int7, %false, %false, %none : !torch.vtensor<[2,32,10,4096],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,10,4096],f64>
%5332 = torch.aten.sum.dim_IntList %5331, %754, %true, %none : !torch.vtensor<[2,32,10,4096],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%5333 = torch.aten.div.Scalar %5332, %int40960 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%5334 = torch.aten.sub.Tensor %5331, %5333, %float1.000000e00 : !torch.vtensor<[2,32,10,4096],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,10,4096],f64>
%5335 = torch.aten.mul.Tensor %5334, %5334 : !torch.vtensor<[2,32,10,4096],f64>, !torch.vtensor<[2,32,10,4096],f64> -> !torch.vtensor<[2,32,10,4096],f64>
%5336 = torch.aten.sum.dim_IntList %5335, %754, %true, %none : !torch.vtensor<[2,32,10,4096],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%5337 = torch.aten.div.Scalar %5336, %int40960 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%5338 = torch.aten.to.dtype %5337, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%5339 = torch.aten.sum.dim_IntList %5330, %754, %true, %none : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%5340 = torch.aten.div.Scalar %5339, %int40960 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%5341 = torch.aten.add.Tensor %5338, %4, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%5342 = torch.aten.rsqrt %5341 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%5343 = torch.aten.sub.Tensor %5325, %5340, %int1 : !torch.vtensor<[2,32,10,4096],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,10,4096],f32>
%5344 = torch.aten.mul.Tensor %5343, %5342 : !torch.vtensor<[2,32,10,4096],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,10,4096],f32>
%5345 = torch.aten.view %5344, %769 : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f32>
%5346 = torch.aten.unsqueeze %590, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16>
%5347 = torch.aten.unsqueeze %5346, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16>
%5348 = torch.aten.mul.Tensor %5345, %5347 : !torch.vtensor<[2,320,64,64],f32>, !torch.vtensor<[320,1,1],f16> -> !torch.vtensor<[2,320,64,64],f32>
%5349 = torch.aten.unsqueeze %591, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16>
%5350 = torch.aten.unsqueeze %5349, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16>
%5351 = torch.aten.add.Tensor %5348, %5350, %int1 : !torch.vtensor<[2,320,64,64],f32>, !torch.vtensor<[320,1,1],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f32>
%5352 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%5353 = torch.aten.to.dtype %5352, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%5354 = torch.prim.ListConstruct %int2, %int320, %int64, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5355 = torch.aten.broadcast_to %5353, %5354 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f16>
%5356 = torch.valsem.aten.copy %5355, %5351, %false : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f32>, !torch.bool -> !torch.vtensor<[2,320,64,64],f16>
%5357 = torch.aten.convolution %5356, %592, %593, %744, %745, %744, %false, %745, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[320,320,1,1],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,320,64,64],f16>
%5358 = torch.aten.permute %5357, %866 : !torch.vtensor<[2,320,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,64,64,320],f16>
%5359 = torch.aten.view %5358, %868 : !torch.vtensor<[2,64,64,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%5360 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%5361 = torch.aten.sum.dim_IntList %5359, %5360, %true, %none : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,4096,1],f16>
%5362 = torch.aten.div.Scalar %5361, %int320 : !torch.vtensor<[2,4096,1],f16>, !torch.int -> !torch.vtensor<[2,4096,1],f16>
%5363 = torch.prim.ListConstruct %int2, %int4096, %int320 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5364 = torch.aten.broadcast_to %5362, %5363 : !torch.vtensor<[2,4096,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%5365 = torch.aten.sub.Tensor %5359, %5364, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16>
%5366 = torch.aten.mul.Tensor %5365, %5365 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16>
%5367 = torch.aten.sum.dim_IntList %5366, %5360, %true, %none : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,4096,1],f16>
%5368 = torch.aten.div.Scalar %5367, %int320 : !torch.vtensor<[2,4096,1],f16>, !torch.int -> !torch.vtensor<[2,4096,1],f16>
%5369 = torch.aten.add.Scalar %5368, %float1.000000e-05, %int1 : !torch.vtensor<[2,4096,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,4096,1],f16>
%5370 = torch.aten.rsqrt %5369 : !torch.vtensor<[2,4096,1],f16> -> !torch.vtensor<[2,4096,1],f16>
%5371 = torch.prim.ListConstruct %int2, %int4096, %int320 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5372 = torch.aten.broadcast_to %5370, %5371 : !torch.vtensor<[2,4096,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%5373 = torch.aten.mul.Tensor %5365, %5372 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16>
%5374 = torch.aten.mul.Tensor %5373, %594 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[320],f16> -> !torch.vtensor<[2,4096,320],f16>
%5375 = torch.aten.add.Tensor %5374, %595, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16>
%5376 = torch.aten.transpose.int %596, %int0, %int1 : !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,320],f16>
%5377 = torch.aten.view %5375, %887 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
%5378 = torch.aten.mm %5377, %5376 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16>
%5379 = torch.aten.view %5378, %868 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%5380 = torch.aten.transpose.int %597, %int0, %int1 : !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,320],f16>
%5381 = torch.aten.view %5375, %887 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
%5382 = torch.aten.mm %5381, %5380 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16>
%5383 = torch.aten.view %5382, %868 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%5384 = torch.aten.transpose.int %598, %int0, %int1 : !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,320],f16>
%5385 = torch.aten.view %5375, %887 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
%5386 = torch.aten.mm %5385, %5384 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16>
%5387 = torch.aten.view %5386, %868 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%5388 = torch.aten.view %5379, %899 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16>
%5389 = torch.aten.permute %5388, %901 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16>
%5390 = torch.aten.clone %5389, %int0 : !torch.vtensor<[2,8,4096,40],f16>, !torch.int -> !torch.vtensor<[2,8,4096,40],f16>
%5391 = torch.aten.view %5390, %904 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%5392 = torch.aten.view %5383, %899 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16>
%5393 = torch.aten.permute %5392, %901 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16>
%5394 = torch.aten.clone %5393, %int0 : !torch.vtensor<[2,8,4096,40],f16>, !torch.int -> !torch.vtensor<[2,8,4096,40],f16>
%5395 = torch.aten.view %5394, %904 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%5396 = torch.aten.view %5387, %899 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16>
%5397 = torch.aten.permute %5396, %901 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16>
%5398 = torch.aten.clone %5397, %int0 : !torch.vtensor<[2,8,4096,40],f16>, !torch.int -> !torch.vtensor<[2,8,4096,40],f16>
%5399 = torch.aten.view %5398, %904 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%5400 = torch.aten.transpose.int %5395, %int-1, %int-2 : !torch.vtensor<[16,4096,40],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,40,4096],f16>
%5401 = torch.aten.broadcast_to %5391, %904 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%5402 = torch.aten.view %5401, %904 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%5403 = torch.aten.broadcast_to %5400, %917 : !torch.vtensor<[16,40,4096],f16>, !torch.list<int> -> !torch.vtensor<[16,40,4096],f16>
%5404 = torch.aten.view %5403, %917 : !torch.vtensor<[16,40,4096],f16>, !torch.list<int> -> !torch.vtensor<[16,40,4096],f16>
%5405 = torch.aten.bmm %5402, %5404 : !torch.vtensor<[16,4096,40],f16>, !torch.vtensor<[16,40,4096],f16> -> !torch.vtensor<[16,4096,4096],f16>
%5406 = torch.aten.view %5405, %921 : !torch.vtensor<[16,4096,4096],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,4096],f16>
%5407 = torch.aten.mul.Tensor %5406, %3 : !torch.vtensor<[16,4096,4096],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,4096,4096],f16>
%values_50, %indices_51 = torch.aten.max.dim %5407, %int-1, %true : !torch.vtensor<[16,4096,4096],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,4096,1],f16>, !torch.vtensor<[16,4096,1],si64>
%5408 = torch.aten.sub.Tensor %5407, %values_50, %float1.000000e00 : !torch.vtensor<[16,4096,4096],f16>, !torch.vtensor<[16,4096,1],f16>, !torch.float -> !torch.vtensor<[16,4096,4096],f16>
%5409 = torch.aten.exp %5408 : !torch.vtensor<[16,4096,4096],f16> -> !torch.vtensor<[16,4096,4096],f16>
%5410 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%5411 = torch.aten.sum.dim_IntList %5409, %5410, %true, %none : !torch.vtensor<[16,4096,4096],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[16,4096,1],f16>
%5412 = torch.aten.div.Tensor %5409, %5411 : !torch.vtensor<[16,4096,4096],f16>, !torch.vtensor<[16,4096,1],f16> -> !torch.vtensor<[16,4096,4096],f16>
%5413 = torch.aten.broadcast_to %5412, %921 : !torch.vtensor<[16,4096,4096],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,4096],f16>
%5414 = torch.aten.view %5413, %921 : !torch.vtensor<[16,4096,4096],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,4096],f16>
%5415 = torch.aten.broadcast_to %5399, %904 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%5416 = torch.aten.view %5415, %904 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%5417 = torch.aten.bmm %5414, %5416 : !torch.vtensor<[16,4096,4096],f16>, !torch.vtensor<[16,4096,40],f16> -> !torch.vtensor<[16,4096,40],f16>
%5418 = torch.aten.view %5417, %904 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%5419 = torch.aten.view %5418, %935 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16>
%5420 = torch.aten.permute %5419, %901 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16>
%5421 = torch.aten.clone %5420, %int0 : !torch.vtensor<[2,4096,8,40],f16>, !torch.int -> !torch.vtensor<[2,4096,8,40],f16>
%5422 = torch.aten.view %5421, %868 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%5423 = torch.aten.transpose.int %599, %int0, %int1 : !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,320],f16>
%5424 = torch.aten.view %5422, %887 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
%5425 = torch.aten.mm %5424, %5423 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16>
%5426 = torch.aten.mul.Scalar %600, %int1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320],f16>
%5427 = torch.aten.add.Tensor %5426, %5425, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[8192,320],f16>, !torch.int -> !torch.vtensor<[8192,320],f16>
%5428 = torch.aten.view %5427, %868 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%5429 = torch.aten.add.Tensor %5428, %5359, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16>
%5430 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%5431 = torch.aten.sum.dim_IntList %5429, %5430, %true, %none : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,4096,1],f16>
%5432 = torch.aten.div.Scalar %5431, %int320 : !torch.vtensor<[2,4096,1],f16>, !torch.int -> !torch.vtensor<[2,4096,1],f16>
%5433 = torch.prim.ListConstruct %int2, %int4096, %int320 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5434 = torch.aten.broadcast_to %5432, %5433 : !torch.vtensor<[2,4096,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%5435 = torch.aten.sub.Tensor %5429, %5434, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16>
%5436 = torch.aten.mul.Tensor %5435, %5435 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16>
%5437 = torch.aten.sum.dim_IntList %5436, %5430, %true, %none : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,4096,1],f16>
%5438 = torch.aten.div.Scalar %5437, %int320 : !torch.vtensor<[2,4096,1],f16>, !torch.int -> !torch.vtensor<[2,4096,1],f16>
%5439 = torch.aten.add.Scalar %5438, %float1.000000e-05, %int1 : !torch.vtensor<[2,4096,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,4096,1],f16>
%5440 = torch.aten.rsqrt %5439 : !torch.vtensor<[2,4096,1],f16> -> !torch.vtensor<[2,4096,1],f16>
%5441 = torch.prim.ListConstruct %int2, %int4096, %int320 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5442 = torch.aten.broadcast_to %5440, %5441 : !torch.vtensor<[2,4096,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%5443 = torch.aten.mul.Tensor %5435, %5442 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16>
%5444 = torch.aten.mul.Tensor %5443, %601 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[320],f16> -> !torch.vtensor<[2,4096,320],f16>
%5445 = torch.aten.add.Tensor %5444, %602, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16>
%5446 = torch.aten.transpose.int %603, %int0, %int1 : !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,320],f16>
%5447 = torch.aten.view %5445, %887 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
%5448 = torch.aten.mm %5447, %5446 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16>
%5449 = torch.aten.view %5448, %868 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%5450 = torch.aten.transpose.int %604, %int0, %int1 : !torch.vtensor<[320,768],f16>, !torch.int, !torch.int -> !torch.vtensor<[768,320],f16>
%5451 = torch.aten.view %arg2, %968 : !torch.vtensor<[2,77,768],f16>, !torch.list<int> -> !torch.vtensor<[154,768],f16>
%5452 = torch.aten.mm %5451, %5450 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,320],f16> -> !torch.vtensor<[154,320],f16>
%5453 = torch.aten.view %5452, %971 : !torch.vtensor<[154,320],f16>, !torch.list<int> -> !torch.vtensor<[2,77,320],f16>
%5454 = torch.aten.transpose.int %605, %int0, %int1 : !torch.vtensor<[320,768],f16>, !torch.int, !torch.int -> !torch.vtensor<[768,320],f16>
%5455 = torch.aten.view %arg2, %968 : !torch.vtensor<[2,77,768],f16>, !torch.list<int> -> !torch.vtensor<[154,768],f16>
%5456 = torch.aten.mm %5455, %5454 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,320],f16> -> !torch.vtensor<[154,320],f16>
%5457 = torch.aten.view %5456, %971 : !torch.vtensor<[154,320],f16>, !torch.list<int> -> !torch.vtensor<[2,77,320],f16>
%5458 = torch.aten.view %5449, %899 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16>
%5459 = torch.aten.permute %5458, %901 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16>
%5460 = torch.aten.clone %5459, %int0 : !torch.vtensor<[2,8,4096,40],f16>, !torch.int -> !torch.vtensor<[2,8,4096,40],f16>
%5461 = torch.aten.view %5460, %904 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%5462 = torch.aten.view %5453, %981 : !torch.vtensor<[2,77,320],f16>, !torch.list<int> -> !torch.vtensor<[2,77,8,40],f16>
%5463 = torch.aten.permute %5462, %901 : !torch.vtensor<[2,77,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,40],f16>
%5464 = torch.aten.clone %5463, %int0 : !torch.vtensor<[2,8,77,40],f16>, !torch.int -> !torch.vtensor<[2,8,77,40],f16>
%5465 = torch.aten.view %5464, %985 : !torch.vtensor<[2,8,77,40],f16>, !torch.list<int> -> !torch.vtensor<[16,77,40],f16>
%5466 = torch.aten.view %5457, %981 : !torch.vtensor<[2,77,320],f16>, !torch.list<int> -> !torch.vtensor<[2,77,8,40],f16>
%5467 = torch.aten.permute %5466, %901 : !torch.vtensor<[2,77,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,40],f16>
%5468 = torch.aten.clone %5467, %int0 : !torch.vtensor<[2,8,77,40],f16>, !torch.int -> !torch.vtensor<[2,8,77,40],f16>
%5469 = torch.aten.view %5468, %985 : !torch.vtensor<[2,8,77,40],f16>, !torch.list<int> -> !torch.vtensor<[16,77,40],f16>
%5470 = torch.aten.transpose.int %5465, %int-1, %int-2 : !torch.vtensor<[16,77,40],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,40,77],f16>
%5471 = torch.aten.broadcast_to %5461, %904 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%5472 = torch.aten.view %5471, %904 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%5473 = torch.aten.broadcast_to %5470, %994 : !torch.vtensor<[16,40,77],f16>, !torch.list<int> -> !torch.vtensor<[16,40,77],f16>
%5474 = torch.aten.view %5473, %994 : !torch.vtensor<[16,40,77],f16>, !torch.list<int> -> !torch.vtensor<[16,40,77],f16>
%5475 = torch.aten.bmm %5472, %5474 : !torch.vtensor<[16,4096,40],f16>, !torch.vtensor<[16,40,77],f16> -> !torch.vtensor<[16,4096,77],f16>
%5476 = torch.aten.view %5475, %998 : !torch.vtensor<[16,4096,77],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,77],f16>
%5477 = torch.aten.mul.Tensor %5476, %3 : !torch.vtensor<[16,4096,77],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,4096,77],f16>
%values_52, %indices_53 = torch.aten.max.dim %5477, %int-1, %true : !torch.vtensor<[16,4096,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,4096,1],f16>, !torch.vtensor<[16,4096,1],si64>
%5478 = torch.aten.sub.Tensor %5477, %values_52, %float1.000000e00 : !torch.vtensor<[16,4096,77],f16>, !torch.vtensor<[16,4096,1],f16>, !torch.float -> !torch.vtensor<[16,4096,77],f16>
%5479 = torch.aten.exp %5478 : !torch.vtensor<[16,4096,77],f16> -> !torch.vtensor<[16,4096,77],f16>
%5480 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%5481 = torch.aten.sum.dim_IntList %5479, %5480, %true, %none : !torch.vtensor<[16,4096,77],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[16,4096,1],f16>
%5482 = torch.aten.div.Tensor %5479, %5481 : !torch.vtensor<[16,4096,77],f16>, !torch.vtensor<[16,4096,1],f16> -> !torch.vtensor<[16,4096,77],f16>
%5483 = torch.aten.broadcast_to %5482, %998 : !torch.vtensor<[16,4096,77],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,77],f16>
%5484 = torch.aten.view %5483, %998 : !torch.vtensor<[16,4096,77],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,77],f16>
%5485 = torch.aten.broadcast_to %5469, %985 : !torch.vtensor<[16,77,40],f16>, !torch.list<int> -> !torch.vtensor<[16,77,40],f16>
%5486 = torch.aten.view %5485, %985 : !torch.vtensor<[16,77,40],f16>, !torch.list<int> -> !torch.vtensor<[16,77,40],f16>
%5487 = torch.aten.bmm %5484, %5486 : !torch.vtensor<[16,4096,77],f16>, !torch.vtensor<[16,77,40],f16> -> !torch.vtensor<[16,4096,40],f16>
%5488 = torch.aten.view %5487, %904 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%5489 = torch.aten.view %5488, %935 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16>
%5490 = torch.aten.permute %5489, %901 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16>
%5491 = torch.aten.clone %5490, %int0 : !torch.vtensor<[2,4096,8,40],f16>, !torch.int -> !torch.vtensor<[2,4096,8,40],f16>
%5492 = torch.aten.view %5491, %868 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%5493 = torch.aten.transpose.int %606, %int0, %int1 : !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,320],f16>
%5494 = torch.aten.view %5492, %887 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
%5495 = torch.aten.mm %5494, %5493 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16>
%5496 = torch.aten.mul.Scalar %607, %int1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320],f16>
%5497 = torch.aten.add.Tensor %5496, %5495, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[8192,320],f16>, !torch.int -> !torch.vtensor<[8192,320],f16>
%5498 = torch.aten.view %5497, %868 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%5499 = torch.aten.add.Tensor %5498, %5429, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16>
%5500 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%5501 = torch.aten.sum.dim_IntList %5499, %5500, %true, %none : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,4096,1],f16>
%5502 = torch.aten.div.Scalar %5501, %int320 : !torch.vtensor<[2,4096,1],f16>, !torch.int -> !torch.vtensor<[2,4096,1],f16>
%5503 = torch.prim.ListConstruct %int2, %int4096, %int320 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5504 = torch.aten.broadcast_to %5502, %5503 : !torch.vtensor<[2,4096,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%5505 = torch.aten.sub.Tensor %5499, %5504, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16>
%5506 = torch.aten.mul.Tensor %5505, %5505 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16>
%5507 = torch.aten.sum.dim_IntList %5506, %5500, %true, %none : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,4096,1],f16>
%5508 = torch.aten.div.Scalar %5507, %int320 : !torch.vtensor<[2,4096,1],f16>, !torch.int -> !torch.vtensor<[2,4096,1],f16>
%5509 = torch.aten.add.Scalar %5508, %float1.000000e-05, %int1 : !torch.vtensor<[2,4096,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,4096,1],f16>
%5510 = torch.aten.rsqrt %5509 : !torch.vtensor<[2,4096,1],f16> -> !torch.vtensor<[2,4096,1],f16>
%5511 = torch.prim.ListConstruct %int2, %int4096, %int320 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5512 = torch.aten.broadcast_to %5510, %5511 : !torch.vtensor<[2,4096,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%5513 = torch.aten.mul.Tensor %5505, %5512 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16>
%5514 = torch.aten.mul.Tensor %5513, %608 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[320],f16> -> !torch.vtensor<[2,4096,320],f16>
%5515 = torch.aten.add.Tensor %5514, %609, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16>
%5516 = torch.aten.transpose.int %610, %int0, %int1 : !torch.vtensor<[2560,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,2560],f16>
%5517 = torch.aten.view %5515, %887 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
%5518 = torch.aten.mm %5517, %5516 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,2560],f16> -> !torch.vtensor<[8192,2560],f16>
%5519 = torch.aten.mul.Scalar %611, %int1 : !torch.vtensor<[2560],f16>, !torch.int -> !torch.vtensor<[2560],f16>
%5520 = torch.aten.add.Tensor %5519, %5518, %int1 : !torch.vtensor<[2560],f16>, !torch.vtensor<[8192,2560],f16>, !torch.int -> !torch.vtensor<[8192,2560],f16>
%5521 = torch.aten.view %5520, %1044 : !torch.vtensor<[8192,2560],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,2560],f16>
%5522 = torch.aten.slice.Tensor %5521, %int-1, %int0, %int1280, %int1 : !torch.vtensor<[2,4096,2560],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,4096,1280],f16>
%5523 = torch.aten.slice.Tensor %5521, %int-1, %int1280, %int2560, %int1 : !torch.vtensor<[2,4096,2560],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,4096,1280],f16>
%5524 = torch.aten.gelu %5523, %str : !torch.vtensor<[2,4096,1280],f16>, !torch.str -> !torch.vtensor<[2,4096,1280],f16>
%5525 = torch.aten.mul.Tensor %5522, %5524 : !torch.vtensor<[2,4096,1280],f16>, !torch.vtensor<[2,4096,1280],f16> -> !torch.vtensor<[2,4096,1280],f16>
%5526 = torch.aten.transpose.int %612, %int0, %int1 : !torch.vtensor<[320,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,320],f16>
%5527 = torch.aten.view %5525, %1051 : !torch.vtensor<[2,4096,1280],f16>, !torch.list<int> -> !torch.vtensor<[8192,1280],f16>
%5528 = torch.aten.mm %5527, %5526 : !torch.vtensor<[8192,1280],f16>, !torch.vtensor<[1280,320],f16> -> !torch.vtensor<[8192,320],f16>
%5529 = torch.aten.mul.Scalar %613, %int1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320],f16>
%5530 = torch.aten.add.Tensor %5529, %5528, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[8192,320],f16>, !torch.int -> !torch.vtensor<[8192,320],f16>
%5531 = torch.aten.view %5530, %868 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%5532 = torch.aten.add.Tensor %5531, %5499, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16>
%5533 = torch.aten.view %5532, %1058 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[2,64,64,320],f16>
%5534 = torch.aten.permute %5533, %1060 : !torch.vtensor<[2,64,64,320],f16>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f16>
%5535 = torch.aten.convolution %5534, %614, %615, %744, %745, %744, %false, %745, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[320,320,1,1],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,320,64,64],f16>
%5536 = torch.aten.add.Tensor %5535, %5323, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f16>
%5537 = torch.prim.ListConstruct %5536, %1063 : (!torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f16>) -> !torch.list<vtensor>
%5538 = torch.aten.cat %5537, %int1 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[2,640,64,64],f16>
%5539 = torch.aten.clone %5538, %int0 : !torch.vtensor<[2,640,64,64],f16>, !torch.int -> !torch.vtensor<[2,640,64,64],f16>
%5540 = torch.prim.ListConstruct %int2, %int32, %int20, %int4096 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5541 = torch.aten.view %5539, %5540 : !torch.vtensor<[2,640,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,32,20,4096],f16>
%5542 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%5543 = torch.aten.to.dtype %5542, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%5544 = torch.prim.ListConstruct %int2, %int32, %int20, %int4096 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5545 = torch.aten.broadcast_to %5543, %5544 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,20,4096],f32>
%5546 = torch.valsem.aten.copy %5545, %5541, %false : !torch.vtensor<[2,32,20,4096],f32>, !torch.vtensor<[2,32,20,4096],f16>, !torch.bool -> !torch.vtensor<[2,32,20,4096],f32>
%5547 = torch.aten.to.dtype %5546, %int7, %false, %false, %none : !torch.vtensor<[2,32,20,4096],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,20,4096],f64>
%5548 = torch.aten.sum.dim_IntList %5547, %754, %true, %none : !torch.vtensor<[2,32,20,4096],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%5549 = torch.aten.div.Scalar %5548, %int81920 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%5550 = torch.aten.sub.Tensor %5547, %5549, %float1.000000e00 : !torch.vtensor<[2,32,20,4096],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,20,4096],f64>
%5551 = torch.aten.mul.Tensor %5550, %5550 : !torch.vtensor<[2,32,20,4096],f64>, !torch.vtensor<[2,32,20,4096],f64> -> !torch.vtensor<[2,32,20,4096],f64>
%5552 = torch.aten.sum.dim_IntList %5551, %754, %true, %none : !torch.vtensor<[2,32,20,4096],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%5553 = torch.aten.div.Scalar %5552, %int81920 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%5554 = torch.aten.to.dtype %5553, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%5555 = torch.aten.sum.dim_IntList %5546, %754, %true, %none : !torch.vtensor<[2,32,20,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%5556 = torch.aten.div.Scalar %5555, %int81920 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%5557 = torch.aten.add.Tensor %5554, %6, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%5558 = torch.aten.rsqrt %5557 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%5559 = torch.aten.sub.Tensor %5541, %5556, %int1 : !torch.vtensor<[2,32,20,4096],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,20,4096],f32>
%5560 = torch.aten.mul.Tensor %5559, %5558 : !torch.vtensor<[2,32,20,4096],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,20,4096],f32>
%5561 = torch.prim.ListConstruct %int2, %int640, %int64, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5562 = torch.aten.view %5560, %5561 : !torch.vtensor<[2,32,20,4096],f32>, !torch.list<int> -> !torch.vtensor<[2,640,64,64],f32>
%5563 = torch.aten.unsqueeze %616, %int-1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640,1],f16>
%5564 = torch.aten.unsqueeze %5563, %int-1 : !torch.vtensor<[640,1],f16>, !torch.int -> !torch.vtensor<[640,1,1],f16>
%5565 = torch.aten.mul.Tensor %5562, %5564 : !torch.vtensor<[2,640,64,64],f32>, !torch.vtensor<[640,1,1],f16> -> !torch.vtensor<[2,640,64,64],f32>
%5566 = torch.aten.unsqueeze %617, %int-1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640,1],f16>
%5567 = torch.aten.unsqueeze %5566, %int-1 : !torch.vtensor<[640,1],f16>, !torch.int -> !torch.vtensor<[640,1,1],f16>
%5568 = torch.aten.add.Tensor %5565, %5567, %int1 : !torch.vtensor<[2,640,64,64],f32>, !torch.vtensor<[640,1,1],f16>, !torch.int -> !torch.vtensor<[2,640,64,64],f32>
%5569 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%5570 = torch.aten.to.dtype %5569, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%5571 = torch.prim.ListConstruct %int2, %int640, %int64, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5572 = torch.aten.broadcast_to %5570, %5571 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,640,64,64],f16>
%5573 = torch.valsem.aten.copy %5572, %5568, %false : !torch.vtensor<[2,640,64,64],f16>, !torch.vtensor<[2,640,64,64],f32>, !torch.bool -> !torch.vtensor<[2,640,64,64],f16>
%5574 = torch.aten.sigmoid %5573 : !torch.vtensor<[2,640,64,64],f16> -> !torch.vtensor<[2,640,64,64],f16>
%5575 = torch.aten.mul.Tensor %5574, %5573 : !torch.vtensor<[2,640,64,64],f16>, !torch.vtensor<[2,640,64,64],f16> -> !torch.vtensor<[2,640,64,64],f16>
%5576 = torch.aten.convolution %5575, %618, %619, %744, %744, %744, %false, %745, %int1 : !torch.vtensor<[2,640,64,64],f16>, !torch.vtensor<[320,640,3,3],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,320,64,64],f16>
%5577 = torch.aten.sigmoid %743 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%5578 = torch.aten.mul.Tensor %5577, %743 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%5579 = torch.aten.transpose.int %620, %int0, %int1 : !torch.vtensor<[320,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,320],f16>
%5580 = torch.aten.mm %5578, %5579 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,320],f16> -> !torch.vtensor<[2,320],f16>
%5581 = torch.aten.mul.Scalar %621, %int1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320],f16>
%5582 = torch.aten.add.Tensor %5581, %5580, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[2,320],f16>, !torch.int -> !torch.vtensor<[2,320],f16>
%5583 = torch.aten.slice.Tensor %5582, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,320],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,320],f16>
%5584 = torch.aten.slice.Tensor %5583, %int1, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,320],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,320],f16>
%5585 = torch.aten.unsqueeze %5584, %int2 : !torch.vtensor<[2,320],f16>, !torch.int -> !torch.vtensor<[2,320,1],f16>
%5586 = torch.aten.unsqueeze %5585, %int3 : !torch.vtensor<[2,320,1],f16>, !torch.int -> !torch.vtensor<[2,320,1,1],f16>
%5587 = torch.aten.add.Tensor %5576, %5586, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,1,1],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f16>
%5588 = torch.aten.view %5587, %747 : !torch.vtensor<[2,320,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,32,10,4096],f16>
%5589 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%5590 = torch.aten.to.dtype %5589, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%5591 = torch.prim.ListConstruct %int2, %int32, %int10, %int4096 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5592 = torch.aten.broadcast_to %5590, %5591 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,10,4096],f32>
%5593 = torch.valsem.aten.copy %5592, %5588, %false : !torch.vtensor<[2,32,10,4096],f32>, !torch.vtensor<[2,32,10,4096],f16>, !torch.bool -> !torch.vtensor<[2,32,10,4096],f32>
%5594 = torch.aten.to.dtype %5593, %int7, %false, %false, %none : !torch.vtensor<[2,32,10,4096],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,10,4096],f64>
%5595 = torch.aten.sum.dim_IntList %5594, %754, %true, %none : !torch.vtensor<[2,32,10,4096],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%5596 = torch.aten.div.Scalar %5595, %int40960 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%5597 = torch.aten.sub.Tensor %5594, %5596, %float1.000000e00 : !torch.vtensor<[2,32,10,4096],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,10,4096],f64>
%5598 = torch.aten.mul.Tensor %5597, %5597 : !torch.vtensor<[2,32,10,4096],f64>, !torch.vtensor<[2,32,10,4096],f64> -> !torch.vtensor<[2,32,10,4096],f64>
%5599 = torch.aten.sum.dim_IntList %5598, %754, %true, %none : !torch.vtensor<[2,32,10,4096],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%5600 = torch.aten.div.Scalar %5599, %int40960 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%5601 = torch.aten.to.dtype %5600, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%5602 = torch.aten.sum.dim_IntList %5593, %754, %true, %none : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%5603 = torch.aten.div.Scalar %5602, %int40960 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%5604 = torch.aten.add.Tensor %5601, %6, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%5605 = torch.aten.rsqrt %5604 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%5606 = torch.aten.sub.Tensor %5588, %5603, %int1 : !torch.vtensor<[2,32,10,4096],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,10,4096],f32>
%5607 = torch.aten.mul.Tensor %5606, %5605 : !torch.vtensor<[2,32,10,4096],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,10,4096],f32>
%5608 = torch.aten.view %5607, %769 : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f32>
%5609 = torch.aten.unsqueeze %622, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16>
%5610 = torch.aten.unsqueeze %5609, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16>
%5611 = torch.aten.mul.Tensor %5608, %5610 : !torch.vtensor<[2,320,64,64],f32>, !torch.vtensor<[320,1,1],f16> -> !torch.vtensor<[2,320,64,64],f32>
%5612 = torch.aten.unsqueeze %623, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16>
%5613 = torch.aten.unsqueeze %5612, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16>
%5614 = torch.aten.add.Tensor %5611, %5613, %int1 : !torch.vtensor<[2,320,64,64],f32>, !torch.vtensor<[320,1,1],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f32>
%5615 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%5616 = torch.aten.to.dtype %5615, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%5617 = torch.prim.ListConstruct %int2, %int320, %int64, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5618 = torch.aten.broadcast_to %5616, %5617 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f16>
%5619 = torch.valsem.aten.copy %5618, %5614, %false : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f32>, !torch.bool -> !torch.vtensor<[2,320,64,64],f16>
%5620 = torch.aten.sigmoid %5619 : !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16>
%5621 = torch.aten.mul.Tensor %5620, %5619 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16>
%5622 = torch.aten.convolution %5621, %624, %625, %744, %744, %744, %false, %745, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[320,320,3,3],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,320,64,64],f16>
%5623 = torch.aten.convolution %5538, %626, %627, %744, %745, %744, %false, %745, %int1 : !torch.vtensor<[2,640,64,64],f16>, !torch.vtensor<[320,640,1,1],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,320,64,64],f16>
%5624 = torch.aten.add.Tensor %5623, %5622, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f16>
%5625 = torch.aten.div.Tensor %5624, %5 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[2,320,64,64],f16>
%5626 = torch.aten.clone %5625, %int0 : !torch.vtensor<[2,320,64,64],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f16>
%5627 = torch.aten.view %5626, %747 : !torch.vtensor<[2,320,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,32,10,4096],f16>
%5628 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%5629 = torch.aten.to.dtype %5628, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%5630 = torch.prim.ListConstruct %int2, %int32, %int10, %int4096 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5631 = torch.aten.broadcast_to %5629, %5630 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,10,4096],f32>
%5632 = torch.valsem.aten.copy %5631, %5627, %false : !torch.vtensor<[2,32,10,4096],f32>, !torch.vtensor<[2,32,10,4096],f16>, !torch.bool -> !torch.vtensor<[2,32,10,4096],f32>
%5633 = torch.aten.to.dtype %5632, %int7, %false, %false, %none : !torch.vtensor<[2,32,10,4096],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,10,4096],f64>
%5634 = torch.aten.sum.dim_IntList %5633, %754, %true, %none : !torch.vtensor<[2,32,10,4096],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%5635 = torch.aten.div.Scalar %5634, %int40960 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%5636 = torch.aten.sub.Tensor %5633, %5635, %float1.000000e00 : !torch.vtensor<[2,32,10,4096],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,10,4096],f64>
%5637 = torch.aten.mul.Tensor %5636, %5636 : !torch.vtensor<[2,32,10,4096],f64>, !torch.vtensor<[2,32,10,4096],f64> -> !torch.vtensor<[2,32,10,4096],f64>
%5638 = torch.aten.sum.dim_IntList %5637, %754, %true, %none : !torch.vtensor<[2,32,10,4096],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%5639 = torch.aten.div.Scalar %5638, %int40960 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%5640 = torch.aten.to.dtype %5639, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%5641 = torch.aten.sum.dim_IntList %5632, %754, %true, %none : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%5642 = torch.aten.div.Scalar %5641, %int40960 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%5643 = torch.aten.add.Tensor %5640, %4, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%5644 = torch.aten.rsqrt %5643 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%5645 = torch.aten.sub.Tensor %5627, %5642, %int1 : !torch.vtensor<[2,32,10,4096],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,10,4096],f32>
%5646 = torch.aten.mul.Tensor %5645, %5644 : !torch.vtensor<[2,32,10,4096],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,10,4096],f32>
%5647 = torch.aten.view %5646, %769 : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f32>
%5648 = torch.aten.unsqueeze %628, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16>
%5649 = torch.aten.unsqueeze %5648, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16>
%5650 = torch.aten.mul.Tensor %5647, %5649 : !torch.vtensor<[2,320,64,64],f32>, !torch.vtensor<[320,1,1],f16> -> !torch.vtensor<[2,320,64,64],f32>
%5651 = torch.aten.unsqueeze %629, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16>
%5652 = torch.aten.unsqueeze %5651, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16>
%5653 = torch.aten.add.Tensor %5650, %5652, %int1 : !torch.vtensor<[2,320,64,64],f32>, !torch.vtensor<[320,1,1],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f32>
%5654 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%5655 = torch.aten.to.dtype %5654, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%5656 = torch.prim.ListConstruct %int2, %int320, %int64, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5657 = torch.aten.broadcast_to %5655, %5656 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f16>
%5658 = torch.valsem.aten.copy %5657, %5653, %false : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f32>, !torch.bool -> !torch.vtensor<[2,320,64,64],f16>
%5659 = torch.aten.convolution %5658, %630, %631, %744, %745, %744, %false, %745, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[320,320,1,1],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,320,64,64],f16>
%5660 = torch.aten.permute %5659, %866 : !torch.vtensor<[2,320,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,64,64,320],f16>
%5661 = torch.aten.view %5660, %868 : !torch.vtensor<[2,64,64,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%5662 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%5663 = torch.aten.sum.dim_IntList %5661, %5662, %true, %none : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,4096,1],f16>
%5664 = torch.aten.div.Scalar %5663, %int320 : !torch.vtensor<[2,4096,1],f16>, !torch.int -> !torch.vtensor<[2,4096,1],f16>
%5665 = torch.prim.ListConstruct %int2, %int4096, %int320 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5666 = torch.aten.broadcast_to %5664, %5665 : !torch.vtensor<[2,4096,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%5667 = torch.aten.sub.Tensor %5661, %5666, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16>
%5668 = torch.aten.mul.Tensor %5667, %5667 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16>
%5669 = torch.aten.sum.dim_IntList %5668, %5662, %true, %none : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,4096,1],f16>
%5670 = torch.aten.div.Scalar %5669, %int320 : !torch.vtensor<[2,4096,1],f16>, !torch.int -> !torch.vtensor<[2,4096,1],f16>
%5671 = torch.aten.add.Scalar %5670, %float1.000000e-05, %int1 : !torch.vtensor<[2,4096,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,4096,1],f16>
%5672 = torch.aten.rsqrt %5671 : !torch.vtensor<[2,4096,1],f16> -> !torch.vtensor<[2,4096,1],f16>
%5673 = torch.prim.ListConstruct %int2, %int4096, %int320 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5674 = torch.aten.broadcast_to %5672, %5673 : !torch.vtensor<[2,4096,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%5675 = torch.aten.mul.Tensor %5667, %5674 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16>
%5676 = torch.aten.mul.Tensor %5675, %632 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[320],f16> -> !torch.vtensor<[2,4096,320],f16>
%5677 = torch.aten.add.Tensor %5676, %633, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16>
%5678 = torch.aten.transpose.int %634, %int0, %int1 : !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,320],f16>
%5679 = torch.aten.view %5677, %887 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
%5680 = torch.aten.mm %5679, %5678 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16>
%5681 = torch.aten.view %5680, %868 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%5682 = torch.aten.transpose.int %635, %int0, %int1 : !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,320],f16>
%5683 = torch.aten.view %5677, %887 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
%5684 = torch.aten.mm %5683, %5682 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16>
%5685 = torch.aten.view %5684, %868 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%5686 = torch.aten.transpose.int %636, %int0, %int1 : !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,320],f16>
%5687 = torch.aten.view %5677, %887 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
%5688 = torch.aten.mm %5687, %5686 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16>
%5689 = torch.aten.view %5688, %868 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%5690 = torch.aten.view %5681, %899 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16>
%5691 = torch.aten.permute %5690, %901 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16>
%5692 = torch.aten.clone %5691, %int0 : !torch.vtensor<[2,8,4096,40],f16>, !torch.int -> !torch.vtensor<[2,8,4096,40],f16>
%5693 = torch.aten.view %5692, %904 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%5694 = torch.aten.view %5685, %899 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16>
%5695 = torch.aten.permute %5694, %901 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16>
%5696 = torch.aten.clone %5695, %int0 : !torch.vtensor<[2,8,4096,40],f16>, !torch.int -> !torch.vtensor<[2,8,4096,40],f16>
%5697 = torch.aten.view %5696, %904 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%5698 = torch.aten.view %5689, %899 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16>
%5699 = torch.aten.permute %5698, %901 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16>
%5700 = torch.aten.clone %5699, %int0 : !torch.vtensor<[2,8,4096,40],f16>, !torch.int -> !torch.vtensor<[2,8,4096,40],f16>
%5701 = torch.aten.view %5700, %904 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%5702 = torch.aten.transpose.int %5697, %int-1, %int-2 : !torch.vtensor<[16,4096,40],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,40,4096],f16>
%5703 = torch.aten.broadcast_to %5693, %904 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%5704 = torch.aten.view %5703, %904 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%5705 = torch.aten.broadcast_to %5702, %917 : !torch.vtensor<[16,40,4096],f16>, !torch.list<int> -> !torch.vtensor<[16,40,4096],f16>
%5706 = torch.aten.view %5705, %917 : !torch.vtensor<[16,40,4096],f16>, !torch.list<int> -> !torch.vtensor<[16,40,4096],f16>
%5707 = torch.aten.bmm %5704, %5706 : !torch.vtensor<[16,4096,40],f16>, !torch.vtensor<[16,40,4096],f16> -> !torch.vtensor<[16,4096,4096],f16>
%5708 = torch.aten.view %5707, %921 : !torch.vtensor<[16,4096,4096],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,4096],f16>
%5709 = torch.aten.mul.Tensor %5708, %3 : !torch.vtensor<[16,4096,4096],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,4096,4096],f16>
%values_54, %indices_55 = torch.aten.max.dim %5709, %int-1, %true : !torch.vtensor<[16,4096,4096],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,4096,1],f16>, !torch.vtensor<[16,4096,1],si64>
%5710 = torch.aten.sub.Tensor %5709, %values_54, %float1.000000e00 : !torch.vtensor<[16,4096,4096],f16>, !torch.vtensor<[16,4096,1],f16>, !torch.float -> !torch.vtensor<[16,4096,4096],f16>
%5711 = torch.aten.exp %5710 : !torch.vtensor<[16,4096,4096],f16> -> !torch.vtensor<[16,4096,4096],f16>
%5712 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%5713 = torch.aten.sum.dim_IntList %5711, %5712, %true, %none : !torch.vtensor<[16,4096,4096],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[16,4096,1],f16>
%5714 = torch.aten.div.Tensor %5711, %5713 : !torch.vtensor<[16,4096,4096],f16>, !torch.vtensor<[16,4096,1],f16> -> !torch.vtensor<[16,4096,4096],f16>
%5715 = torch.aten.broadcast_to %5714, %921 : !torch.vtensor<[16,4096,4096],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,4096],f16>
%5716 = torch.aten.view %5715, %921 : !torch.vtensor<[16,4096,4096],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,4096],f16>
%5717 = torch.aten.broadcast_to %5701, %904 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%5718 = torch.aten.view %5717, %904 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%5719 = torch.aten.bmm %5716, %5718 : !torch.vtensor<[16,4096,4096],f16>, !torch.vtensor<[16,4096,40],f16> -> !torch.vtensor<[16,4096,40],f16>
%5720 = torch.aten.view %5719, %904 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%5721 = torch.aten.view %5720, %935 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16>
%5722 = torch.aten.permute %5721, %901 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16>
%5723 = torch.aten.clone %5722, %int0 : !torch.vtensor<[2,4096,8,40],f16>, !torch.int -> !torch.vtensor<[2,4096,8,40],f16>
%5724 = torch.aten.view %5723, %868 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%5725 = torch.aten.transpose.int %637, %int0, %int1 : !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,320],f16>
%5726 = torch.aten.view %5724, %887 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
%5727 = torch.aten.mm %5726, %5725 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16>
%5728 = torch.aten.mul.Scalar %638, %int1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320],f16>
%5729 = torch.aten.add.Tensor %5728, %5727, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[8192,320],f16>, !torch.int -> !torch.vtensor<[8192,320],f16>
%5730 = torch.aten.view %5729, %868 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%5731 = torch.aten.add.Tensor %5730, %5661, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16>
%5732 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%5733 = torch.aten.sum.dim_IntList %5731, %5732, %true, %none : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,4096,1],f16>
%5734 = torch.aten.div.Scalar %5733, %int320 : !torch.vtensor<[2,4096,1],f16>, !torch.int -> !torch.vtensor<[2,4096,1],f16>
%5735 = torch.prim.ListConstruct %int2, %int4096, %int320 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5736 = torch.aten.broadcast_to %5734, %5735 : !torch.vtensor<[2,4096,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%5737 = torch.aten.sub.Tensor %5731, %5736, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16>
%5738 = torch.aten.mul.Tensor %5737, %5737 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16>
%5739 = torch.aten.sum.dim_IntList %5738, %5732, %true, %none : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,4096,1],f16>
%5740 = torch.aten.div.Scalar %5739, %int320 : !torch.vtensor<[2,4096,1],f16>, !torch.int -> !torch.vtensor<[2,4096,1],f16>
%5741 = torch.aten.add.Scalar %5740, %float1.000000e-05, %int1 : !torch.vtensor<[2,4096,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,4096,1],f16>
%5742 = torch.aten.rsqrt %5741 : !torch.vtensor<[2,4096,1],f16> -> !torch.vtensor<[2,4096,1],f16>
%5743 = torch.prim.ListConstruct %int2, %int4096, %int320 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5744 = torch.aten.broadcast_to %5742, %5743 : !torch.vtensor<[2,4096,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%5745 = torch.aten.mul.Tensor %5737, %5744 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16>
%5746 = torch.aten.mul.Tensor %5745, %639 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[320],f16> -> !torch.vtensor<[2,4096,320],f16>
%5747 = torch.aten.add.Tensor %5746, %640, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16>
%5748 = torch.aten.transpose.int %641, %int0, %int1 : !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,320],f16>
%5749 = torch.aten.view %5747, %887 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
%5750 = torch.aten.mm %5749, %5748 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16>
%5751 = torch.aten.view %5750, %868 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%5752 = torch.aten.transpose.int %642, %int0, %int1 : !torch.vtensor<[320,768],f16>, !torch.int, !torch.int -> !torch.vtensor<[768,320],f16>
%5753 = torch.aten.view %arg2, %968 : !torch.vtensor<[2,77,768],f16>, !torch.list<int> -> !torch.vtensor<[154,768],f16>
%5754 = torch.aten.mm %5753, %5752 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,320],f16> -> !torch.vtensor<[154,320],f16>
%5755 = torch.aten.view %5754, %971 : !torch.vtensor<[154,320],f16>, !torch.list<int> -> !torch.vtensor<[2,77,320],f16>
%5756 = torch.aten.transpose.int %643, %int0, %int1 : !torch.vtensor<[320,768],f16>, !torch.int, !torch.int -> !torch.vtensor<[768,320],f16>
%5757 = torch.aten.view %arg2, %968 : !torch.vtensor<[2,77,768],f16>, !torch.list<int> -> !torch.vtensor<[154,768],f16>
%5758 = torch.aten.mm %5757, %5756 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,320],f16> -> !torch.vtensor<[154,320],f16>
%5759 = torch.aten.view %5758, %971 : !torch.vtensor<[154,320],f16>, !torch.list<int> -> !torch.vtensor<[2,77,320],f16>
%5760 = torch.aten.view %5751, %899 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16>
%5761 = torch.aten.permute %5760, %901 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16>
%5762 = torch.aten.clone %5761, %int0 : !torch.vtensor<[2,8,4096,40],f16>, !torch.int -> !torch.vtensor<[2,8,4096,40],f16>
%5763 = torch.aten.view %5762, %904 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%5764 = torch.aten.view %5755, %981 : !torch.vtensor<[2,77,320],f16>, !torch.list<int> -> !torch.vtensor<[2,77,8,40],f16>
%5765 = torch.aten.permute %5764, %901 : !torch.vtensor<[2,77,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,40],f16>
%5766 = torch.aten.clone %5765, %int0 : !torch.vtensor<[2,8,77,40],f16>, !torch.int -> !torch.vtensor<[2,8,77,40],f16>
%5767 = torch.aten.view %5766, %985 : !torch.vtensor<[2,8,77,40],f16>, !torch.list<int> -> !torch.vtensor<[16,77,40],f16>
%5768 = torch.aten.view %5759, %981 : !torch.vtensor<[2,77,320],f16>, !torch.list<int> -> !torch.vtensor<[2,77,8,40],f16>
%5769 = torch.aten.permute %5768, %901 : !torch.vtensor<[2,77,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,40],f16>
%5770 = torch.aten.clone %5769, %int0 : !torch.vtensor<[2,8,77,40],f16>, !torch.int -> !torch.vtensor<[2,8,77,40],f16>
%5771 = torch.aten.view %5770, %985 : !torch.vtensor<[2,8,77,40],f16>, !torch.list<int> -> !torch.vtensor<[16,77,40],f16>
%5772 = torch.aten.transpose.int %5767, %int-1, %int-2 : !torch.vtensor<[16,77,40],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,40,77],f16>
%5773 = torch.aten.broadcast_to %5763, %904 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%5774 = torch.aten.view %5773, %904 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%5775 = torch.aten.broadcast_to %5772, %994 : !torch.vtensor<[16,40,77],f16>, !torch.list<int> -> !torch.vtensor<[16,40,77],f16>
%5776 = torch.aten.view %5775, %994 : !torch.vtensor<[16,40,77],f16>, !torch.list<int> -> !torch.vtensor<[16,40,77],f16>
%5777 = torch.aten.bmm %5774, %5776 : !torch.vtensor<[16,4096,40],f16>, !torch.vtensor<[16,40,77],f16> -> !torch.vtensor<[16,4096,77],f16>
%5778 = torch.aten.view %5777, %998 : !torch.vtensor<[16,4096,77],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,77],f16>
%5779 = torch.aten.mul.Tensor %5778, %3 : !torch.vtensor<[16,4096,77],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,4096,77],f16>
%values_56, %indices_57 = torch.aten.max.dim %5779, %int-1, %true : !torch.vtensor<[16,4096,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,4096,1],f16>, !torch.vtensor<[16,4096,1],si64>
%5780 = torch.aten.sub.Tensor %5779, %values_56, %float1.000000e00 : !torch.vtensor<[16,4096,77],f16>, !torch.vtensor<[16,4096,1],f16>, !torch.float -> !torch.vtensor<[16,4096,77],f16>
%5781 = torch.aten.exp %5780 : !torch.vtensor<[16,4096,77],f16> -> !torch.vtensor<[16,4096,77],f16>
%5782 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%5783 = torch.aten.sum.dim_IntList %5781, %5782, %true, %none : !torch.vtensor<[16,4096,77],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[16,4096,1],f16>
%5784 = torch.aten.div.Tensor %5781, %5783 : !torch.vtensor<[16,4096,77],f16>, !torch.vtensor<[16,4096,1],f16> -> !torch.vtensor<[16,4096,77],f16>
%5785 = torch.aten.broadcast_to %5784, %998 : !torch.vtensor<[16,4096,77],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,77],f16>
%5786 = torch.aten.view %5785, %998 : !torch.vtensor<[16,4096,77],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,77],f16>
%5787 = torch.aten.broadcast_to %5771, %985 : !torch.vtensor<[16,77,40],f16>, !torch.list<int> -> !torch.vtensor<[16,77,40],f16>
%5788 = torch.aten.view %5787, %985 : !torch.vtensor<[16,77,40],f16>, !torch.list<int> -> !torch.vtensor<[16,77,40],f16>
%5789 = torch.aten.bmm %5786, %5788 : !torch.vtensor<[16,4096,77],f16>, !torch.vtensor<[16,77,40],f16> -> !torch.vtensor<[16,4096,40],f16>
%5790 = torch.aten.view %5789, %904 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%5791 = torch.aten.view %5790, %935 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16>
%5792 = torch.aten.permute %5791, %901 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16>
%5793 = torch.aten.clone %5792, %int0 : !torch.vtensor<[2,4096,8,40],f16>, !torch.int -> !torch.vtensor<[2,4096,8,40],f16>
%5794 = torch.aten.view %5793, %868 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%5795 = torch.aten.transpose.int %644, %int0, %int1 : !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,320],f16>
%5796 = torch.aten.view %5794, %887 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
%5797 = torch.aten.mm %5796, %5795 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16>
%5798 = torch.aten.mul.Scalar %645, %int1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320],f16>
%5799 = torch.aten.add.Tensor %5798, %5797, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[8192,320],f16>, !torch.int -> !torch.vtensor<[8192,320],f16>
%5800 = torch.aten.view %5799, %868 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%5801 = torch.aten.add.Tensor %5800, %5731, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16>
%5802 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%5803 = torch.aten.sum.dim_IntList %5801, %5802, %true, %none : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,4096,1],f16>
%5804 = torch.aten.div.Scalar %5803, %int320 : !torch.vtensor<[2,4096,1],f16>, !torch.int -> !torch.vtensor<[2,4096,1],f16>
%5805 = torch.prim.ListConstruct %int2, %int4096, %int320 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5806 = torch.aten.broadcast_to %5804, %5805 : !torch.vtensor<[2,4096,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%5807 = torch.aten.sub.Tensor %5801, %5806, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16>
%5808 = torch.aten.mul.Tensor %5807, %5807 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16>
%5809 = torch.aten.sum.dim_IntList %5808, %5802, %true, %none : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,4096,1],f16>
%5810 = torch.aten.div.Scalar %5809, %int320 : !torch.vtensor<[2,4096,1],f16>, !torch.int -> !torch.vtensor<[2,4096,1],f16>
%5811 = torch.aten.add.Scalar %5810, %float1.000000e-05, %int1 : !torch.vtensor<[2,4096,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,4096,1],f16>
%5812 = torch.aten.rsqrt %5811 : !torch.vtensor<[2,4096,1],f16> -> !torch.vtensor<[2,4096,1],f16>
%5813 = torch.prim.ListConstruct %int2, %int4096, %int320 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5814 = torch.aten.broadcast_to %5812, %5813 : !torch.vtensor<[2,4096,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%5815 = torch.aten.mul.Tensor %5807, %5814 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16>
%5816 = torch.aten.mul.Tensor %5815, %646 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[320],f16> -> !torch.vtensor<[2,4096,320],f16>
%5817 = torch.aten.add.Tensor %5816, %647, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16>
%5818 = torch.aten.transpose.int %648, %int0, %int1 : !torch.vtensor<[2560,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,2560],f16>
%5819 = torch.aten.view %5817, %887 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
%5820 = torch.aten.mm %5819, %5818 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,2560],f16> -> !torch.vtensor<[8192,2560],f16>
%5821 = torch.aten.mul.Scalar %649, %int1 : !torch.vtensor<[2560],f16>, !torch.int -> !torch.vtensor<[2560],f16>
%5822 = torch.aten.add.Tensor %5821, %5820, %int1 : !torch.vtensor<[2560],f16>, !torch.vtensor<[8192,2560],f16>, !torch.int -> !torch.vtensor<[8192,2560],f16>
%5823 = torch.aten.view %5822, %1044 : !torch.vtensor<[8192,2560],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,2560],f16>
%5824 = torch.aten.slice.Tensor %5823, %int-1, %int0, %int1280, %int1 : !torch.vtensor<[2,4096,2560],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,4096,1280],f16>
%5825 = torch.aten.slice.Tensor %5823, %int-1, %int1280, %int2560, %int1 : !torch.vtensor<[2,4096,2560],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,4096,1280],f16>
%5826 = torch.aten.gelu %5825, %str : !torch.vtensor<[2,4096,1280],f16>, !torch.str -> !torch.vtensor<[2,4096,1280],f16>
%5827 = torch.aten.mul.Tensor %5824, %5826 : !torch.vtensor<[2,4096,1280],f16>, !torch.vtensor<[2,4096,1280],f16> -> !torch.vtensor<[2,4096,1280],f16>
%5828 = torch.aten.transpose.int %650, %int0, %int1 : !torch.vtensor<[320,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,320],f16>
%5829 = torch.aten.view %5827, %1051 : !torch.vtensor<[2,4096,1280],f16>, !torch.list<int> -> !torch.vtensor<[8192,1280],f16>
%5830 = torch.aten.mm %5829, %5828 : !torch.vtensor<[8192,1280],f16>, !torch.vtensor<[1280,320],f16> -> !torch.vtensor<[8192,320],f16>
%5831 = torch.aten.mul.Scalar %651, %int1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320],f16>
%5832 = torch.aten.add.Tensor %5831, %5830, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[8192,320],f16>, !torch.int -> !torch.vtensor<[8192,320],f16>
%5833 = torch.aten.view %5832, %868 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%5834 = torch.aten.add.Tensor %5833, %5801, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16>
%5835 = torch.aten.view %5834, %1058 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[2,64,64,320],f16>
%5836 = torch.aten.permute %5835, %1060 : !torch.vtensor<[2,64,64,320],f16>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f16>
%5837 = torch.aten.convolution %5836, %652, %653, %744, %745, %744, %false, %745, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[320,320,1,1],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,320,64,64],f16>
%5838 = torch.aten.add.Tensor %5837, %5625, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f16>
%5839 = torch.prim.ListConstruct %5838, %746 : (!torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f16>) -> !torch.list<vtensor>
%5840 = torch.aten.cat %5839, %int1 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[2,640,64,64],f16>
%5841 = torch.aten.view %5840, %5540 : !torch.vtensor<[2,640,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,32,20,4096],f16>
%5842 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%5843 = torch.aten.to.dtype %5842, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%5844 = torch.prim.ListConstruct %int2, %int32, %int20, %int4096 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5845 = torch.aten.broadcast_to %5843, %5844 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,20,4096],f32>
%5846 = torch.valsem.aten.copy %5845, %5841, %false : !torch.vtensor<[2,32,20,4096],f32>, !torch.vtensor<[2,32,20,4096],f16>, !torch.bool -> !torch.vtensor<[2,32,20,4096],f32>
%5847 = torch.aten.to.dtype %5846, %int7, %false, %false, %none : !torch.vtensor<[2,32,20,4096],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,20,4096],f64>
%5848 = torch.aten.sum.dim_IntList %5847, %754, %true, %none : !torch.vtensor<[2,32,20,4096],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%5849 = torch.aten.div.Scalar %5848, %int81920 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%5850 = torch.aten.sub.Tensor %5847, %5849, %float1.000000e00 : !torch.vtensor<[2,32,20,4096],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,20,4096],f64>
%5851 = torch.aten.mul.Tensor %5850, %5850 : !torch.vtensor<[2,32,20,4096],f64>, !torch.vtensor<[2,32,20,4096],f64> -> !torch.vtensor<[2,32,20,4096],f64>
%5852 = torch.aten.sum.dim_IntList %5851, %754, %true, %none : !torch.vtensor<[2,32,20,4096],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%5853 = torch.aten.div.Scalar %5852, %int81920 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%5854 = torch.aten.to.dtype %5853, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%5855 = torch.aten.sum.dim_IntList %5846, %754, %true, %none : !torch.vtensor<[2,32,20,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%5856 = torch.aten.div.Scalar %5855, %int81920 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%5857 = torch.aten.add.Tensor %5854, %6, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%5858 = torch.aten.rsqrt %5857 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%5859 = torch.aten.sub.Tensor %5841, %5856, %int1 : !torch.vtensor<[2,32,20,4096],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,20,4096],f32>
%5860 = torch.aten.mul.Tensor %5859, %5858 : !torch.vtensor<[2,32,20,4096],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,20,4096],f32>
%5861 = torch.aten.view %5860, %5561 : !torch.vtensor<[2,32,20,4096],f32>, !torch.list<int> -> !torch.vtensor<[2,640,64,64],f32>
%5862 = torch.aten.unsqueeze %654, %int-1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640,1],f16>
%5863 = torch.aten.unsqueeze %5862, %int-1 : !torch.vtensor<[640,1],f16>, !torch.int -> !torch.vtensor<[640,1,1],f16>
%5864 = torch.aten.mul.Tensor %5861, %5863 : !torch.vtensor<[2,640,64,64],f32>, !torch.vtensor<[640,1,1],f16> -> !torch.vtensor<[2,640,64,64],f32>
%5865 = torch.aten.unsqueeze %655, %int-1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640,1],f16>
%5866 = torch.aten.unsqueeze %5865, %int-1 : !torch.vtensor<[640,1],f16>, !torch.int -> !torch.vtensor<[640,1,1],f16>
%5867 = torch.aten.add.Tensor %5864, %5866, %int1 : !torch.vtensor<[2,640,64,64],f32>, !torch.vtensor<[640,1,1],f16>, !torch.int -> !torch.vtensor<[2,640,64,64],f32>
%5868 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%5869 = torch.aten.to.dtype %5868, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%5870 = torch.prim.ListConstruct %int2, %int640, %int64, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5871 = torch.aten.broadcast_to %5869, %5870 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,640,64,64],f16>
%5872 = torch.valsem.aten.copy %5871, %5867, %false : !torch.vtensor<[2,640,64,64],f16>, !torch.vtensor<[2,640,64,64],f32>, !torch.bool -> !torch.vtensor<[2,640,64,64],f16>
%5873 = torch.aten.sigmoid %5872 : !torch.vtensor<[2,640,64,64],f16> -> !torch.vtensor<[2,640,64,64],f16>
%5874 = torch.aten.mul.Tensor %5873, %5872 : !torch.vtensor<[2,640,64,64],f16>, !torch.vtensor<[2,640,64,64],f16> -> !torch.vtensor<[2,640,64,64],f16>
%5875 = torch.aten.convolution %5874, %656, %657, %744, %744, %744, %false, %745, %int1 : !torch.vtensor<[2,640,64,64],f16>, !torch.vtensor<[320,640,3,3],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,320,64,64],f16>
%5876 = torch.aten.sigmoid %743 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%5877 = torch.aten.mul.Tensor %5876, %743 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%5878 = torch.aten.transpose.int %658, %int0, %int1 : !torch.vtensor<[320,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,320],f16>
%5879 = torch.aten.mm %5877, %5878 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,320],f16> -> !torch.vtensor<[2,320],f16>
%5880 = torch.aten.mul.Scalar %659, %int1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320],f16>
%5881 = torch.aten.add.Tensor %5880, %5879, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[2,320],f16>, !torch.int -> !torch.vtensor<[2,320],f16>
%5882 = torch.aten.slice.Tensor %5881, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,320],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,320],f16>
%5883 = torch.aten.slice.Tensor %5882, %int1, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,320],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,320],f16>
%5884 = torch.aten.unsqueeze %5883, %int2 : !torch.vtensor<[2,320],f16>, !torch.int -> !torch.vtensor<[2,320,1],f16>
%5885 = torch.aten.unsqueeze %5884, %int3 : !torch.vtensor<[2,320,1],f16>, !torch.int -> !torch.vtensor<[2,320,1,1],f16>
%5886 = torch.aten.add.Tensor %5875, %5885, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,1,1],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f16>
%5887 = torch.aten.view %5886, %747 : !torch.vtensor<[2,320,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,32,10,4096],f16>
%5888 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%5889 = torch.aten.to.dtype %5888, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%5890 = torch.prim.ListConstruct %int2, %int32, %int10, %int4096 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5891 = torch.aten.broadcast_to %5889, %5890 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,10,4096],f32>
%5892 = torch.valsem.aten.copy %5891, %5887, %false : !torch.vtensor<[2,32,10,4096],f32>, !torch.vtensor<[2,32,10,4096],f16>, !torch.bool -> !torch.vtensor<[2,32,10,4096],f32>
%5893 = torch.aten.to.dtype %5892, %int7, %false, %false, %none : !torch.vtensor<[2,32,10,4096],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,10,4096],f64>
%5894 = torch.aten.sum.dim_IntList %5893, %754, %true, %none : !torch.vtensor<[2,32,10,4096],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%5895 = torch.aten.div.Scalar %5894, %int40960 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%5896 = torch.aten.sub.Tensor %5893, %5895, %float1.000000e00 : !torch.vtensor<[2,32,10,4096],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,10,4096],f64>
%5897 = torch.aten.mul.Tensor %5896, %5896 : !torch.vtensor<[2,32,10,4096],f64>, !torch.vtensor<[2,32,10,4096],f64> -> !torch.vtensor<[2,32,10,4096],f64>
%5898 = torch.aten.sum.dim_IntList %5897, %754, %true, %none : !torch.vtensor<[2,32,10,4096],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%5899 = torch.aten.div.Scalar %5898, %int40960 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%5900 = torch.aten.to.dtype %5899, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%5901 = torch.aten.sum.dim_IntList %5892, %754, %true, %none : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%5902 = torch.aten.div.Scalar %5901, %int40960 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%5903 = torch.aten.add.Tensor %5900, %6, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%5904 = torch.aten.rsqrt %5903 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%5905 = torch.aten.sub.Tensor %5887, %5902, %int1 : !torch.vtensor<[2,32,10,4096],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,10,4096],f32>
%5906 = torch.aten.mul.Tensor %5905, %5904 : !torch.vtensor<[2,32,10,4096],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,10,4096],f32>
%5907 = torch.aten.view %5906, %769 : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f32>
%5908 = torch.aten.unsqueeze %660, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16>
%5909 = torch.aten.unsqueeze %5908, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16>
%5910 = torch.aten.mul.Tensor %5907, %5909 : !torch.vtensor<[2,320,64,64],f32>, !torch.vtensor<[320,1,1],f16> -> !torch.vtensor<[2,320,64,64],f32>
%5911 = torch.aten.unsqueeze %661, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16>
%5912 = torch.aten.unsqueeze %5911, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16>
%5913 = torch.aten.add.Tensor %5910, %5912, %int1 : !torch.vtensor<[2,320,64,64],f32>, !torch.vtensor<[320,1,1],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f32>
%5914 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%5915 = torch.aten.to.dtype %5914, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%5916 = torch.prim.ListConstruct %int2, %int320, %int64, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5917 = torch.aten.broadcast_to %5915, %5916 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f16>
%5918 = torch.valsem.aten.copy %5917, %5913, %false : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f32>, !torch.bool -> !torch.vtensor<[2,320,64,64],f16>
%5919 = torch.aten.sigmoid %5918 : !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16>
%5920 = torch.aten.mul.Tensor %5919, %5918 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16>
%5921 = torch.aten.convolution %5920, %662, %663, %744, %744, %744, %false, %745, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[320,320,3,3],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,320,64,64],f16>
%5922 = torch.aten.convolution %5840, %664, %665, %744, %745, %744, %false, %745, %int1 : !torch.vtensor<[2,640,64,64],f16>, !torch.vtensor<[320,640,1,1],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,320,64,64],f16>
%5923 = torch.aten.add.Tensor %5922, %5921, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f16>
%5924 = torch.aten.div.Tensor %5923, %5 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[2,320,64,64],f16>
%5925 = torch.aten.view %5924, %747 : !torch.vtensor<[2,320,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,32,10,4096],f16>
%5926 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%5927 = torch.aten.to.dtype %5926, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%5928 = torch.prim.ListConstruct %int2, %int32, %int10, %int4096 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5929 = torch.aten.broadcast_to %5927, %5928 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,10,4096],f32>
%5930 = torch.valsem.aten.copy %5929, %5925, %false : !torch.vtensor<[2,32,10,4096],f32>, !torch.vtensor<[2,32,10,4096],f16>, !torch.bool -> !torch.vtensor<[2,32,10,4096],f32>
%5931 = torch.aten.to.dtype %5930, %int7, %false, %false, %none : !torch.vtensor<[2,32,10,4096],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,10,4096],f64>
%5932 = torch.aten.sum.dim_IntList %5931, %754, %true, %none : !torch.vtensor<[2,32,10,4096],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%5933 = torch.aten.div.Scalar %5932, %int40960 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%5934 = torch.aten.sub.Tensor %5931, %5933, %float1.000000e00 : !torch.vtensor<[2,32,10,4096],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,10,4096],f64>
%5935 = torch.aten.mul.Tensor %5934, %5934 : !torch.vtensor<[2,32,10,4096],f64>, !torch.vtensor<[2,32,10,4096],f64> -> !torch.vtensor<[2,32,10,4096],f64>
%5936 = torch.aten.sum.dim_IntList %5935, %754, %true, %none : !torch.vtensor<[2,32,10,4096],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%5937 = torch.aten.div.Scalar %5936, %int40960 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%5938 = torch.aten.to.dtype %5937, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%5939 = torch.aten.sum.dim_IntList %5930, %754, %true, %none : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%5940 = torch.aten.div.Scalar %5939, %int40960 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%5941 = torch.aten.add.Tensor %5938, %4, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%5942 = torch.aten.rsqrt %5941 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%5943 = torch.aten.sub.Tensor %5925, %5940, %int1 : !torch.vtensor<[2,32,10,4096],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,10,4096],f32>
%5944 = torch.aten.mul.Tensor %5943, %5942 : !torch.vtensor<[2,32,10,4096],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,10,4096],f32>
%5945 = torch.aten.view %5944, %769 : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f32>
%5946 = torch.aten.unsqueeze %666, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16>
%5947 = torch.aten.unsqueeze %5946, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16>
%5948 = torch.aten.mul.Tensor %5945, %5947 : !torch.vtensor<[2,320,64,64],f32>, !torch.vtensor<[320,1,1],f16> -> !torch.vtensor<[2,320,64,64],f32>
%5949 = torch.aten.unsqueeze %667, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16>
%5950 = torch.aten.unsqueeze %5949, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16>
%5951 = torch.aten.add.Tensor %5948, %5950, %int1 : !torch.vtensor<[2,320,64,64],f32>, !torch.vtensor<[320,1,1],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f32>
%5952 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%5953 = torch.aten.to.dtype %5952, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%5954 = torch.prim.ListConstruct %int2, %int320, %int64, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5955 = torch.aten.broadcast_to %5953, %5954 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f16>
%5956 = torch.valsem.aten.copy %5955, %5951, %false : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f32>, !torch.bool -> !torch.vtensor<[2,320,64,64],f16>
%5957 = torch.aten.convolution %5956, %668, %669, %744, %745, %744, %false, %745, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[320,320,1,1],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,320,64,64],f16>
%5958 = torch.aten.permute %5957, %866 : !torch.vtensor<[2,320,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,64,64,320],f16>
%5959 = torch.aten.view %5958, %868 : !torch.vtensor<[2,64,64,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%5960 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%5961 = torch.aten.sum.dim_IntList %5959, %5960, %true, %none : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,4096,1],f16>
%5962 = torch.aten.div.Scalar %5961, %int320 : !torch.vtensor<[2,4096,1],f16>, !torch.int -> !torch.vtensor<[2,4096,1],f16>
%5963 = torch.prim.ListConstruct %int2, %int4096, %int320 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5964 = torch.aten.broadcast_to %5962, %5963 : !torch.vtensor<[2,4096,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%5965 = torch.aten.sub.Tensor %5959, %5964, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16>
%5966 = torch.aten.mul.Tensor %5965, %5965 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16>
%5967 = torch.aten.sum.dim_IntList %5966, %5960, %true, %none : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,4096,1],f16>
%5968 = torch.aten.div.Scalar %5967, %int320 : !torch.vtensor<[2,4096,1],f16>, !torch.int -> !torch.vtensor<[2,4096,1],f16>
%5969 = torch.aten.add.Scalar %5968, %float1.000000e-05, %int1 : !torch.vtensor<[2,4096,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,4096,1],f16>
%5970 = torch.aten.rsqrt %5969 : !torch.vtensor<[2,4096,1],f16> -> !torch.vtensor<[2,4096,1],f16>
%5971 = torch.prim.ListConstruct %int2, %int4096, %int320 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5972 = torch.aten.broadcast_to %5970, %5971 : !torch.vtensor<[2,4096,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%5973 = torch.aten.mul.Tensor %5965, %5972 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16>
%5974 = torch.aten.mul.Tensor %5973, %670 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[320],f16> -> !torch.vtensor<[2,4096,320],f16>
%5975 = torch.aten.add.Tensor %5974, %671, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16>
%5976 = torch.aten.transpose.int %672, %int0, %int1 : !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,320],f16>
%5977 = torch.aten.view %5975, %887 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
%5978 = torch.aten.mm %5977, %5976 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16>
%5979 = torch.aten.view %5978, %868 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%5980 = torch.aten.transpose.int %673, %int0, %int1 : !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,320],f16>
%5981 = torch.aten.view %5975, %887 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
%5982 = torch.aten.mm %5981, %5980 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16>
%5983 = torch.aten.view %5982, %868 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%5984 = torch.aten.transpose.int %674, %int0, %int1 : !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,320],f16>
%5985 = torch.aten.view %5975, %887 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
%5986 = torch.aten.mm %5985, %5984 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16>
%5987 = torch.aten.view %5986, %868 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%5988 = torch.aten.view %5979, %899 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16>
%5989 = torch.aten.permute %5988, %901 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16>
%5990 = torch.aten.clone %5989, %int0 : !torch.vtensor<[2,8,4096,40],f16>, !torch.int -> !torch.vtensor<[2,8,4096,40],f16>
%5991 = torch.aten.view %5990, %904 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%5992 = torch.aten.view %5983, %899 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16>
%5993 = torch.aten.permute %5992, %901 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16>
%5994 = torch.aten.clone %5993, %int0 : !torch.vtensor<[2,8,4096,40],f16>, !torch.int -> !torch.vtensor<[2,8,4096,40],f16>
%5995 = torch.aten.view %5994, %904 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%5996 = torch.aten.view %5987, %899 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16>
%5997 = torch.aten.permute %5996, %901 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16>
%5998 = torch.aten.clone %5997, %int0 : !torch.vtensor<[2,8,4096,40],f16>, !torch.int -> !torch.vtensor<[2,8,4096,40],f16>
%5999 = torch.aten.view %5998, %904 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%6000 = torch.aten.transpose.int %5995, %int-1, %int-2 : !torch.vtensor<[16,4096,40],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,40,4096],f16>
%6001 = torch.aten.broadcast_to %5991, %904 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%6002 = torch.aten.view %6001, %904 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%6003 = torch.aten.broadcast_to %6000, %917 : !torch.vtensor<[16,40,4096],f16>, !torch.list<int> -> !torch.vtensor<[16,40,4096],f16>
%6004 = torch.aten.view %6003, %917 : !torch.vtensor<[16,40,4096],f16>, !torch.list<int> -> !torch.vtensor<[16,40,4096],f16>
%6005 = torch.aten.bmm %6002, %6004 : !torch.vtensor<[16,4096,40],f16>, !torch.vtensor<[16,40,4096],f16> -> !torch.vtensor<[16,4096,4096],f16>
%6006 = torch.aten.view %6005, %921 : !torch.vtensor<[16,4096,4096],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,4096],f16>
%6007 = torch.aten.mul.Tensor %6006, %3 : !torch.vtensor<[16,4096,4096],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,4096,4096],f16>
%values_58, %indices_59 = torch.aten.max.dim %6007, %int-1, %true : !torch.vtensor<[16,4096,4096],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,4096,1],f16>, !torch.vtensor<[16,4096,1],si64>
%6008 = torch.aten.sub.Tensor %6007, %values_58, %float1.000000e00 : !torch.vtensor<[16,4096,4096],f16>, !torch.vtensor<[16,4096,1],f16>, !torch.float -> !torch.vtensor<[16,4096,4096],f16>
%6009 = torch.aten.exp %6008 : !torch.vtensor<[16,4096,4096],f16> -> !torch.vtensor<[16,4096,4096],f16>
%6010 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%6011 = torch.aten.sum.dim_IntList %6009, %6010, %true, %none : !torch.vtensor<[16,4096,4096],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[16,4096,1],f16>
%6012 = torch.aten.div.Tensor %6009, %6011 : !torch.vtensor<[16,4096,4096],f16>, !torch.vtensor<[16,4096,1],f16> -> !torch.vtensor<[16,4096,4096],f16>
%6013 = torch.aten.broadcast_to %6012, %921 : !torch.vtensor<[16,4096,4096],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,4096],f16>
%6014 = torch.aten.view %6013, %921 : !torch.vtensor<[16,4096,4096],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,4096],f16>
%6015 = torch.aten.broadcast_to %5999, %904 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%6016 = torch.aten.view %6015, %904 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%6017 = torch.aten.bmm %6014, %6016 : !torch.vtensor<[16,4096,4096],f16>, !torch.vtensor<[16,4096,40],f16> -> !torch.vtensor<[16,4096,40],f16>
%6018 = torch.aten.view %6017, %904 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%6019 = torch.aten.view %6018, %935 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16>
%6020 = torch.aten.permute %6019, %901 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16>
%6021 = torch.aten.clone %6020, %int0 : !torch.vtensor<[2,4096,8,40],f16>, !torch.int -> !torch.vtensor<[2,4096,8,40],f16>
%6022 = torch.aten.view %6021, %868 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%6023 = torch.aten.transpose.int %675, %int0, %int1 : !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,320],f16>
%6024 = torch.aten.view %6022, %887 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
%6025 = torch.aten.mm %6024, %6023 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16>
%6026 = torch.aten.mul.Scalar %676, %int1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320],f16>
%6027 = torch.aten.add.Tensor %6026, %6025, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[8192,320],f16>, !torch.int -> !torch.vtensor<[8192,320],f16>
%6028 = torch.aten.view %6027, %868 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%6029 = torch.aten.add.Tensor %6028, %5959, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16>
%6030 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%6031 = torch.aten.sum.dim_IntList %6029, %6030, %true, %none : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,4096,1],f16>
%6032 = torch.aten.div.Scalar %6031, %int320 : !torch.vtensor<[2,4096,1],f16>, !torch.int -> !torch.vtensor<[2,4096,1],f16>
%6033 = torch.prim.ListConstruct %int2, %int4096, %int320 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%6034 = torch.aten.broadcast_to %6032, %6033 : !torch.vtensor<[2,4096,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%6035 = torch.aten.sub.Tensor %6029, %6034, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16>
%6036 = torch.aten.mul.Tensor %6035, %6035 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16>
%6037 = torch.aten.sum.dim_IntList %6036, %6030, %true, %none : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,4096,1],f16>
%6038 = torch.aten.div.Scalar %6037, %int320 : !torch.vtensor<[2,4096,1],f16>, !torch.int -> !torch.vtensor<[2,4096,1],f16>
%6039 = torch.aten.add.Scalar %6038, %float1.000000e-05, %int1 : !torch.vtensor<[2,4096,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,4096,1],f16>
%6040 = torch.aten.rsqrt %6039 : !torch.vtensor<[2,4096,1],f16> -> !torch.vtensor<[2,4096,1],f16>
%6041 = torch.prim.ListConstruct %int2, %int4096, %int320 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%6042 = torch.aten.broadcast_to %6040, %6041 : !torch.vtensor<[2,4096,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%6043 = torch.aten.mul.Tensor %6035, %6042 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16>
%6044 = torch.aten.mul.Tensor %6043, %677 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[320],f16> -> !torch.vtensor<[2,4096,320],f16>
%6045 = torch.aten.add.Tensor %6044, %678, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16>
%6046 = torch.aten.transpose.int %679, %int0, %int1 : !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,320],f16>
%6047 = torch.aten.view %6045, %887 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
%6048 = torch.aten.mm %6047, %6046 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16>
%6049 = torch.aten.view %6048, %868 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%6050 = torch.aten.transpose.int %680, %int0, %int1 : !torch.vtensor<[320,768],f16>, !torch.int, !torch.int -> !torch.vtensor<[768,320],f16>
%6051 = torch.aten.view %arg2, %968 : !torch.vtensor<[2,77,768],f16>, !torch.list<int> -> !torch.vtensor<[154,768],f16>
%6052 = torch.aten.mm %6051, %6050 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,320],f16> -> !torch.vtensor<[154,320],f16>
%6053 = torch.aten.view %6052, %971 : !torch.vtensor<[154,320],f16>, !torch.list<int> -> !torch.vtensor<[2,77,320],f16>
%6054 = torch.aten.transpose.int %681, %int0, %int1 : !torch.vtensor<[320,768],f16>, !torch.int, !torch.int -> !torch.vtensor<[768,320],f16>
%6055 = torch.aten.view %arg2, %968 : !torch.vtensor<[2,77,768],f16>, !torch.list<int> -> !torch.vtensor<[154,768],f16>
%6056 = torch.aten.mm %6055, %6054 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,320],f16> -> !torch.vtensor<[154,320],f16>
%6057 = torch.aten.view %6056, %971 : !torch.vtensor<[154,320],f16>, !torch.list<int> -> !torch.vtensor<[2,77,320],f16>
%6058 = torch.aten.view %6049, %899 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16>
%6059 = torch.aten.permute %6058, %901 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16>
%6060 = torch.aten.clone %6059, %int0 : !torch.vtensor<[2,8,4096,40],f16>, !torch.int -> !torch.vtensor<[2,8,4096,40],f16>
%6061 = torch.aten.view %6060, %904 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%6062 = torch.aten.view %6053, %981 : !torch.vtensor<[2,77,320],f16>, !torch.list<int> -> !torch.vtensor<[2,77,8,40],f16>
%6063 = torch.aten.permute %6062, %901 : !torch.vtensor<[2,77,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,40],f16>
%6064 = torch.aten.clone %6063, %int0 : !torch.vtensor<[2,8,77,40],f16>, !torch.int -> !torch.vtensor<[2,8,77,40],f16>
%6065 = torch.aten.view %6064, %985 : !torch.vtensor<[2,8,77,40],f16>, !torch.list<int> -> !torch.vtensor<[16,77,40],f16>
%6066 = torch.aten.view %6057, %981 : !torch.vtensor<[2,77,320],f16>, !torch.list<int> -> !torch.vtensor<[2,77,8,40],f16>
%6067 = torch.aten.permute %6066, %901 : !torch.vtensor<[2,77,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,40],f16>
%6068 = torch.aten.clone %6067, %int0 : !torch.vtensor<[2,8,77,40],f16>, !torch.int -> !torch.vtensor<[2,8,77,40],f16>
%6069 = torch.aten.view %6068, %985 : !torch.vtensor<[2,8,77,40],f16>, !torch.list<int> -> !torch.vtensor<[16,77,40],f16>
%6070 = torch.aten.transpose.int %6065, %int-1, %int-2 : !torch.vtensor<[16,77,40],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,40,77],f16>
%6071 = torch.aten.broadcast_to %6061, %904 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%6072 = torch.aten.view %6071, %904 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%6073 = torch.aten.broadcast_to %6070, %994 : !torch.vtensor<[16,40,77],f16>, !torch.list<int> -> !torch.vtensor<[16,40,77],f16>
%6074 = torch.aten.view %6073, %994 : !torch.vtensor<[16,40,77],f16>, !torch.list<int> -> !torch.vtensor<[16,40,77],f16>
%6075 = torch.aten.bmm %6072, %6074 : !torch.vtensor<[16,4096,40],f16>, !torch.vtensor<[16,40,77],f16> -> !torch.vtensor<[16,4096,77],f16>
%6076 = torch.aten.view %6075, %998 : !torch.vtensor<[16,4096,77],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,77],f16>
%6077 = torch.aten.mul.Tensor %6076, %3 : !torch.vtensor<[16,4096,77],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,4096,77],f16>
%values_60, %indices_61 = torch.aten.max.dim %6077, %int-1, %true : !torch.vtensor<[16,4096,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,4096,1],f16>, !torch.vtensor<[16,4096,1],si64>
%6078 = torch.aten.sub.Tensor %6077, %values_60, %float1.000000e00 : !torch.vtensor<[16,4096,77],f16>, !torch.vtensor<[16,4096,1],f16>, !torch.float -> !torch.vtensor<[16,4096,77],f16>
%6079 = torch.aten.exp %6078 : !torch.vtensor<[16,4096,77],f16> -> !torch.vtensor<[16,4096,77],f16>
%6080 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%6081 = torch.aten.sum.dim_IntList %6079, %6080, %true, %none : !torch.vtensor<[16,4096,77],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[16,4096,1],f16>
%6082 = torch.aten.div.Tensor %6079, %6081 : !torch.vtensor<[16,4096,77],f16>, !torch.vtensor<[16,4096,1],f16> -> !torch.vtensor<[16,4096,77],f16>
%6083 = torch.aten.broadcast_to %6082, %998 : !torch.vtensor<[16,4096,77],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,77],f16>
%6084 = torch.aten.view %6083, %998 : !torch.vtensor<[16,4096,77],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,77],f16>
%6085 = torch.aten.broadcast_to %6069, %985 : !torch.vtensor<[16,77,40],f16>, !torch.list<int> -> !torch.vtensor<[16,77,40],f16>
%6086 = torch.aten.view %6085, %985 : !torch.vtensor<[16,77,40],f16>, !torch.list<int> -> !torch.vtensor<[16,77,40],f16>
%6087 = torch.aten.bmm %6084, %6086 : !torch.vtensor<[16,4096,77],f16>, !torch.vtensor<[16,77,40],f16> -> !torch.vtensor<[16,4096,40],f16>
%6088 = torch.aten.view %6087, %904 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%6089 = torch.aten.view %6088, %935 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16>
%6090 = torch.aten.permute %6089, %901 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16>
%6091 = torch.aten.clone %6090, %int0 : !torch.vtensor<[2,4096,8,40],f16>, !torch.int -> !torch.vtensor<[2,4096,8,40],f16>
%6092 = torch.aten.view %6091, %868 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%6093 = torch.aten.transpose.int %682, %int0, %int1 : !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,320],f16>
%6094 = torch.aten.view %6092, %887 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
%6095 = torch.aten.mm %6094, %6093 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16>
%6096 = torch.aten.mul.Scalar %683, %int1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320],f16>
%6097 = torch.aten.add.Tensor %6096, %6095, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[8192,320],f16>, !torch.int -> !torch.vtensor<[8192,320],f16>
%6098 = torch.aten.view %6097, %868 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%6099 = torch.aten.add.Tensor %6098, %6029, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16>
%6100 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%6101 = torch.aten.sum.dim_IntList %6099, %6100, %true, %none : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,4096,1],f16>
%6102 = torch.aten.div.Scalar %6101, %int320 : !torch.vtensor<[2,4096,1],f16>, !torch.int -> !torch.vtensor<[2,4096,1],f16>
%6103 = torch.prim.ListConstruct %int2, %int4096, %int320 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%6104 = torch.aten.broadcast_to %6102, %6103 : !torch.vtensor<[2,4096,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%6105 = torch.aten.sub.Tensor %6099, %6104, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16>
%6106 = torch.aten.mul.Tensor %6105, %6105 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16>
%6107 = torch.aten.sum.dim_IntList %6106, %6100, %true, %none : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,4096,1],f16>
%6108 = torch.aten.div.Scalar %6107, %int320 : !torch.vtensor<[2,4096,1],f16>, !torch.int -> !torch.vtensor<[2,4096,1],f16>
%6109 = torch.aten.add.Scalar %6108, %float1.000000e-05, %int1 : !torch.vtensor<[2,4096,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,4096,1],f16>
%6110 = torch.aten.rsqrt %6109 : !torch.vtensor<[2,4096,1],f16> -> !torch.vtensor<[2,4096,1],f16>
%6111 = torch.prim.ListConstruct %int2, %int4096, %int320 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%6112 = torch.aten.broadcast_to %6110, %6111 : !torch.vtensor<[2,4096,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%6113 = torch.aten.mul.Tensor %6105, %6112 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16>
%6114 = torch.aten.mul.Tensor %6113, %684 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[320],f16> -> !torch.vtensor<[2,4096,320],f16>
%6115 = torch.aten.add.Tensor %6114, %685, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16>
%6116 = torch.aten.transpose.int %686, %int0, %int1 : !torch.vtensor<[2560,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,2560],f16>
%6117 = torch.aten.view %6115, %887 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
%6118 = torch.aten.mm %6117, %6116 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,2560],f16> -> !torch.vtensor<[8192,2560],f16>
%6119 = torch.aten.mul.Scalar %687, %int1 : !torch.vtensor<[2560],f16>, !torch.int -> !torch.vtensor<[2560],f16>
%6120 = torch.aten.add.Tensor %6119, %6118, %int1 : !torch.vtensor<[2560],f16>, !torch.vtensor<[8192,2560],f16>, !torch.int -> !torch.vtensor<[8192,2560],f16>
%6121 = torch.aten.view %6120, %1044 : !torch.vtensor<[8192,2560],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,2560],f16>
%6122 = torch.aten.slice.Tensor %6121, %int-1, %int0, %int1280, %int1 : !torch.vtensor<[2,4096,2560],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,4096,1280],f16>
%6123 = torch.aten.slice.Tensor %6121, %int-1, %int1280, %int2560, %int1 : !torch.vtensor<[2,4096,2560],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,4096,1280],f16>
%6124 = torch.aten.gelu %6123, %str : !torch.vtensor<[2,4096,1280],f16>, !torch.str -> !torch.vtensor<[2,4096,1280],f16>
%6125 = torch.aten.mul.Tensor %6122, %6124 : !torch.vtensor<[2,4096,1280],f16>, !torch.vtensor<[2,4096,1280],f16> -> !torch.vtensor<[2,4096,1280],f16>
%6126 = torch.aten.transpose.int %688, %int0, %int1 : !torch.vtensor<[320,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,320],f16>
%6127 = torch.aten.view %6125, %1051 : !torch.vtensor<[2,4096,1280],f16>, !torch.list<int> -> !torch.vtensor<[8192,1280],f16>
%6128 = torch.aten.mm %6127, %6126 : !torch.vtensor<[8192,1280],f16>, !torch.vtensor<[1280,320],f16> -> !torch.vtensor<[8192,320],f16>
%6129 = torch.aten.mul.Scalar %689, %int1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320],f16>
%6130 = torch.aten.add.Tensor %6129, %6128, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[8192,320],f16>, !torch.int -> !torch.vtensor<[8192,320],f16>
%6131 = torch.aten.view %6130, %868 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%6132 = torch.aten.add.Tensor %6131, %6099, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16>
%6133 = torch.aten.view %6132, %1058 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[2,64,64,320],f16>
%6134 = torch.aten.permute %6133, %1060 : !torch.vtensor<[2,64,64,320],f16>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f16>
%6135 = torch.aten.convolution %6134, %690, %691, %744, %745, %744, %false, %745, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[320,320,1,1],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,320,64,64],f16>
%6136 = torch.aten.add.Tensor %6135, %5924, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f16>
%6137 = torch.aten.clone %6136, %int0 : !torch.vtensor<[2,320,64,64],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f16>
%6138 = torch.aten.view %6137, %747 : !torch.vtensor<[2,320,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,32,10,4096],f16>
%6139 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%6140 = torch.aten.to.dtype %6139, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%6141 = torch.prim.ListConstruct %int2, %int32, %int10, %int4096 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%6142 = torch.aten.broadcast_to %6140, %6141 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,10,4096],f32>
%6143 = torch.valsem.aten.copy %6142, %6138, %false : !torch.vtensor<[2,32,10,4096],f32>, !torch.vtensor<[2,32,10,4096],f16>, !torch.bool -> !torch.vtensor<[2,32,10,4096],f32>
%6144 = torch.aten.to.dtype %6143, %int7, %false, %false, %none : !torch.vtensor<[2,32,10,4096],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,10,4096],f64>
%6145 = torch.aten.sum.dim_IntList %6144, %754, %true, %none : !torch.vtensor<[2,32,10,4096],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%6146 = torch.aten.div.Scalar %6145, %int40960 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%6147 = torch.aten.sub.Tensor %6144, %6146, %float1.000000e00 : !torch.vtensor<[2,32,10,4096],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,10,4096],f64>
%6148 = torch.aten.mul.Tensor %6147, %6147 : !torch.vtensor<[2,32,10,4096],f64>, !torch.vtensor<[2,32,10,4096],f64> -> !torch.vtensor<[2,32,10,4096],f64>
%6149 = torch.aten.sum.dim_IntList %6148, %754, %true, %none : !torch.vtensor<[2,32,10,4096],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64>
%6150 = torch.aten.div.Scalar %6149, %int40960 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64>
%6151 = torch.aten.to.dtype %6150, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%6152 = torch.aten.sum.dim_IntList %6143, %754, %true, %none : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%6153 = torch.aten.div.Scalar %6152, %int40960 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%6154 = torch.aten.add.Tensor %6151, %6, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%6155 = torch.aten.rsqrt %6154 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%6156 = torch.aten.sub.Tensor %6138, %6153, %int1 : !torch.vtensor<[2,32,10,4096],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,10,4096],f32>
%6157 = torch.aten.mul.Tensor %6156, %6155 : !torch.vtensor<[2,32,10,4096],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,10,4096],f32>
%6158 = torch.aten.view %6157, %769 : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f32>
%6159 = torch.aten.unsqueeze %692, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16>
%6160 = torch.aten.unsqueeze %6159, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16>
%6161 = torch.aten.mul.Tensor %6158, %6160 : !torch.vtensor<[2,320,64,64],f32>, !torch.vtensor<[320,1,1],f16> -> !torch.vtensor<[2,320,64,64],f32>
%6162 = torch.aten.unsqueeze %693, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16>
%6163 = torch.aten.unsqueeze %6162, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16>
%6164 = torch.aten.add.Tensor %6161, %6163, %int1 : !torch.vtensor<[2,320,64,64],f32>, !torch.vtensor<[320,1,1],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f32>
%6165 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%6166 = torch.aten.to.dtype %6165, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%6167 = torch.prim.ListConstruct %int2, %int320, %int64, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%6168 = torch.aten.broadcast_to %6166, %6167 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f16>
%6169 = torch.valsem.aten.copy %6168, %6164, %false : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f32>, !torch.bool -> !torch.vtensor<[2,320,64,64],f16>
%6170 = torch.aten.sigmoid %6169 : !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16>
%6171 = torch.aten.mul.Tensor %6170, %6169 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16>
%6172 = torch.aten.convolution %6171, %694, %695, %744, %744, %744, %false, %745, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[4,320,3,3],f16>, !torch.vtensor<[4],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,4,64,64],f16>
%6173 = torch.aten.slice.Tensor %6172, %int0, %int0, %int1, %int1 : !torch.vtensor<[2,4,64,64],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,4,64,64],f16>
%6174 = torch.aten.slice.Tensor %6172, %int0, %int1, %int2, %int1 : !torch.vtensor<[2,4,64,64],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,4,64,64],f16>
%6175 = torch.aten.sub.Tensor %6174, %6173, %int1 : !torch.vtensor<[1,4,64,64],f16>, !torch.vtensor<[1,4,64,64],f16>, !torch.int -> !torch.vtensor<[1,4,64,64],f16>
%6176 = torch.aten.mul.Tensor %6175, %0 : !torch.vtensor<[1,4,64,64],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[1,4,64,64],f16>
%6177 = torch.aten.add.Tensor %6173, %6176, %int1 : !torch.vtensor<[1,4,64,64],f16>, !torch.vtensor<[1,4,64,64],f16>, !torch.int -> !torch.vtensor<[1,4,64,64],f16>
return %6177 : !torch.vtensor<[1,4,64,64],f16>
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment