// Gist by @pashu123, created October 19, 2022.
// NOTE: this file has been truncated; only the beginning of the full IR is shown.
#loc0 = loc(unknown)
module attributes {torch.debug_module_name = "_lambda"} {
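// Input shapes suggest a Stable Diffusion UNet denoising step with classifier-free
// guidance (batch 2): %arg0 is the latent sample [2,4,64,64] (f16), %arg1 a scalar
// timestep (si64), %arg2 CLIP text-encoder hidden states [2,77,768] (f16); the result
// has the latent's shape. (Interpretation inferred from shapes, not stated in the dump.)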
func.func @forward(%arg0: !torch.vtensor<[2,4,64,64],f16> loc(unknown), %arg1: !torch.vtensor<[],si64> loc(unknown), %arg2: !torch.vtensor<[2,77,768],f16> loc(unknown)) -> !torch.vtensor<[2,4,64,64],f16> {
%int64 = torch.constant.int 64 loc(#loc1)
%int320 = torch.constant.int 320 loc(#loc1)
%int2 = torch.constant.int 2 loc(#loc1)
%int40960 = torch.constant.int 40960 loc(#loc1)
%int4096 = torch.constant.int 4096 loc(#loc1)
%int10 = torch.constant.int 10 loc(#loc1)
%int32 = torch.constant.int 32 loc(#loc1)
%int640 = torch.constant.int 640 loc(#loc1)
%int81920 = torch.constant.int 81920 loc(#loc1)
%int20 = torch.constant.int 20 loc(#loc1)
%int960 = torch.constant.int 960 loc(#loc1)
%int122880 = torch.constant.int 122880 loc(#loc1)
%int30 = torch.constant.int 30 loc(#loc1)
%int1024 = torch.constant.int 1024 loc(#loc1)
%int20480 = torch.constant.int 20480 loc(#loc1)
%int30720 = torch.constant.int 30720 loc(#loc1)
%int1280 = torch.constant.int 1280 loc(#loc1)
%int40 = torch.constant.int 40 loc(#loc1)
%int1920 = torch.constant.int 1920 loc(#loc1)
%int61440 = torch.constant.int 61440 loc(#loc1)
%int60 = torch.constant.int 60 loc(#loc1)
%int256 = torch.constant.int 256 loc(#loc1)
%int16 = torch.constant.int 16 loc(#loc1)
%int10240 = torch.constant.int 10240 loc(#loc1)
%int15360 = torch.constant.int 15360 loc(#loc1)
%int2560 = torch.constant.int 2560 loc(#loc1)
%int80 = torch.constant.int 80 loc(#loc1)
%int8 = torch.constant.int 8 loc(#loc1)
%int5120 = torch.constant.int 5120 loc(#loc1)
%int1 = torch.constant.int 1 loc(#loc1)
%float1.000000e00 = torch.constant.float 1.000000e+00 loc(#loc1)
%int7 = torch.constant.int 7 loc(#loc1)
%float0.000000e00 = torch.constant.float 0.000000e+00 loc(#loc1)
%int160 = torch.constant.int 160 loc(#loc1)
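// Scalar literals below: %0-%2 match attention scales 1/sqrt(d) for head dims 160, 80,
// and 40 (0.0791 ~ 1/sqrt(160), 0.1118 ~ 1/sqrt(80), 0.1581 ~ 1/sqrt(40)), and
// %8 = -9.2103 ~ -ln(10000), the sinusoidal timestep-embedding factor used with
// half_dim = 160 (%7). (Identifications inferred from the numeric values.)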
%0 = torch.vtensor.literal(dense<0.079056941504209485> : tensor<f64>) : !torch.vtensor<[],f64> loc(#loc1)
%1 = torch.vtensor.literal(dense<0.11180339887498948> : tensor<f64>) : !torch.vtensor<[],f64> loc(#loc1)
%2 = torch.vtensor.literal(dense<0.15811388300841897> : tensor<f64>) : !torch.vtensor<[],f64> loc(#loc1)
%3 = torch.vtensor.literal(dense<9.9999999999999995E-7> : tensor<f64>) : !torch.vtensor<[],f64> loc(#loc1)
%4 = torch.vtensor.literal(dense<1.000000e+00> : tensor<f64>) : !torch.vtensor<[],f64> loc(#loc1)
%5 = torch.vtensor.literal(dense<1.000000e-05> : tensor<f64>) : !torch.vtensor<[],f64> loc(#loc1)
%6 = torch.vtensor.literal(dense<1> : tensor<si64>) : !torch.vtensor<[],si64> loc(#loc1)
%7 = torch.vtensor.literal(dense<160> : tensor<si64>) : !torch.vtensor<[],si64> loc(#loc1)
%8 = torch.vtensor.literal(dense<-9.2103403719761836> : tensor<f64>) : !torch.vtensor<[],f64> loc(#loc1)
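// From here on: model weights, elided from the dump (dense_resource<__elided__>).
// The si8 tensors appear to be int8-quantized weights, each paired with a preceding
// f16 scale tensor whose shape broadcasts over quantization groups (e.g. [320,20,1]
// scales for a [320,320] si8 weight: 20 groups of 16 input channels per output
// channel). (Pattern inferred from the shapes.)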
%9 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x320xf16>) : !torch.vtensor<[1280,320],f16> loc(#loc0)
%10 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%11 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16> loc(#loc0)
%12 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%13 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x4x3x3xf16>) : !torch.vtensor<[320,4,3,3],f16> loc(#loc0)
%14 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%15 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%16 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%17 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1x3x3xf16>) : !torch.vtensor<[320,20,1,3,3],f16> loc(#loc0)
%18 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320x3x3xsi8>) : !torch.vtensor<[320,320,3,3],si8> loc(#loc0)
%19 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%20 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x1280xf16>) : !torch.vtensor<[320,1280],f16> loc(#loc0)
%21 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%22 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%23 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%24 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1x3x3xf16>) : !torch.vtensor<[320,20,1,3,3],f16> loc(#loc0)
%25 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320x3x3xsi8>) : !torch.vtensor<[320,320,3,3],si8> loc(#loc0)
%26 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%27 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1x1x1xf16>) : !torch.vtensor<[320,20,1,1,1],f16> loc(#loc0)
%28 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320x1x1xsi8>) : !torch.vtensor<[320,320,1,1],si8> loc(#loc0)
%29 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%30 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1xf16>) : !torch.vtensor<[320,20,1],f16> loc(#loc0)
%31 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xsi8>) : !torch.vtensor<[320,320],si8> loc(#loc0)
%32 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%33 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1xf16>) : !torch.vtensor<[320,20,1],f16> loc(#loc0)
%34 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xsi8>) : !torch.vtensor<[320,320],si8> loc(#loc0)
%35 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%36 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1xf16>) : !torch.vtensor<[320,20,1],f16> loc(#loc0)
%37 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xsi8>) : !torch.vtensor<[320,320],si8> loc(#loc0)
%38 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%39 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1xf16>) : !torch.vtensor<[320,20,1],f16> loc(#loc0)
%40 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xsi8>) : !torch.vtensor<[320,320],si8> loc(#loc0)
%41 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%42 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1xf16>) : !torch.vtensor<[320,20,1],f16> loc(#loc0)
%43 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xsi8>) : !torch.vtensor<[320,320],si8> loc(#loc0)
%44 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%45 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x48x1xf16>) : !torch.vtensor<[320,48,1],f16> loc(#loc0)
%46 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x768xsi8>) : !torch.vtensor<[320,768],si8> loc(#loc0)
%47 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x48x1xf16>) : !torch.vtensor<[320,48,1],f16> loc(#loc0)
%48 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x768xsi8>) : !torch.vtensor<[320,768],si8> loc(#loc0)
%49 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1xf16>) : !torch.vtensor<[320,20,1],f16> loc(#loc0)
%50 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xsi8>) : !torch.vtensor<[320,320],si8> loc(#loc0)
%51 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%52 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560x20x1xf16>) : !torch.vtensor<[2560,20,1],f16> loc(#loc0)
%53 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560x320xsi8>) : !torch.vtensor<[2560,320],si8> loc(#loc0)
%54 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560xf16>) : !torch.vtensor<[2560],f16> loc(#loc0)
%55 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x80x1xf16>) : !torch.vtensor<[320,80,1],f16> loc(#loc0)
%56 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x1280xsi8>) : !torch.vtensor<[320,1280],si8> loc(#loc0)
%57 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%58 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1x1x1xf16>) : !torch.vtensor<[320,20,1,1,1],f16> loc(#loc0)
%59 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320x1x1xsi8>) : !torch.vtensor<[320,320,1,1],si8> loc(#loc0)
%60 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%61 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%62 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%63 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1x3x3xf16>) : !torch.vtensor<[320,20,1,3,3],f16> loc(#loc0)
%64 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320x3x3xsi8>) : !torch.vtensor<[320,320,3,3],si8> loc(#loc0)
%65 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%66 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x1280xf16>) : !torch.vtensor<[320,1280],f16> loc(#loc0)
%67 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%68 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%69 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%70 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1x3x3xf16>) : !torch.vtensor<[320,20,1,3,3],f16> loc(#loc0)
%71 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320x3x3xsi8>) : !torch.vtensor<[320,320,3,3],si8> loc(#loc0)
%72 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%73 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1x1x1xf16>) : !torch.vtensor<[320,20,1,1,1],f16> loc(#loc0)
%74 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320x1x1xsi8>) : !torch.vtensor<[320,320,1,1],si8> loc(#loc0)
%75 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%76 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1xf16>) : !torch.vtensor<[320,20,1],f16> loc(#loc0)
%77 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xsi8>) : !torch.vtensor<[320,320],si8> loc(#loc0)
%78 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%79 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1xf16>) : !torch.vtensor<[320,20,1],f16> loc(#loc0)
%80 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xsi8>) : !torch.vtensor<[320,320],si8> loc(#loc0)
%81 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%82 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1xf16>) : !torch.vtensor<[320,20,1],f16> loc(#loc0)
%83 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xsi8>) : !torch.vtensor<[320,320],si8> loc(#loc0)
%84 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%85 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1xf16>) : !torch.vtensor<[320,20,1],f16> loc(#loc0)
%86 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xsi8>) : !torch.vtensor<[320,320],si8> loc(#loc0)
%87 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%88 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1xf16>) : !torch.vtensor<[320,20,1],f16> loc(#loc0)
%89 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xsi8>) : !torch.vtensor<[320,320],si8> loc(#loc0)
%90 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%91 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x48x1xf16>) : !torch.vtensor<[320,48,1],f16> loc(#loc0)
%92 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x768xsi8>) : !torch.vtensor<[320,768],si8> loc(#loc0)
%93 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x48x1xf16>) : !torch.vtensor<[320,48,1],f16> loc(#loc0)
%94 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x768xsi8>) : !torch.vtensor<[320,768],si8> loc(#loc0)
%95 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1xf16>) : !torch.vtensor<[320,20,1],f16> loc(#loc0)
%96 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xsi8>) : !torch.vtensor<[320,320],si8> loc(#loc0)
%97 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%98 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560x20x1xf16>) : !torch.vtensor<[2560,20,1],f16> loc(#loc0)
%99 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560x320xsi8>) : !torch.vtensor<[2560,320],si8> loc(#loc0)
%100 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560xf16>) : !torch.vtensor<[2560],f16> loc(#loc0)
%101 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x80x1xf16>) : !torch.vtensor<[320,80,1],f16> loc(#loc0)
%102 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x1280xsi8>) : !torch.vtensor<[320,1280],si8> loc(#loc0)
%103 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%104 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1x1x1xf16>) : !torch.vtensor<[320,20,1,1,1],f16> loc(#loc0)
%105 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320x1x1xsi8>) : !torch.vtensor<[320,320,1,1],si8> loc(#loc0)
%106 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%107 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1x3x3xf16>) : !torch.vtensor<[320,20,1,3,3],f16> loc(#loc0)
%108 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320x3x3xsi8>) : !torch.vtensor<[320,320,3,3],si8> loc(#loc0)
%109 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%110 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%111 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
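// Weights below widen from 320 to 640 channels, consistent with the next UNet
// down-block (inferred from shapes).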
%112 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x20x1x3x3xf16>) : !torch.vtensor<[640,20,1,3,3],f16> loc(#loc0)
%113 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x320x3x3xsi8>) : !torch.vtensor<[640,320,3,3],si8> loc(#loc0)
%114 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%115 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x1280xf16>) : !torch.vtensor<[640,1280],f16> loc(#loc0)
%116 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%117 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%118 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%119 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1x3x3xf16>) : !torch.vtensor<[640,40,1,3,3],f16> loc(#loc0)
%120 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640x3x3xsi8>) : !torch.vtensor<[640,640,3,3],si8> loc(#loc0)
%121 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%122 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x20x1x1x1xf16>) : !torch.vtensor<[640,20,1,1,1],f16> loc(#loc0)
%123 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x320x1x1xsi8>) : !torch.vtensor<[640,320,1,1],si8> loc(#loc0)
%124 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%125 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1x1x1xf16>) : !torch.vtensor<[640,40,1,1,1],f16> loc(#loc0)
%126 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640x1x1xsi8>) : !torch.vtensor<[640,640,1,1],si8> loc(#loc0)
%127 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%128 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1xf16>) : !torch.vtensor<[640,40,1],f16> loc(#loc0)
%129 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xsi8>) : !torch.vtensor<[640,640],si8> loc(#loc0)
%130 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%131 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1xf16>) : !torch.vtensor<[640,40,1],f16> loc(#loc0)
%132 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xsi8>) : !torch.vtensor<[640,640],si8> loc(#loc0)
%133 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%134 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1xf16>) : !torch.vtensor<[640,40,1],f16> loc(#loc0)
%135 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xsi8>) : !torch.vtensor<[640,640],si8> loc(#loc0)
%136 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%137 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1xf16>) : !torch.vtensor<[640,40,1],f16> loc(#loc0)
%138 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xsi8>) : !torch.vtensor<[640,640],si8> loc(#loc0)
%139 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%140 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1xf16>) : !torch.vtensor<[640,40,1],f16> loc(#loc0)
%141 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xsi8>) : !torch.vtensor<[640,640],si8> loc(#loc0)
%142 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%143 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x48x1xf16>) : !torch.vtensor<[640,48,1],f16> loc(#loc0)
%144 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x768xsi8>) : !torch.vtensor<[640,768],si8> loc(#loc0)
%145 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x48x1xf16>) : !torch.vtensor<[640,48,1],f16> loc(#loc0)
%146 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x768xsi8>) : !torch.vtensor<[640,768],si8> loc(#loc0)
%147 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1xf16>) : !torch.vtensor<[640,40,1],f16> loc(#loc0)
%148 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xsi8>) : !torch.vtensor<[640,640],si8> loc(#loc0)
%149 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%150 = torch.vtensor.literal(dense_resource<__elided__> : tensor<5120x40x1xf16>) : !torch.vtensor<[5120,40,1],f16> loc(#loc0)
%151 = torch.vtensor.literal(dense_resource<__elided__> : tensor<5120x640xsi8>) : !torch.vtensor<[5120,640],si8> loc(#loc0)
%152 = torch.vtensor.literal(dense_resource<__elided__> : tensor<5120xf16>) : !torch.vtensor<[5120],f16> loc(#loc0)
%153 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x160x1xf16>) : !torch.vtensor<[640,160,1],f16> loc(#loc0)
%154 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x2560xsi8>) : !torch.vtensor<[640,2560],si8> loc(#loc0)
%155 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%156 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1x1x1xf16>) : !torch.vtensor<[640,40,1,1,1],f16> loc(#loc0)
%157 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640x1x1xsi8>) : !torch.vtensor<[640,640,1,1],si8> loc(#loc0)
%158 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%159 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%160 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%161 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1x3x3xf16>) : !torch.vtensor<[640,40,1,3,3],f16> loc(#loc0)
%162 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640x3x3xsi8>) : !torch.vtensor<[640,640,3,3],si8> loc(#loc0)
%163 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%164 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x1280xf16>) : !torch.vtensor<[640,1280],f16> loc(#loc0)
%165 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%166 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%167 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%168 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1x3x3xf16>) : !torch.vtensor<[640,40,1,3,3],f16> loc(#loc0)
%169 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640x3x3xsi8>) : !torch.vtensor<[640,640,3,3],si8> loc(#loc0)
%170 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%171 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1x1x1xf16>) : !torch.vtensor<[640,40,1,1,1],f16> loc(#loc0)
%172 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640x1x1xsi8>) : !torch.vtensor<[640,640,1,1],si8> loc(#loc0)
%173 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%174 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1xf16>) : !torch.vtensor<[640,40,1],f16> loc(#loc0)
%175 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xsi8>) : !torch.vtensor<[640,640],si8> loc(#loc0)
%176 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%177 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1xf16>) : !torch.vtensor<[640,40,1],f16> loc(#loc0)
%178 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xsi8>) : !torch.vtensor<[640,640],si8> loc(#loc0)
%179 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%180 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1xf16>) : !torch.vtensor<[640,40,1],f16> loc(#loc0)
%181 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xsi8>) : !torch.vtensor<[640,640],si8> loc(#loc0)
%182 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%183 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1xf16>) : !torch.vtensor<[640,40,1],f16> loc(#loc0)
%184 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xsi8>) : !torch.vtensor<[640,640],si8> loc(#loc0)
%185 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%186 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1xf16>) : !torch.vtensor<[640,40,1],f16> loc(#loc0)
%187 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xsi8>) : !torch.vtensor<[640,640],si8> loc(#loc0)
%188 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%189 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x48x1xf16>) : !torch.vtensor<[640,48,1],f16> loc(#loc0)
%190 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x768xsi8>) : !torch.vtensor<[640,768],si8> loc(#loc0)
%191 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x48x1xf16>) : !torch.vtensor<[640,48,1],f16> loc(#loc0)
%192 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x768xsi8>) : !torch.vtensor<[640,768],si8> loc(#loc0)
%193 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1xf16>) : !torch.vtensor<[640,40,1],f16> loc(#loc0)
%194 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xsi8>) : !torch.vtensor<[640,640],si8> loc(#loc0)
%195 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%196 = torch.vtensor.literal(dense_resource<__elided__> : tensor<5120x40x1xf16>) : !torch.vtensor<[5120,40,1],f16> loc(#loc0)
%197 = torch.vtensor.literal(dense_resource<__elided__> : tensor<5120x640xsi8>) : !torch.vtensor<[5120,640],si8> loc(#loc0)
%198 = torch.vtensor.literal(dense_resource<__elided__> : tensor<5120xf16>) : !torch.vtensor<[5120],f16> loc(#loc0)
%199 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x160x1xf16>) : !torch.vtensor<[640,160,1],f16> loc(#loc0)
%200 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x2560xsi8>) : !torch.vtensor<[640,2560],si8> loc(#loc0)
%201 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%202 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1x1x1xf16>) : !torch.vtensor<[640,40,1,1,1],f16> loc(#loc0)
%203 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640x1x1xsi8>) : !torch.vtensor<[640,640,1,1],si8> loc(#loc0)
%204 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%205 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1x3x3xf16>) : !torch.vtensor<[640,40,1,3,3],f16> loc(#loc0)
%206 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640x3x3xsi8>) : !torch.vtensor<[640,640,3,3],si8> loc(#loc0)
%207 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%208 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%209 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
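// Weights below widen from 640 to 1280 channels, consistent with the deeper UNet
// down-blocks and mid-block (inferred from shapes).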
%210 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x40x1x3x3xf16>) : !torch.vtensor<[1280,40,1,3,3],f16> loc(#loc0)
%211 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x640x3x3xsi8>) : !torch.vtensor<[1280,640,3,3],si8> loc(#loc0)
%212 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%213 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16> loc(#loc0)
%214 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%215 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%216 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%217 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1x3x3xf16>) : !torch.vtensor<[1280,80,1,3,3],f16> loc(#loc0)
%218 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x3x3xsi8>) : !torch.vtensor<[1280,1280,3,3],si8> loc(#loc0)
%219 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%220 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x40x1x1x1xf16>) : !torch.vtensor<[1280,40,1,1,1],f16> loc(#loc0)
%221 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x640x1x1xsi8>) : !torch.vtensor<[1280,640,1,1],si8> loc(#loc0)
%222 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%223 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1x1x1xf16>) : !torch.vtensor<[1280,80,1,1,1],f16> loc(#loc0)
%224 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x1x1xsi8>) : !torch.vtensor<[1280,1280,1,1],si8> loc(#loc0)
%225 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%226 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1xf16>) : !torch.vtensor<[1280,80,1],f16> loc(#loc0)
%227 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xsi8>) : !torch.vtensor<[1280,1280],si8> loc(#loc0)
%228 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%229 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1xf16>) : !torch.vtensor<[1280,80,1],f16> loc(#loc0)
%230 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xsi8>) : !torch.vtensor<[1280,1280],si8> loc(#loc0)
%231 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%232 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1xf16>) : !torch.vtensor<[1280,80,1],f16> loc(#loc0)
%233 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xsi8>) : !torch.vtensor<[1280,1280],si8> loc(#loc0)
%234 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%235 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1xf16>) : !torch.vtensor<[1280,80,1],f16> loc(#loc0)
%236 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xsi8>) : !torch.vtensor<[1280,1280],si8> loc(#loc0)
%237 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%238 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1xf16>) : !torch.vtensor<[1280,80,1],f16> loc(#loc0)
%239 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xsi8>) : !torch.vtensor<[1280,1280],si8> loc(#loc0)
%240 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%241 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x48x1xf16>) : !torch.vtensor<[1280,48,1],f16> loc(#loc0)
%242 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x768xsi8>) : !torch.vtensor<[1280,768],si8> loc(#loc0)
%243 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x48x1xf16>) : !torch.vtensor<[1280,48,1],f16> loc(#loc0)
%244 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x768xsi8>) : !torch.vtensor<[1280,768],si8> loc(#loc0)
%245 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1xf16>) : !torch.vtensor<[1280,80,1],f16> loc(#loc0)
%246 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xsi8>) : !torch.vtensor<[1280,1280],si8> loc(#loc0)
%247 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%248 = torch.vtensor.literal(dense_resource<__elided__> : tensor<10240x80x1xf16>) : !torch.vtensor<[10240,80,1],f16> loc(#loc0)
%249 = torch.vtensor.literal(dense_resource<__elided__> : tensor<10240x1280xsi8>) : !torch.vtensor<[10240,1280],si8> loc(#loc0)
%250 = torch.vtensor.literal(dense_resource<__elided__> : tensor<10240xf16>) : !torch.vtensor<[10240],f16> loc(#loc0)
%251 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x320x1xf16>) : !torch.vtensor<[1280,320,1],f16> loc(#loc0)
%252 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x5120xsi8>) : !torch.vtensor<[1280,5120],si8> loc(#loc0)
%253 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%254 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1x1x1xf16>) : !torch.vtensor<[1280,80,1,1,1],f16> loc(#loc0)
%255 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x1x1xsi8>) : !torch.vtensor<[1280,1280,1,1],si8> loc(#loc0)
%256 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%257 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%258 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%259 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1x3x3xf16>) : !torch.vtensor<[1280,80,1,3,3],f16> loc(#loc0)
%260 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x3x3xsi8>) : !torch.vtensor<[1280,1280,3,3],si8> loc(#loc0)
%261 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%262 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16> loc(#loc0)
%263 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%264 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%265 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%266 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1x3x3xf16>) : !torch.vtensor<[1280,80,1,3,3],f16> loc(#loc0)
%267 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x3x3xsi8>) : !torch.vtensor<[1280,1280,3,3],si8> loc(#loc0)
%268 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%269 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1x1x1xf16>) : !torch.vtensor<[1280,80,1,1,1],f16> loc(#loc0)
%270 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x1x1xsi8>) : !torch.vtensor<[1280,1280,1,1],si8> loc(#loc0)
%271 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%272 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1xf16>) : !torch.vtensor<[1280,80,1],f16> loc(#loc0)
%273 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xsi8>) : !torch.vtensor<[1280,1280],si8> loc(#loc0)
%274 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%275 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1xf16>) : !torch.vtensor<[1280,80,1],f16> loc(#loc0)
%276 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xsi8>) : !torch.vtensor<[1280,1280],si8> loc(#loc0)
%277 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%278 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1xf16>) : !torch.vtensor<[1280,80,1],f16> loc(#loc0)
%279 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xsi8>) : !torch.vtensor<[1280,1280],si8> loc(#loc0)
%280 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%281 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1xf16>) : !torch.vtensor<[1280,80,1],f16> loc(#loc0)
%282 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xsi8>) : !torch.vtensor<[1280,1280],si8> loc(#loc0)
%283 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%284 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1xf16>) : !torch.vtensor<[1280,80,1],f16> loc(#loc0)
%285 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xsi8>) : !torch.vtensor<[1280,1280],si8> loc(#loc0)
%286 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%287 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x48x1xf16>) : !torch.vtensor<[1280,48,1],f16> loc(#loc0)
%288 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x768xsi8>) : !torch.vtensor<[1280,768],si8> loc(#loc0)
%289 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x48x1xf16>) : !torch.vtensor<[1280,48,1],f16> loc(#loc0)
%290 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x768xsi8>) : !torch.vtensor<[1280,768],si8> loc(#loc0)
%291 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1xf16>) : !torch.vtensor<[1280,80,1],f16> loc(#loc0)
%292 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xsi8>) : !torch.vtensor<[1280,1280],si8> loc(#loc0)
%293 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%294 = torch.vtensor.literal(dense_resource<__elided__> : tensor<10240x80x1xf16>) : !torch.vtensor<[10240,80,1],f16> loc(#loc0)
%295 = torch.vtensor.literal(dense_resource<__elided__> : tensor<10240x1280xsi8>) : !torch.vtensor<[10240,1280],si8> loc(#loc0)
%296 = torch.vtensor.literal(dense_resource<__elided__> : tensor<10240xf16>) : !torch.vtensor<[10240],f16> loc(#loc0)
%297 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x320x1xf16>) : !torch.vtensor<[1280,320,1],f16> loc(#loc0)
%298 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x5120xsi8>) : !torch.vtensor<[1280,5120],si8> loc(#loc0)
%299 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%300 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1x1x1xf16>) : !torch.vtensor<[1280,80,1,1,1],f16> loc(#loc0)
%301 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x1x1xsi8>) : !torch.vtensor<[1280,1280,1,1],si8> loc(#loc0)
%302 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%303 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1x3x3xf16>) : !torch.vtensor<[1280,80,1,3,3],f16> loc(#loc0)
%304 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x3x3xsi8>) : !torch.vtensor<[1280,1280,3,3],si8> loc(#loc0)
%305 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%306 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%307 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%308 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1x3x3xf16>) : !torch.vtensor<[1280,80,1,3,3],f16> loc(#loc0)
%309 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x3x3xsi8>) : !torch.vtensor<[1280,1280,3,3],si8> loc(#loc0)
%310 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%311 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16> loc(#loc0)
%312 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%313 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%314 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%315 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1x3x3xf16>) : !torch.vtensor<[1280,80,1,3,3],f16> loc(#loc0)
%316 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x3x3xsi8>) : !torch.vtensor<[1280,1280,3,3],si8> loc(#loc0)
%317 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%318 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%319 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%320 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1x3x3xf16>) : !torch.vtensor<[1280,80,1,3,3],f16> loc(#loc0)
%321 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x3x3xsi8>) : !torch.vtensor<[1280,1280,3,3],si8> loc(#loc0)
%322 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%323 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16> loc(#loc0)
%324 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%325 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%326 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%327 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1x3x3xf16>) : !torch.vtensor<[1280,80,1,3,3],f16> loc(#loc0)
%328 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x3x3xsi8>) : !torch.vtensor<[1280,1280,3,3],si8> loc(#loc0)
%329 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%330 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%331 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%332 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1x3x3xf16>) : !torch.vtensor<[1280,80,1,3,3],f16> loc(#loc0)
%333 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x3x3xsi8>) : !torch.vtensor<[1280,1280,3,3],si8> loc(#loc0)
%334 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%335 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16> loc(#loc0)
%336 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%337 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%338 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%339 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1x3x3xf16>) : !torch.vtensor<[1280,80,1,3,3],f16> loc(#loc0)
%340 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x3x3xsi8>) : !torch.vtensor<[1280,1280,3,3],si8> loc(#loc0)
%341 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%342 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1x1x1xf16>) : !torch.vtensor<[1280,80,1,1,1],f16> loc(#loc0)
%343 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x1x1xsi8>) : !torch.vtensor<[1280,1280,1,1],si8> loc(#loc0)
%344 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%345 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1xf16>) : !torch.vtensor<[1280,80,1],f16> loc(#loc0)
%346 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xsi8>) : !torch.vtensor<[1280,1280],si8> loc(#loc0)
%347 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%348 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1xf16>) : !torch.vtensor<[1280,80,1],f16> loc(#loc0)
%349 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xsi8>) : !torch.vtensor<[1280,1280],si8> loc(#loc0)
%350 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%351 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1xf16>) : !torch.vtensor<[1280,80,1],f16> loc(#loc0)
%352 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xsi8>) : !torch.vtensor<[1280,1280],si8> loc(#loc0)
%353 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%354 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1xf16>) : !torch.vtensor<[1280,80,1],f16> loc(#loc0)
%355 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xsi8>) : !torch.vtensor<[1280,1280],si8> loc(#loc0)
%356 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%357 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1xf16>) : !torch.vtensor<[1280,80,1],f16> loc(#loc0)
%358 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xsi8>) : !torch.vtensor<[1280,1280],si8> loc(#loc0)
%359 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%360 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x48x1xf16>) : !torch.vtensor<[1280,48,1],f16> loc(#loc0)
%361 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x768xsi8>) : !torch.vtensor<[1280,768],si8> loc(#loc0)
%362 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x48x1xf16>) : !torch.vtensor<[1280,48,1],f16> loc(#loc0)
%363 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x768xsi8>) : !torch.vtensor<[1280,768],si8> loc(#loc0)
%364 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1xf16>) : !torch.vtensor<[1280,80,1],f16> loc(#loc0)
%365 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xsi8>) : !torch.vtensor<[1280,1280],si8> loc(#loc0)
%366 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%367 = torch.vtensor.literal(dense_resource<__elided__> : tensor<10240x80x1xf16>) : !torch.vtensor<[10240,80,1],f16> loc(#loc0)
%368 = torch.vtensor.literal(dense_resource<__elided__> : tensor<10240x1280xsi8>) : !torch.vtensor<[10240,1280],si8> loc(#loc0)
%369 = torch.vtensor.literal(dense_resource<__elided__> : tensor<10240xf16>) : !torch.vtensor<[10240],f16> loc(#loc0)
%370 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x320x1xf16>) : !torch.vtensor<[1280,320,1],f16> loc(#loc0)
%371 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x5120xsi8>) : !torch.vtensor<[1280,5120],si8> loc(#loc0)
%372 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%373 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1x1x1xf16>) : !torch.vtensor<[1280,80,1,1,1],f16> loc(#loc0)
%374 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x1x1xsi8>) : !torch.vtensor<[1280,1280,1,1],si8> loc(#loc0)
%375 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%376 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%377 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%378 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1x3x3xf16>) : !torch.vtensor<[1280,80,1,3,3],f16> loc(#loc0)
%379 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x3x3xsi8>) : !torch.vtensor<[1280,1280,3,3],si8> loc(#loc0)
%380 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%381 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16> loc(#loc0)
%382 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%383 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%384 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%385 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1x3x3xf16>) : !torch.vtensor<[1280,80,1,3,3],f16> loc(#loc0)
%386 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x3x3xsi8>) : !torch.vtensor<[1280,1280,3,3],si8> loc(#loc0)
%387 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
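// The 2560-channel norm parameters and [1280,2560,3,3] convolutions below are
// consistent with UNet up-blocks consuming concatenated skip connections
// (1280 + 1280 = 2560); inferred from shapes, not stated in the dump.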
%388 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560xf16>) : !torch.vtensor<[2560],f16> loc(#loc0)
%389 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560xf16>) : !torch.vtensor<[2560],f16> loc(#loc0)
%390 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x160x1x3x3xf16>) : !torch.vtensor<[1280,160,1,3,3],f16> loc(#loc0)
%391 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x2560x3x3xsi8>) : !torch.vtensor<[1280,2560,3,3],si8> loc(#loc0)
%392 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%393 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16> loc(#loc0)
%394 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%395 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%396 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%397 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1x3x3xf16>) : !torch.vtensor<[1280,80,1,3,3],f16> loc(#loc0)
%398 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x3x3xsi8>) : !torch.vtensor<[1280,1280,3,3],si8> loc(#loc0)
%399 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%400 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x160x1x1x1xf16>) : !torch.vtensor<[1280,160,1,1,1],f16> loc(#loc0)
%401 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x2560x1x1xsi8>) : !torch.vtensor<[1280,2560,1,1],si8> loc(#loc0)
%402 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%403 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560xf16>) : !torch.vtensor<[2560],f16> loc(#loc0)
%404 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560xf16>) : !torch.vtensor<[2560],f16> loc(#loc0)
%405 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x160x1x3x3xf16>) : !torch.vtensor<[1280,160,1,3,3],f16> loc(#loc0)
%406 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x2560x3x3xsi8>) : !torch.vtensor<[1280,2560,3,3],si8> loc(#loc0)
%407 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%408 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16> loc(#loc0)
%409 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%410 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%411 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%412 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1x3x3xf16>) : !torch.vtensor<[1280,80,1,3,3],f16> loc(#loc0)
%413 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x3x3xsi8>) : !torch.vtensor<[1280,1280,3,3],si8> loc(#loc0)
%414 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%415 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x160x1x1x1xf16>) : !torch.vtensor<[1280,160,1,1,1],f16> loc(#loc0)
%416 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x2560x1x1xsi8>) : !torch.vtensor<[1280,2560,1,1],si8> loc(#loc0)
%417 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%418 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560xf16>) : !torch.vtensor<[2560],f16> loc(#loc0)
%419 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560xf16>) : !torch.vtensor<[2560],f16> loc(#loc0)
%420 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x160x1x3x3xf16>) : !torch.vtensor<[1280,160,1,3,3],f16> loc(#loc0)
%421 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x2560x3x3xsi8>) : !torch.vtensor<[1280,2560,3,3],si8> loc(#loc0)
%422 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%423 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16> loc(#loc0)
%424 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%425 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%426 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%427 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1x3x3xf16>) : !torch.vtensor<[1280,80,1,3,3],f16> loc(#loc0)
%428 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x3x3xsi8>) : !torch.vtensor<[1280,1280,3,3],si8> loc(#loc0)
%429 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%430 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x160x1x1x1xf16>) : !torch.vtensor<[1280,160,1,1,1],f16> loc(#loc0)
%431 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x2560x1x1xsi8>) : !torch.vtensor<[1280,2560,1,1],si8> loc(#loc0)
%432 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%433 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1x3x3xf16>) : !torch.vtensor<[1280,80,1,3,3],f16> loc(#loc0)
%434 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x3x3xsi8>) : !torch.vtensor<[1280,1280,3,3],si8> loc(#loc0)
%435 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%436 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560xf16>) : !torch.vtensor<[2560],f16> loc(#loc0)
%437 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560xf16>) : !torch.vtensor<[2560],f16> loc(#loc0)
%438 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x160x1x3x3xf16>) : !torch.vtensor<[1280,160,1,3,3],f16> loc(#loc0)
%439 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x2560x3x3xsi8>) : !torch.vtensor<[1280,2560,3,3],si8> loc(#loc0)
%440 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%441 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16> loc(#loc0)
%442 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%443 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%444 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%445 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1x3x3xf16>) : !torch.vtensor<[1280,80,1,3,3],f16> loc(#loc0)
%446 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x3x3xsi8>) : !torch.vtensor<[1280,1280,3,3],si8> loc(#loc0)
%447 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%448 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x160x1x1x1xf16>) : !torch.vtensor<[1280,160,1,1,1],f16> loc(#loc0)
%449 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x2560x1x1xsi8>) : !torch.vtensor<[1280,2560,1,1],si8> loc(#loc0)
%450 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%451 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1x1x1xf16>) : !torch.vtensor<[1280,80,1,1,1],f16> loc(#loc0)
%452 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x1x1xsi8>) : !torch.vtensor<[1280,1280,1,1],si8> loc(#loc0)
%453 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%454 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1xf16>) : !torch.vtensor<[1280,80,1],f16> loc(#loc0)
%455 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xsi8>) : !torch.vtensor<[1280,1280],si8> loc(#loc0)
%456 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%457 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1xf16>) : !torch.vtensor<[1280,80,1],f16> loc(#loc0)
%458 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xsi8>) : !torch.vtensor<[1280,1280],si8> loc(#loc0)
%459 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%460 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1xf16>) : !torch.vtensor<[1280,80,1],f16> loc(#loc0)
%461 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xsi8>) : !torch.vtensor<[1280,1280],si8> loc(#loc0)
%462 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%463 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1xf16>) : !torch.vtensor<[1280,80,1],f16> loc(#loc0)
%464 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xsi8>) : !torch.vtensor<[1280,1280],si8> loc(#loc0)
%465 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%466 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1xf16>) : !torch.vtensor<[1280,80,1],f16> loc(#loc0)
%467 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xsi8>) : !torch.vtensor<[1280,1280],si8> loc(#loc0)
%468 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%469 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x48x1xf16>) : !torch.vtensor<[1280,48,1],f16> loc(#loc0)
%470 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x768xsi8>) : !torch.vtensor<[1280,768],si8> loc(#loc0)
%471 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x48x1xf16>) : !torch.vtensor<[1280,48,1],f16> loc(#loc0)
%472 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x768xsi8>) : !torch.vtensor<[1280,768],si8> loc(#loc0)
%473 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1xf16>) : !torch.vtensor<[1280,80,1],f16> loc(#loc0)
%474 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xsi8>) : !torch.vtensor<[1280,1280],si8> loc(#loc0)
%475 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%476 = torch.vtensor.literal(dense_resource<__elided__> : tensor<10240x80x1xf16>) : !torch.vtensor<[10240,80,1],f16> loc(#loc0)
%477 = torch.vtensor.literal(dense_resource<__elided__> : tensor<10240x1280xsi8>) : !torch.vtensor<[10240,1280],si8> loc(#loc0)
%478 = torch.vtensor.literal(dense_resource<__elided__> : tensor<10240xf16>) : !torch.vtensor<[10240],f16> loc(#loc0)
%479 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x320x1xf16>) : !torch.vtensor<[1280,320,1],f16> loc(#loc0)
%480 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x5120xsi8>) : !torch.vtensor<[1280,5120],si8> loc(#loc0)
%481 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%482 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1x1x1xf16>) : !torch.vtensor<[1280,80,1,1,1],f16> loc(#loc0)
%483 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x1x1xsi8>) : !torch.vtensor<[1280,1280,1,1],si8> loc(#loc0)
%484 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%485 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560xf16>) : !torch.vtensor<[2560],f16> loc(#loc0)
%486 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560xf16>) : !torch.vtensor<[2560],f16> loc(#loc0)
%487 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x160x1x3x3xf16>) : !torch.vtensor<[1280,160,1,3,3],f16> loc(#loc0)
%488 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x2560x3x3xsi8>) : !torch.vtensor<[1280,2560,3,3],si8> loc(#loc0)
%489 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%490 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16> loc(#loc0)
%491 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%492 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%493 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%494 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1x3x3xf16>) : !torch.vtensor<[1280,80,1,3,3],f16> loc(#loc0)
%495 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x3x3xsi8>) : !torch.vtensor<[1280,1280,3,3],si8> loc(#loc0)
%496 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%497 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x160x1x1x1xf16>) : !torch.vtensor<[1280,160,1,1,1],f16> loc(#loc0)
%498 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x2560x1x1xsi8>) : !torch.vtensor<[1280,2560,1,1],si8> loc(#loc0)
%499 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%500 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1x1x1xf16>) : !torch.vtensor<[1280,80,1,1,1],f16> loc(#loc0)
%501 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x1x1xsi8>) : !torch.vtensor<[1280,1280,1,1],si8> loc(#loc0)
%502 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%503 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1xf16>) : !torch.vtensor<[1280,80,1],f16> loc(#loc0)
%504 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xsi8>) : !torch.vtensor<[1280,1280],si8> loc(#loc0)
%505 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%506 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1xf16>) : !torch.vtensor<[1280,80,1],f16> loc(#loc0)
%507 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xsi8>) : !torch.vtensor<[1280,1280],si8> loc(#loc0)
%508 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%509 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1xf16>) : !torch.vtensor<[1280,80,1],f16> loc(#loc0)
%510 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xsi8>) : !torch.vtensor<[1280,1280],si8> loc(#loc0)
%511 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%512 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1xf16>) : !torch.vtensor<[1280,80,1],f16> loc(#loc0)
%513 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xsi8>) : !torch.vtensor<[1280,1280],si8> loc(#loc0)
%514 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%515 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1xf16>) : !torch.vtensor<[1280,80,1],f16> loc(#loc0)
%516 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xsi8>) : !torch.vtensor<[1280,1280],si8> loc(#loc0)
%517 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%518 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x48x1xf16>) : !torch.vtensor<[1280,48,1],f16> loc(#loc0)
%519 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x768xsi8>) : !torch.vtensor<[1280,768],si8> loc(#loc0)
%520 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x48x1xf16>) : !torch.vtensor<[1280,48,1],f16> loc(#loc0)
%521 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x768xsi8>) : !torch.vtensor<[1280,768],si8> loc(#loc0)
%522 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1xf16>) : !torch.vtensor<[1280,80,1],f16> loc(#loc0)
%523 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xsi8>) : !torch.vtensor<[1280,1280],si8> loc(#loc0)
%524 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%525 = torch.vtensor.literal(dense_resource<__elided__> : tensor<10240x80x1xf16>) : !torch.vtensor<[10240,80,1],f16> loc(#loc0)
%526 = torch.vtensor.literal(dense_resource<__elided__> : tensor<10240x1280xsi8>) : !torch.vtensor<[10240,1280],si8> loc(#loc0)
%527 = torch.vtensor.literal(dense_resource<__elided__> : tensor<10240xf16>) : !torch.vtensor<[10240],f16> loc(#loc0)
%528 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x320x1xf16>) : !torch.vtensor<[1280,320,1],f16> loc(#loc0)
%529 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x5120xsi8>) : !torch.vtensor<[1280,5120],si8> loc(#loc0)
%530 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%531 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1x1x1xf16>) : !torch.vtensor<[1280,80,1,1,1],f16> loc(#loc0)
%532 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x1x1xsi8>) : !torch.vtensor<[1280,1280,1,1],si8> loc(#loc0)
%533 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%534 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1920xf16>) : !torch.vtensor<[1920],f16> loc(#loc0)
%535 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1920xf16>) : !torch.vtensor<[1920],f16> loc(#loc0)
%536 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x120x1x3x3xf16>) : !torch.vtensor<[1280,120,1,3,3],f16> loc(#loc0)
%537 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1920x3x3xsi8>) : !torch.vtensor<[1280,1920,3,3],si8> loc(#loc0)
%538 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%539 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16> loc(#loc0)
%540 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%541 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%542 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%543 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1x3x3xf16>) : !torch.vtensor<[1280,80,1,3,3],f16> loc(#loc0)
%544 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x3x3xsi8>) : !torch.vtensor<[1280,1280,3,3],si8> loc(#loc0)
%545 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%546 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x120x1x1x1xf16>) : !torch.vtensor<[1280,120,1,1,1],f16> loc(#loc0)
%547 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1920x1x1xsi8>) : !torch.vtensor<[1280,1920,1,1],si8> loc(#loc0)
%548 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%549 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1x1x1xf16>) : !torch.vtensor<[1280,80,1,1,1],f16> loc(#loc0)
%550 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x1x1xsi8>) : !torch.vtensor<[1280,1280,1,1],si8> loc(#loc0)
%551 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%552 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1xf16>) : !torch.vtensor<[1280,80,1],f16> loc(#loc0)
%553 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xsi8>) : !torch.vtensor<[1280,1280],si8> loc(#loc0)
%554 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%555 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1xf16>) : !torch.vtensor<[1280,80,1],f16> loc(#loc0)
%556 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xsi8>) : !torch.vtensor<[1280,1280],si8> loc(#loc0)
%557 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%558 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1xf16>) : !torch.vtensor<[1280,80,1],f16> loc(#loc0)
%559 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xsi8>) : !torch.vtensor<[1280,1280],si8> loc(#loc0)
%560 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%561 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1xf16>) : !torch.vtensor<[1280,80,1],f16> loc(#loc0)
%562 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xsi8>) : !torch.vtensor<[1280,1280],si8> loc(#loc0)
%563 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%564 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1xf16>) : !torch.vtensor<[1280,80,1],f16> loc(#loc0)
%565 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xsi8>) : !torch.vtensor<[1280,1280],si8> loc(#loc0)
%566 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%567 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x48x1xf16>) : !torch.vtensor<[1280,48,1],f16> loc(#loc0)
%568 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x768xsi8>) : !torch.vtensor<[1280,768],si8> loc(#loc0)
%569 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x48x1xf16>) : !torch.vtensor<[1280,48,1],f16> loc(#loc0)
%570 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x768xsi8>) : !torch.vtensor<[1280,768],si8> loc(#loc0)
%571 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1xf16>) : !torch.vtensor<[1280,80,1],f16> loc(#loc0)
%572 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xsi8>) : !torch.vtensor<[1280,1280],si8> loc(#loc0)
%573 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%574 = torch.vtensor.literal(dense_resource<__elided__> : tensor<10240x80x1xf16>) : !torch.vtensor<[10240,80,1],f16> loc(#loc0)
%575 = torch.vtensor.literal(dense_resource<__elided__> : tensor<10240x1280xsi8>) : !torch.vtensor<[10240,1280],si8> loc(#loc0)
%576 = torch.vtensor.literal(dense_resource<__elided__> : tensor<10240xf16>) : !torch.vtensor<[10240],f16> loc(#loc0)
%577 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x320x1xf16>) : !torch.vtensor<[1280,320,1],f16> loc(#loc0)
%578 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x5120xsi8>) : !torch.vtensor<[1280,5120],si8> loc(#loc0)
%579 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%580 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1x1x1xf16>) : !torch.vtensor<[1280,80,1,1,1],f16> loc(#loc0)
%581 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x1x1xsi8>) : !torch.vtensor<[1280,1280,1,1],si8> loc(#loc0)
%582 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%583 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1x3x3xf16>) : !torch.vtensor<[1280,80,1,3,3],f16> loc(#loc0)
%584 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x3x3xsi8>) : !torch.vtensor<[1280,1280,3,3],si8> loc(#loc0)
%585 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
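// Output width drops from 1280 to 640 below. The 1920-element norm parameters
// (1920 = 1280 + 640) are consistent with a skip-connection concat at a UNet
// up-block boundary; this reading is inferred from shapes only.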
%586 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1920xf16>) : !torch.vtensor<[1920],f16> loc(#loc0)
%587 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1920xf16>) : !torch.vtensor<[1920],f16> loc(#loc0)
%588 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x120x1x3x3xf16>) : !torch.vtensor<[640,120,1,3,3],f16> loc(#loc0)
%589 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x1920x3x3xsi8>) : !torch.vtensor<[640,1920,3,3],si8> loc(#loc0)
%590 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%591 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x1280xf16>) : !torch.vtensor<[640,1280],f16> loc(#loc0)
%592 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%593 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%594 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%595 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1x3x3xf16>) : !torch.vtensor<[640,40,1,3,3],f16> loc(#loc0)
%596 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640x3x3xsi8>) : !torch.vtensor<[640,640,3,3],si8> loc(#loc0)
%597 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%598 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x120x1x1x1xf16>) : !torch.vtensor<[640,120,1,1,1],f16> loc(#loc0)
%599 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x1920x1x1xsi8>) : !torch.vtensor<[640,1920,1,1],si8> loc(#loc0)
%600 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%601 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1x1x1xf16>) : !torch.vtensor<[640,40,1,1,1],f16> loc(#loc0)
%602 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640x1x1xsi8>) : !torch.vtensor<[640,640,1,1],si8> loc(#loc0)
%603 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%604 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1xf16>) : !torch.vtensor<[640,40,1],f16> loc(#loc0)
%605 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xsi8>) : !torch.vtensor<[640,640],si8> loc(#loc0)
%606 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%607 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1xf16>) : !torch.vtensor<[640,40,1],f16> loc(#loc0)
%608 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xsi8>) : !torch.vtensor<[640,640],si8> loc(#loc0)
%609 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%610 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1xf16>) : !torch.vtensor<[640,40,1],f16> loc(#loc0)
%611 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xsi8>) : !torch.vtensor<[640,640],si8> loc(#loc0)
%612 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%613 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1xf16>) : !torch.vtensor<[640,40,1],f16> loc(#loc0)
%614 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xsi8>) : !torch.vtensor<[640,640],si8> loc(#loc0)
%615 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%616 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1xf16>) : !torch.vtensor<[640,40,1],f16> loc(#loc0)
%617 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xsi8>) : !torch.vtensor<[640,640],si8> loc(#loc0)
%618 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%619 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x48x1xf16>) : !torch.vtensor<[640,48,1],f16> loc(#loc0)
%620 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x768xsi8>) : !torch.vtensor<[640,768],si8> loc(#loc0)
%621 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x48x1xf16>) : !torch.vtensor<[640,48,1],f16> loc(#loc0)
%622 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x768xsi8>) : !torch.vtensor<[640,768],si8> loc(#loc0)
%623 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1xf16>) : !torch.vtensor<[640,40,1],f16> loc(#loc0)
%624 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xsi8>) : !torch.vtensor<[640,640],si8> loc(#loc0)
%625 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%626 = torch.vtensor.literal(dense_resource<__elided__> : tensor<5120x40x1xf16>) : !torch.vtensor<[5120,40,1],f16> loc(#loc0)
%627 = torch.vtensor.literal(dense_resource<__elided__> : tensor<5120x640xsi8>) : !torch.vtensor<[5120,640],si8> loc(#loc0)
%628 = torch.vtensor.literal(dense_resource<__elided__> : tensor<5120xf16>) : !torch.vtensor<[5120],f16> loc(#loc0)
%629 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x160x1xf16>) : !torch.vtensor<[640,160,1],f16> loc(#loc0)
%630 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x2560xsi8>) : !torch.vtensor<[640,2560],si8> loc(#loc0)
%631 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%632 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1x1x1xf16>) : !torch.vtensor<[640,40,1,1,1],f16> loc(#loc0)
%633 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640x1x1xsi8>) : !torch.vtensor<[640,640,1,1],si8> loc(#loc0)
%634 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%635 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%636 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%637 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x80x1x3x3xf16>) : !torch.vtensor<[640,80,1,3,3],f16> loc(#loc0)
%638 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x1280x3x3xsi8>) : !torch.vtensor<[640,1280,3,3],si8> loc(#loc0)
%639 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%640 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x1280xf16>) : !torch.vtensor<[640,1280],f16> loc(#loc0)
%641 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%642 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%643 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%644 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1x3x3xf16>) : !torch.vtensor<[640,40,1,3,3],f16> loc(#loc0)
%645 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640x3x3xsi8>) : !torch.vtensor<[640,640,3,3],si8> loc(#loc0)
%646 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%647 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x80x1x1x1xf16>) : !torch.vtensor<[640,80,1,1,1],f16> loc(#loc0)
%648 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x1280x1x1xsi8>) : !torch.vtensor<[640,1280,1,1],si8> loc(#loc0)
%649 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%650 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1x1x1xf16>) : !torch.vtensor<[640,40,1,1,1],f16> loc(#loc0)
%651 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640x1x1xsi8>) : !torch.vtensor<[640,640,1,1],si8> loc(#loc0)
%652 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%653 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1xf16>) : !torch.vtensor<[640,40,1],f16> loc(#loc0)
%654 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xsi8>) : !torch.vtensor<[640,640],si8> loc(#loc0)
%655 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%656 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1xf16>) : !torch.vtensor<[640,40,1],f16> loc(#loc0)
%657 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xsi8>) : !torch.vtensor<[640,640],si8> loc(#loc0)
%658 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%659 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1xf16>) : !torch.vtensor<[640,40,1],f16> loc(#loc0)
%660 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xsi8>) : !torch.vtensor<[640,640],si8> loc(#loc0)
%661 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%662 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1xf16>) : !torch.vtensor<[640,40,1],f16> loc(#loc0)
%663 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xsi8>) : !torch.vtensor<[640,640],si8> loc(#loc0)
%664 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%665 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1xf16>) : !torch.vtensor<[640,40,1],f16> loc(#loc0)
%666 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xsi8>) : !torch.vtensor<[640,640],si8> loc(#loc0)
%667 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%668 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x48x1xf16>) : !torch.vtensor<[640,48,1],f16> loc(#loc0)
%669 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x768xsi8>) : !torch.vtensor<[640,768],si8> loc(#loc0)
%670 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x48x1xf16>) : !torch.vtensor<[640,48,1],f16> loc(#loc0)
%671 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x768xsi8>) : !torch.vtensor<[640,768],si8> loc(#loc0)
%672 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1xf16>) : !torch.vtensor<[640,40,1],f16> loc(#loc0)
%673 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xsi8>) : !torch.vtensor<[640,640],si8> loc(#loc0)
%674 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%675 = torch.vtensor.literal(dense_resource<__elided__> : tensor<5120x40x1xf16>) : !torch.vtensor<[5120,40,1],f16> loc(#loc0)
%676 = torch.vtensor.literal(dense_resource<__elided__> : tensor<5120x640xsi8>) : !torch.vtensor<[5120,640],si8> loc(#loc0)
%677 = torch.vtensor.literal(dense_resource<__elided__> : tensor<5120xf16>) : !torch.vtensor<[5120],f16> loc(#loc0)
%678 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x160x1xf16>) : !torch.vtensor<[640,160,1],f16> loc(#loc0)
%679 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x2560xsi8>) : !torch.vtensor<[640,2560],si8> loc(#loc0)
%680 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%681 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1x1x1xf16>) : !torch.vtensor<[640,40,1,1,1],f16> loc(#loc0)
%682 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640x1x1xsi8>) : !torch.vtensor<[640,640,1,1],si8> loc(#loc0)
%683 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%684 = torch.vtensor.literal(dense_resource<__elided__> : tensor<960xf16>) : !torch.vtensor<[960],f16> loc(#loc0)
%685 = torch.vtensor.literal(dense_resource<__elided__> : tensor<960xf16>) : !torch.vtensor<[960],f16> loc(#loc0)
%686 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x60x1x3x3xf16>) : !torch.vtensor<[640,60,1,3,3],f16> loc(#loc0)
%687 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x960x3x3xsi8>) : !torch.vtensor<[640,960,3,3],si8> loc(#loc0)
%688 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%689 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x1280xf16>) : !torch.vtensor<[640,1280],f16> loc(#loc0)
%690 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%691 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%692 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%693 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1x3x3xf16>) : !torch.vtensor<[640,40,1,3,3],f16> loc(#loc0)
%694 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640x3x3xsi8>) : !torch.vtensor<[640,640,3,3],si8> loc(#loc0)
%695 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%696 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x60x1x1x1xf16>) : !torch.vtensor<[640,60,1,1,1],f16> loc(#loc0)
%697 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x960x1x1xsi8>) : !torch.vtensor<[640,960,1,1],si8> loc(#loc0)
%698 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%699 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1x1x1xf16>) : !torch.vtensor<[640,40,1,1,1],f16> loc(#loc0)
%700 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640x1x1xsi8>) : !torch.vtensor<[640,640,1,1],si8> loc(#loc0)
%701 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%702 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1xf16>) : !torch.vtensor<[640,40,1],f16> loc(#loc0)
%703 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xsi8>) : !torch.vtensor<[640,640],si8> loc(#loc0)
%704 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%705 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1xf16>) : !torch.vtensor<[640,40,1],f16> loc(#loc0)
%706 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xsi8>) : !torch.vtensor<[640,640],si8> loc(#loc0)
%707 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%708 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1xf16>) : !torch.vtensor<[640,40,1],f16> loc(#loc0)
%709 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xsi8>) : !torch.vtensor<[640,640],si8> loc(#loc0)
%710 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%711 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1xf16>) : !torch.vtensor<[640,40,1],f16> loc(#loc0)
%712 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xsi8>) : !torch.vtensor<[640,640],si8> loc(#loc0)
%713 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%714 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1xf16>) : !torch.vtensor<[640,40,1],f16> loc(#loc0)
%715 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xsi8>) : !torch.vtensor<[640,640],si8> loc(#loc0)
%716 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%717 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x48x1xf16>) : !torch.vtensor<[640,48,1],f16> loc(#loc0)
%718 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x768xsi8>) : !torch.vtensor<[640,768],si8> loc(#loc0)
%719 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x48x1xf16>) : !torch.vtensor<[640,48,1],f16> loc(#loc0)
%720 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x768xsi8>) : !torch.vtensor<[640,768],si8> loc(#loc0)
%721 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1xf16>) : !torch.vtensor<[640,40,1],f16> loc(#loc0)
%722 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xsi8>) : !torch.vtensor<[640,640],si8> loc(#loc0)
%723 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%724 = torch.vtensor.literal(dense_resource<__elided__> : tensor<5120x40x1xf16>) : !torch.vtensor<[5120,40,1],f16> loc(#loc0)
%725 = torch.vtensor.literal(dense_resource<__elided__> : tensor<5120x640xsi8>) : !torch.vtensor<[5120,640],si8> loc(#loc0)
%726 = torch.vtensor.literal(dense_resource<__elided__> : tensor<5120xf16>) : !torch.vtensor<[5120],f16> loc(#loc0)
%727 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x160x1xf16>) : !torch.vtensor<[640,160,1],f16> loc(#loc0)
%728 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x2560xsi8>) : !torch.vtensor<[640,2560],si8> loc(#loc0)
%729 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%730 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1x1x1xf16>) : !torch.vtensor<[640,40,1,1,1],f16> loc(#loc0)
%731 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640x1x1xsi8>) : !torch.vtensor<[640,640,1,1],si8> loc(#loc0)
%732 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%733 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1x3x3xf16>) : !torch.vtensor<[640,40,1,3,3],f16> loc(#loc0)
%734 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640x3x3xsi8>) : !torch.vtensor<[640,640,3,3],si8> loc(#loc0)
%735 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
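// Output width drops again, from 640 to 320. The 960-element norm parameters
// (960 = 640 + 320) likewise suggest a skip-connection concat at the next
// up-block boundary (again inferred from shapes only).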
%736 = torch.vtensor.literal(dense_resource<__elided__> : tensor<960xf16>) : !torch.vtensor<[960],f16> loc(#loc0)
%737 = torch.vtensor.literal(dense_resource<__elided__> : tensor<960xf16>) : !torch.vtensor<[960],f16> loc(#loc0)
%738 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x60x1x3x3xf16>) : !torch.vtensor<[320,60,1,3,3],f16> loc(#loc0)
%739 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x960x3x3xsi8>) : !torch.vtensor<[320,960,3,3],si8> loc(#loc0)
%740 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%741 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x1280xf16>) : !torch.vtensor<[320,1280],f16> loc(#loc0)
%742 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%743 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%744 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%745 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1x3x3xf16>) : !torch.vtensor<[320,20,1,3,3],f16> loc(#loc0)
%746 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320x3x3xsi8>) : !torch.vtensor<[320,320,3,3],si8> loc(#loc0)
%747 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%748 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x60x1x1x1xf16>) : !torch.vtensor<[320,60,1,1,1],f16> loc(#loc0)
%749 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x960x1x1xsi8>) : !torch.vtensor<[320,960,1,1],si8> loc(#loc0)
%750 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%751 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1x1x1xf16>) : !torch.vtensor<[320,20,1,1,1],f16> loc(#loc0)
%752 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320x1x1xsi8>) : !torch.vtensor<[320,320,1,1],si8> loc(#loc0)
%753 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%754 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1xf16>) : !torch.vtensor<[320,20,1],f16> loc(#loc0)
%755 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xsi8>) : !torch.vtensor<[320,320],si8> loc(#loc0)
%756 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%757 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1xf16>) : !torch.vtensor<[320,20,1],f16> loc(#loc0)
%758 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xsi8>) : !torch.vtensor<[320,320],si8> loc(#loc0)
%759 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%760 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1xf16>) : !torch.vtensor<[320,20,1],f16> loc(#loc0)
%761 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xsi8>) : !torch.vtensor<[320,320],si8> loc(#loc0)
%762 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%763 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1xf16>) : !torch.vtensor<[320,20,1],f16> loc(#loc0)
%764 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xsi8>) : !torch.vtensor<[320,320],si8> loc(#loc0)
%765 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%766 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1xf16>) : !torch.vtensor<[320,20,1],f16> loc(#loc0)
%767 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xsi8>) : !torch.vtensor<[320,320],si8> loc(#loc0)
%768 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%769 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x48x1xf16>) : !torch.vtensor<[320,48,1],f16> loc(#loc0)
%770 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x768xsi8>) : !torch.vtensor<[320,768],si8> loc(#loc0)
%771 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x48x1xf16>) : !torch.vtensor<[320,48,1],f16> loc(#loc0)
%772 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x768xsi8>) : !torch.vtensor<[320,768],si8> loc(#loc0)
%773 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1xf16>) : !torch.vtensor<[320,20,1],f16> loc(#loc0)
%774 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xsi8>) : !torch.vtensor<[320,320],si8> loc(#loc0)
%775 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%776 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560x20x1xf16>) : !torch.vtensor<[2560,20,1],f16> loc(#loc0)
%777 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560x320xsi8>) : !torch.vtensor<[2560,320],si8> loc(#loc0)
%778 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560xf16>) : !torch.vtensor<[2560],f16> loc(#loc0)
%779 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x80x1xf16>) : !torch.vtensor<[320,80,1],f16> loc(#loc0)
%780 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x1280xsi8>) : !torch.vtensor<[320,1280],si8> loc(#loc0)
%781 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%782 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1x1x1xf16>) : !torch.vtensor<[320,20,1,1,1],f16> loc(#loc0)
%783 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320x1x1xsi8>) : !torch.vtensor<[320,320,1,1],si8> loc(#loc0)
%784 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%785 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%786 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%787 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x40x1x3x3xf16>) : !torch.vtensor<[320,40,1,3,3],f16> loc(#loc0)
%788 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x640x3x3xsi8>) : !torch.vtensor<[320,640,3,3],si8> loc(#loc0)
%789 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%790 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x1280xf16>) : !torch.vtensor<[320,1280],f16> loc(#loc0)
%791 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%792 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%793 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%794 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1x3x3xf16>) : !torch.vtensor<[320,20,1,3,3],f16> loc(#loc0)
%795 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320x3x3xsi8>) : !torch.vtensor<[320,320,3,3],si8> loc(#loc0)
%796 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%797 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x40x1x1x1xf16>) : !torch.vtensor<[320,40,1,1,1],f16> loc(#loc0)
%798 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x640x1x1xsi8>) : !torch.vtensor<[320,640,1,1],si8> loc(#loc0)
%799 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%800 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1x1x1xf16>) : !torch.vtensor<[320,20,1,1,1],f16> loc(#loc0)
%801 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320x1x1xsi8>) : !torch.vtensor<[320,320,1,1],si8> loc(#loc0)
%802 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%803 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1xf16>) : !torch.vtensor<[320,20,1],f16> loc(#loc0)
%804 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xsi8>) : !torch.vtensor<[320,320],si8> loc(#loc0)
%805 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%806 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1xf16>) : !torch.vtensor<[320,20,1],f16> loc(#loc0)
%807 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xsi8>) : !torch.vtensor<[320,320],si8> loc(#loc0)
%808 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%809 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1xf16>) : !torch.vtensor<[320,20,1],f16> loc(#loc0)
%810 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xsi8>) : !torch.vtensor<[320,320],si8> loc(#loc0)
%811 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%812 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1xf16>) : !torch.vtensor<[320,20,1],f16> loc(#loc0)
%813 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xsi8>) : !torch.vtensor<[320,320],si8> loc(#loc0)
%814 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%815 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1xf16>) : !torch.vtensor<[320,20,1],f16> loc(#loc0)
%816 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xsi8>) : !torch.vtensor<[320,320],si8> loc(#loc0)
%817 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%818 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x48x1xf16>) : !torch.vtensor<[320,48,1],f16> loc(#loc0)
%819 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x768xsi8>) : !torch.vtensor<[320,768],si8> loc(#loc0)
%820 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x48x1xf16>) : !torch.vtensor<[320,48,1],f16> loc(#loc0)
%821 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x768xsi8>) : !torch.vtensor<[320,768],si8> loc(#loc0)
%822 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1xf16>) : !torch.vtensor<[320,20,1],f16> loc(#loc0)
%823 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xsi8>) : !torch.vtensor<[320,320],si8> loc(#loc0)
%824 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%825 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560x20x1xf16>) : !torch.vtensor<[2560,20,1],f16> loc(#loc0)
%826 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560x320xsi8>) : !torch.vtensor<[2560,320],si8> loc(#loc0)
%827 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560xf16>) : !torch.vtensor<[2560],f16> loc(#loc0)
%828 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x80x1xf16>) : !torch.vtensor<[320,80,1],f16> loc(#loc0)
%829 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x1280xsi8>) : !torch.vtensor<[320,1280],si8> loc(#loc0)
%830 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%831 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1x1x1xf16>) : !torch.vtensor<[320,20,1,1,1],f16> loc(#loc0)
%832 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320x1x1xsi8>) : !torch.vtensor<[320,320,1,1],si8> loc(#loc0)
%833 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%834 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%835 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%836 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x40x1x3x3xf16>) : !torch.vtensor<[320,40,1,3,3],f16> loc(#loc0)
%837 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x640x3x3xsi8>) : !torch.vtensor<[320,640,3,3],si8> loc(#loc0)
%838 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%839 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x1280xf16>) : !torch.vtensor<[320,1280],f16> loc(#loc0)
%840 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%841 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%842 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%843 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1x3x3xf16>) : !torch.vtensor<[320,20,1,3,3],f16> loc(#loc0)
%844 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320x3x3xsi8>) : !torch.vtensor<[320,320,3,3],si8> loc(#loc0)
%845 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%846 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x40x1x1x1xf16>) : !torch.vtensor<[320,40,1,1,1],f16> loc(#loc0)
%847 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x640x1x1xsi8>) : !torch.vtensor<[320,640,1,1],si8> loc(#loc0)
%848 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%849 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1x1x1xf16>) : !torch.vtensor<[320,20,1,1,1],f16> loc(#loc0)
%850 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320x1x1xsi8>) : !torch.vtensor<[320,320,1,1],si8> loc(#loc0)
%851 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%852 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1xf16>) : !torch.vtensor<[320,20,1],f16> loc(#loc0)
%853 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xsi8>) : !torch.vtensor<[320,320],si8> loc(#loc0)
%854 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%855 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1xf16>) : !torch.vtensor<[320,20,1],f16> loc(#loc0)
%856 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xsi8>) : !torch.vtensor<[320,320],si8> loc(#loc0)
%857 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%858 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1xf16>) : !torch.vtensor<[320,20,1],f16> loc(#loc0)
%859 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xsi8>) : !torch.vtensor<[320,320],si8> loc(#loc0)
%860 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%861 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1xf16>) : !torch.vtensor<[320,20,1],f16> loc(#loc0)
%862 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xsi8>) : !torch.vtensor<[320,320],si8> loc(#loc0)
%863 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%864 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1xf16>) : !torch.vtensor<[320,20,1],f16> loc(#loc0)
%865 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xsi8>) : !torch.vtensor<[320,320],si8> loc(#loc0)
%866 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%867 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x48x1xf16>) : !torch.vtensor<[320,48,1],f16> loc(#loc0)
%868 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x768xsi8>) : !torch.vtensor<[320,768],si8> loc(#loc0)
%869 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x48x1xf16>) : !torch.vtensor<[320,48,1],f16> loc(#loc0)
%870 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x768xsi8>) : !torch.vtensor<[320,768],si8> loc(#loc0)
%871 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1xf16>) : !torch.vtensor<[320,20,1],f16> loc(#loc0)
%872 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xsi8>) : !torch.vtensor<[320,320],si8> loc(#loc0)
%873 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%874 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560x20x1xf16>) : !torch.vtensor<[2560,20,1],f16> loc(#loc0)
%875 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560x320xsi8>) : !torch.vtensor<[2560,320],si8> loc(#loc0)
%876 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560xf16>) : !torch.vtensor<[2560],f16> loc(#loc0)
%877 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x80x1xf16>) : !torch.vtensor<[320,80,1],f16> loc(#loc0)
%878 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x1280xsi8>) : !torch.vtensor<[320,1280],si8> loc(#loc0)
%879 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%880 = torch.vtensor.literal(dense<8.000000e+00> : tensor<f32>) : !torch.vtensor<[],f32> loc(#loc0)
%881 = torch.vtensor.literal(dense<0.000000e+00> : tensor<f32>) : !torch.vtensor<[],f32> loc(#loc0)
%882 = torch.vtensor.literal(dense<2.000000e+00> : tensor<f32>) : !torch.vtensor<[],f32> loc(#loc0)
%883 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1x1x1xf16>) : !torch.vtensor<[320,20,1,1,1],f16> loc(#loc0)
%884 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320x1x1xsi8>) : !torch.vtensor<[320,320,1,1],si8> loc(#loc0)
%885 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%886 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%887 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%888 = torch.vtensor.literal(dense_resource<__elided__> : tensor<4x320x3x3xf16>) : !torch.vtensor<[4,320,3,3],f16> loc(#loc0)
%889 = torch.vtensor.literal(dense<[-1.393320e-03, -1.588820e-03, -2.624990e-04, -2.531050e-03]> : tensor<4xf16>) : !torch.vtensor<[4],f16> loc(#loc0)
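// End of parameter literals. Note the recurring (f16, si8) pairs above: each
// si8 tensor is paired with an f16 tensor whose input-channel dim is 16x
// smaller with a broadcast axis inserted (e.g. 1280x2560x3x3 si8 with
// 1280x160x1x3x3 f16; 1280x1280 si8 with 1280x80x1 f16). This is consistent
// with int8 weights plus per-group (group size 16) dequantization scales --
// an interpretation from the shapes, not stated in the IR. The 4x320x3x3
// weight and its explicit 4-element bias (%888/%889) match a UNet output
// conv producing 4 latent channels. Scalar constants for the forward body
// follow.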
%int0 = torch.constant.int 0 loc(#loc1)
%false = torch.constant.bool false loc(#loc1)
%int6 = torch.constant.int 6 loc(#loc1)
%none = torch.constant.none loc(#loc0)
%int9223372036854775807 = torch.constant.int 9223372036854775807 loc(#loc1)
%int-1 = torch.constant.int -1 loc(#loc1)
%int5 = torch.constant.int 5 loc(#loc1)
%true = torch.constant.bool true loc(#loc1)
%int3 = torch.constant.int 3 loc(#loc1)
%float1.000000e-05 = torch.constant.float 1.000000e-05 loc(#loc1)
%int8192 = torch.constant.int 8192 loc(#loc1)
%int-2 = torch.constant.int -2 loc(#loc1)
%int77 = torch.constant.int 77 loc(#loc1)
%int48 = torch.constant.int 48 loc(#loc1)
%int768 = torch.constant.int 768 loc(#loc1)
%int154 = torch.constant.int 154 loc(#loc1)
%str = torch.constant.str "none" loc(#loc1)
%int2048 = torch.constant.int 2048 loc(#loc1)
%int512 = torch.constant.int 512 loc(#loc1)
%int128 = torch.constant.int 128 loc(#loc1)
%float2.000000e00 = torch.constant.float 2.000000e+00 loc(#loc1)
%int120 = torch.constant.int 120 loc(#loc1)
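// ---- Forward body. The scalar timestep %arg1 is unsqueezed and broadcast
// to the batch dimension (size 2).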
%890 = torch.aten.unsqueeze %arg1, %int0 : !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[1],si64> loc(#loc1)
%891 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int> loc(#loc2)
%892 = torch.aten.broadcast_to %890, %891 : !torch.vtensor<[1],si64>, !torch.list<int> -> !torch.vtensor<[2],si64> loc(#loc1)
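// Sinusoidal embedding frequencies: %8 = -ln(10000) and %7 = 160, so
// %896 = exp(arange(0, 160) * -ln(10000) / 160) -- 160 frequencies for a
// 320-wide timestep embedding.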
%cuda3A0 = torch.constant.device "cuda:0" loc(#loc1)
%893 = torch.aten.arange.start_step %int0, %int160, %int1, %int6, %none, %cuda3A0, %false : !torch.int, !torch.int, !torch.int, !torch.int, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[160],f32> loc(#loc1)
%894 = torch.aten.mul.Tensor %893, %8 : !torch.vtensor<[160],f32>, !torch.vtensor<[],f64> -> !torch.vtensor<[160],f32> loc(#loc1)
%895 = torch.aten.div.Tensor %894, %7 : !torch.vtensor<[160],f32>, !torch.vtensor<[],si64> -> !torch.vtensor<[160],f32> loc(#loc1)
%896 = torch.aten.exp %895 : !torch.vtensor<[160],f32> -> !torch.vtensor<[160],f32> loc(#loc1)
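// The si64 timesteps are copied into an f32 buffer (%903), which performs
// the int-to-float cast, then multiplied against the [1,160] frequency row
// to give a [2,160] grid. The extra multiply by %6 (constant 1) is a no-op
// scale factor.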
%897 = torch.aten.slice.Tensor %892, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2],si64> loc(#loc1)
%898 = torch.aten.unsqueeze %897, %int1 : !torch.vtensor<[2],si64>, !torch.int -> !torch.vtensor<[2,1],si64> loc(#loc1)
%899 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%900 = torch.aten.to.dtype %899, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%901 = torch.prim.ListConstruct %int2, %int1 : (!torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%902 = torch.aten.broadcast_to %900, %901 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,1],f32> loc(#loc1)
%903 = torch.valsem.aten.copy %902, %898, %false : !torch.vtensor<[2,1],f32>, !torch.vtensor<[2,1],si64>, !torch.bool -> !torch.vtensor<[2,1],f32> loc(#loc1)
%904 = torch.aten.unsqueeze %896, %int0 : !torch.vtensor<[160],f32>, !torch.int -> !torch.vtensor<[1,160],f32> loc(#loc1)
%905 = torch.aten.slice.Tensor %904, %int1, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[1,160],f32>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,160],f32> loc(#loc1)
%906 = torch.aten.mul.Tensor %903, %905 : !torch.vtensor<[2,1],f32>, !torch.vtensor<[1,160],f32> -> !torch.vtensor<[2,160],f32> loc(#loc1)
%907 = torch.aten.mul.Tensor %906, %6 : !torch.vtensor<[2,160],f32>, !torch.vtensor<[],si64> -> !torch.vtensor<[2,160],f32> loc(#loc1)
%908 = torch.aten.sin %907 : !torch.vtensor<[2,160],f32> -> !torch.vtensor<[2,160],f32> loc(#loc1)
%909 = torch.aten.cos %907 : !torch.vtensor<[2,160],f32> -> !torch.vtensor<[2,160],f32> loc(#loc1)
%910 = torch.prim.ListConstruct %908, %909 : (!torch.vtensor<[2,160],f32>, !torch.vtensor<[2,160],f32>) -> !torch.list<vtensor> loc(#loc1)
%911 = torch.aten.cat %910, %int-1 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[2,320],f32> loc(#loc1)
%912 = torch.aten.slice.Tensor %911, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,320],f32>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,320],f32> loc(#loc1)
%913 = torch.aten.slice.Tensor %912, %int1, %int160, %int9223372036854775807, %int1 : !torch.vtensor<[2,320],f32>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,160],f32> loc(#loc1)
%914 = torch.aten.slice.Tensor %911, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,320],f32>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,320],f32> loc(#loc1)
%915 = torch.aten.slice.Tensor %914, %int1, %int0, %int160, %int1 : !torch.vtensor<[2,320],f32>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,160],f32> loc(#loc1)
%916 = torch.prim.ListConstruct %913, %915 : (!torch.vtensor<[2,160],f32>, !torch.vtensor<[2,160],f32>) -> !torch.list<vtensor> loc(#loc1)
%917 = torch.aten.cat %916, %int-1 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[2,320],f32> loc(#loc1)
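// NOTE (annotation): %918-%922 materialize a zero f16 [2,320] tensor and copy
// the f32 embedding into it - i.e. a cast to half precision before the MLP.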
%918 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%919 = torch.aten.to.dtype %918, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%920 = torch.prim.ListConstruct %int2, %int320 : (!torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%921 = torch.aten.broadcast_to %919, %920 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,320],f16> loc(#loc1)
%922 = torch.valsem.aten.copy %921, %917, %false : !torch.vtensor<[2,320],f16>, !torch.vtensor<[2,320],f32>, !torch.bool -> !torch.vtensor<[2,320],f16> loc(#loc1)
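// NOTE (annotation): %923-%932 are the time-embedding MLP:
// Linear(320 -> 1280) with weight %9 and bias %10 (added via the
// scale-by-1 mul/add idiom), SiLU (sigmoid(x) * x), then
// Linear(1280 -> 1280) with weight %11 and bias %12.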
%923 = torch.aten.transpose.int %9, %int0, %int1 : !torch.vtensor<[1280,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,1280],f16> loc(#loc1)
%924 = torch.aten.mm %922, %923 : !torch.vtensor<[2,320],f16>, !torch.vtensor<[320,1280],f16> -> !torch.vtensor<[2,1280],f16> loc(#loc1)
%925 = torch.aten.mul.Scalar %10, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16> loc(#loc1)
%926 = torch.aten.add.Tensor %925, %924, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[2,1280],f16>, !torch.int -> !torch.vtensor<[2,1280],f16> loc(#loc1)
%927 = torch.aten.sigmoid %926 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16> loc(#loc1)
%928 = torch.aten.mul.Tensor %927, %926 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16> loc(#loc1)
%929 = torch.aten.transpose.int %11, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16> loc(#loc1)
%930 = torch.aten.mm %928, %929 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[2,1280],f16> loc(#loc1)
%931 = torch.aten.mul.Scalar %12, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16> loc(#loc1)
%932 = torch.aten.add.Tensor %931, %930, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[2,1280],f16>, !torch.int -> !torch.vtensor<[2,1280],f16> loc(#loc1)
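// NOTE (annotation): %933/%934 are the reusable [1,1] and [0,0] int lists for
// conv stride/padding/dilation; %935 is the input convolution, a 3x3,
// stride-1, pad-1 conv taking the 4-channel latent %arg0 to 320 channels
// (weight %13 and bias %14 are defined earlier in the file).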
%933 = torch.prim.ListConstruct %int1, %int1 : (!torch.int, !torch.int) -> !torch.list<int> loc(#loc0)
%934 = torch.prim.ListConstruct %int0, %int0 : (!torch.int, !torch.int) -> !torch.list<int> loc(#loc0)
%935 = torch.aten.convolution %arg0, %13, %14, %933, %933, %933, %false, %934, %int1 : !torch.vtensor<[2,4,64,64],f16>, !torch.vtensor<[320,4,3,3],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
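// NOTE (annotation): %936-%972 implement GroupNorm(32 groups, eps %5 = 1e-5)
// expanded inline: view [2,320,64,64] as [2,32,10,4096], take the mean and
// variance over dims {2,3} (40960 elements per group, accumulated in f64),
// normalize with rsqrt(var + eps), view back, apply the per-channel affine
// weight %15 and bias %16, cast back to f16, and finish with a SiLU.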
%936 = torch.prim.ListConstruct %int2, %int32, %int10, %int4096 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc3)
%937 = torch.aten.view %935, %936 : !torch.vtensor<[2,320,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,32,10,4096],f16> loc(#loc1)
%938 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%939 = torch.aten.to.dtype %938, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%940 = torch.prim.ListConstruct %int2, %int32, %int10, %int4096 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%941 = torch.aten.broadcast_to %939, %940 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,10,4096],f32> loc(#loc1)
%942 = torch.valsem.aten.copy %941, %937, %false : !torch.vtensor<[2,32,10,4096],f32>, !torch.vtensor<[2,32,10,4096],f16>, !torch.bool -> !torch.vtensor<[2,32,10,4096],f32> loc(#loc1)
%943 = torch.prim.ListConstruct %int2, %int3 : (!torch.int, !torch.int) -> !torch.list<int> loc(#loc0)
%944 = torch.aten.to.dtype %942, %int7, %false, %false, %none : !torch.vtensor<[2,32,10,4096],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,10,4096],f64> loc(#loc1)
%945 = torch.aten.sum.dim_IntList %944, %943, %true, %none : !torch.vtensor<[2,32,10,4096],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%946 = torch.aten.div.Scalar %945, %int40960 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%947 = torch.aten.sub.Tensor %944, %946, %float1.000000e00 : !torch.vtensor<[2,32,10,4096],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,10,4096],f64> loc(#loc1)
%948 = torch.aten.mul.Tensor %947, %947 : !torch.vtensor<[2,32,10,4096],f64>, !torch.vtensor<[2,32,10,4096],f64> -> !torch.vtensor<[2,32,10,4096],f64> loc(#loc1)
%949 = torch.aten.sum.dim_IntList %948, %943, %true, %none : !torch.vtensor<[2,32,10,4096],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%950 = torch.aten.div.Scalar %949, %int40960 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%951 = torch.aten.to.dtype %950, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%952 = torch.aten.sum.dim_IntList %942, %943, %true, %none : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%953 = torch.aten.div.Scalar %952, %int40960 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%954 = torch.aten.add.Tensor %951, %5, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%955 = torch.aten.rsqrt %954 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%956 = torch.aten.sub.Tensor %937, %953, %int1 : !torch.vtensor<[2,32,10,4096],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,10,4096],f32> loc(#loc1)
%957 = torch.aten.mul.Tensor %956, %955 : !torch.vtensor<[2,32,10,4096],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,10,4096],f32> loc(#loc1)
%958 = torch.prim.ListConstruct %int2, %int320, %int64, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc3)
%959 = torch.aten.view %957, %958 : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f32> loc(#loc1)
%960 = torch.aten.unsqueeze %15, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16> loc(#loc1)
%961 = torch.aten.unsqueeze %960, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16> loc(#loc1)
%962 = torch.aten.mul.Tensor %959, %961 : !torch.vtensor<[2,320,64,64],f32>, !torch.vtensor<[320,1,1],f16> -> !torch.vtensor<[2,320,64,64],f32> loc(#loc1)
%963 = torch.aten.unsqueeze %16, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16> loc(#loc1)
%964 = torch.aten.unsqueeze %963, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16> loc(#loc1)
%965 = torch.aten.add.Tensor %962, %964, %int1 : !torch.vtensor<[2,320,64,64],f32>, !torch.vtensor<[320,1,1],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f32> loc(#loc1)
%966 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%967 = torch.aten.to.dtype %966, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%968 = torch.prim.ListConstruct %int2, %int320, %int64, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%969 = torch.aten.broadcast_to %967, %968 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%970 = torch.valsem.aten.copy %969, %965, %false : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f32>, !torch.bool -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%971 = torch.aten.sigmoid %970 : !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%972 = torch.aten.mul.Tensor %971, %970 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
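// NOTE (annotation): %973-%1017 look like per-group activation
// fake-quantization (quantize/dequantize): view as [2,20,16,64,64]
// (20 groups of 16 channels), take the per-group max of |x|, derive the
// scale maxabs / %882^(%880 - 1), shift by %881, clamp to
// [-%882^(%880-1), %882^(%880-1) - 1] via the gt/lt + where pairs, round,
// then undo the shift and scale. %880/%881/%882 are defined earlier in the
// truncated file; from the arithmetic they behave as the bit-width, the
// zero-point, and the base 2, respectively.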
%973 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%974 = torch.aten.detach %973 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%975 = torch.prim.ListConstruct %int2, %int20, %int16, %int64, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc3)
%976 = torch.aten.view %972, %975 : !torch.vtensor<[2,320,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,16,64,64],f16> loc(#loc1)
%977 = torch.aten.abs %976 : !torch.vtensor<[2,20,16,64,64],f16> -> !torch.vtensor<[2,20,16,64,64],f16> loc(#loc1)
%values, %indices = torch.aten.max.dim %977, %int2, %true : !torch.vtensor<[2,20,16,64,64],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,20,1,64,64],f16>, !torch.vtensor<[2,20,1,64,64],si64> loc(#loc1)
%978 = torch.prim.ListConstruct %int2, %int20, %int1, %int64, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc3)
%979 = torch.aten.view %values, %978 : !torch.vtensor<[2,20,1,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,1,64,64],f16> loc(#loc1)
%980 = torch.aten.broadcast_to %979, %975 : !torch.vtensor<[2,20,1,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,16,64,64],f16> loc(#loc1)
%981 = torch.aten.clone %980, %int0 : !torch.vtensor<[2,20,16,64,64],f16>, !torch.int -> !torch.vtensor<[2,20,16,64,64],f16> loc(#loc1)
%982 = torch.aten.view %981, %958 : !torch.vtensor<[2,20,16,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%983 = torch.aten.sub.Tensor %974, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%984 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%985 = torch.aten.pow.Tensor_Tensor %984, %983 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%986 = torch.aten.neg %985 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%987 = torch.aten.neg %986 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%988 = torch.aten.div.Tensor %982, %987 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%989 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%990 = torch.aten.detach %989 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%991 = torch.aten.div.Tensor %972, %988 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%992 = torch.aten.add.Tensor %991, %990, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%993 = torch.aten.sub.Tensor %974, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%994 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%995 = torch.aten.pow.Tensor_Tensor %994, %993 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%996 = torch.aten.neg %995 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%997 = torch.aten.sub.Tensor %974, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%998 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%999 = torch.aten.pow.Tensor_Tensor %998, %997 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1000 = torch.aten.sub.Tensor %999, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%1001 = torch.aten.gt.Tensor %992, %1000 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,320,64,64],i1> loc(#loc1)
%1002 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%1003 = torch.aten.to.dtype %1002, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%1004 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%1005 = torch.aten.broadcast_to %1003, %1004 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%1006 = torch.valsem.aten.copy %1005, %1000, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%1007 = torch.aten.where.self %1001, %1006, %992 : !torch.vtensor<[2,320,64,64],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%1008 = torch.aten.lt.Tensor %1007, %996 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,320,64,64],i1> loc(#loc1)
%1009 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%1010 = torch.aten.to.dtype %1009, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%1011 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%1012 = torch.aten.broadcast_to %1010, %1011 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%1013 = torch.valsem.aten.copy %1012, %996, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%1014 = torch.aten.where.self %1008, %1013, %1007 : !torch.vtensor<[2,320,64,64],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%1015 = torch.aten.round %1014 : !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%1016 = torch.aten.sub.Tensor %1015, %990, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%1017 = torch.aten.mul.Tensor %1016, %988 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
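// NOTE (annotation): %1018-%1024 reconstruct the f16 weight of the first 3x3
// conv from its quantized form: the per-group scales %17 ([320,20,1,3,3]) are
// broadcast over the 16 channels of each group, reshaped to [320,320,3,3],
// and multiplied with the int8 codes %18 before the convolution (bias %19).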
%1018 = torch.prim.ListConstruct %int320, %int20, %int16, %int3, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc2)
%1019 = torch.aten.broadcast_to %17, %1018 : !torch.vtensor<[320,20,1,3,3],f16>, !torch.list<int> -> !torch.vtensor<[320,20,16,3,3],f16> loc(#loc1)
%1020 = torch.aten.clone %1019, %int0 : !torch.vtensor<[320,20,16,3,3],f16>, !torch.int -> !torch.vtensor<[320,20,16,3,3],f16> loc(#loc1)
%1021 = torch.prim.ListConstruct %int320, %int320, %int3, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc4)
%1022 = torch.aten.view %1020, %1021 : !torch.vtensor<[320,20,16,3,3],f16>, !torch.list<int> -> !torch.vtensor<[320,320,3,3],f16> loc(#loc1)
%1023 = torch.aten.mul.Tensor %18, %1022 : !torch.vtensor<[320,320,3,3],si8>, !torch.vtensor<[320,320,3,3],f16> -> !torch.vtensor<[320,320,3,3],f16> loc(#loc1)
%1024 = torch.aten.convolution %1017, %1023, %19, %933, %933, %933, %false, %934, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[320,320,3,3],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
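// NOTE (annotation): %1025-%1035 project the time embedding into this block:
// SiLU on %932, Linear(1280 -> 320) with weight %20 and bias %21, unsqueeze
// to [2,320,1,1], and add it channel-wise onto the conv output.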
%1025 = torch.aten.sigmoid %932 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16> loc(#loc1)
%1026 = torch.aten.mul.Tensor %1025, %932 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16> loc(#loc1)
%1027 = torch.aten.transpose.int %20, %int0, %int1 : !torch.vtensor<[320,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,320],f16> loc(#loc1)
%1028 = torch.aten.mm %1026, %1027 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,320],f16> -> !torch.vtensor<[2,320],f16> loc(#loc1)
%1029 = torch.aten.mul.Scalar %21, %int1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320],f16> loc(#loc1)
%1030 = torch.aten.add.Tensor %1029, %1028, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[2,320],f16>, !torch.int -> !torch.vtensor<[2,320],f16> loc(#loc1)
%1031 = torch.aten.slice.Tensor %1030, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,320],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,320],f16> loc(#loc1)
%1032 = torch.aten.slice.Tensor %1031, %int1, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,320],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,320],f16> loc(#loc1)
%1033 = torch.aten.unsqueeze %1032, %int2 : !torch.vtensor<[2,320],f16>, !torch.int -> !torch.vtensor<[2,320,1],f16> loc(#loc1)
%1034 = torch.aten.unsqueeze %1033, %int3 : !torch.vtensor<[2,320,1],f16>, !torch.int -> !torch.vtensor<[2,320,1,1],f16> loc(#loc1)
%1035 = torch.aten.add.Tensor %1024, %1034, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,1,1],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
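// NOTE (annotation): %1036-%1069 repeat the inline GroupNorm(32, eps 1e-5)
// plus SiLU pattern from above on the time-conditioned activations, with
// affine weight %22 and bias %23.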
%1036 = torch.aten.view %1035, %936 : !torch.vtensor<[2,320,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,32,10,4096],f16> loc(#loc1)
%1037 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%1038 = torch.aten.to.dtype %1037, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%1039 = torch.prim.ListConstruct %int2, %int32, %int10, %int4096 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%1040 = torch.aten.broadcast_to %1038, %1039 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,10,4096],f32> loc(#loc1)
%1041 = torch.valsem.aten.copy %1040, %1036, %false : !torch.vtensor<[2,32,10,4096],f32>, !torch.vtensor<[2,32,10,4096],f16>, !torch.bool -> !torch.vtensor<[2,32,10,4096],f32> loc(#loc1)
%1042 = torch.aten.to.dtype %1041, %int7, %false, %false, %none : !torch.vtensor<[2,32,10,4096],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,10,4096],f64> loc(#loc1)
%1043 = torch.aten.sum.dim_IntList %1042, %943, %true, %none : !torch.vtensor<[2,32,10,4096],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%1044 = torch.aten.div.Scalar %1043, %int40960 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%1045 = torch.aten.sub.Tensor %1042, %1044, %float1.000000e00 : !torch.vtensor<[2,32,10,4096],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,10,4096],f64> loc(#loc1)
%1046 = torch.aten.mul.Tensor %1045, %1045 : !torch.vtensor<[2,32,10,4096],f64>, !torch.vtensor<[2,32,10,4096],f64> -> !torch.vtensor<[2,32,10,4096],f64> loc(#loc1)
%1047 = torch.aten.sum.dim_IntList %1046, %943, %true, %none : !torch.vtensor<[2,32,10,4096],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%1048 = torch.aten.div.Scalar %1047, %int40960 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%1049 = torch.aten.to.dtype %1048, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%1050 = torch.aten.sum.dim_IntList %1041, %943, %true, %none : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%1051 = torch.aten.div.Scalar %1050, %int40960 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%1052 = torch.aten.add.Tensor %1049, %5, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%1053 = torch.aten.rsqrt %1052 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%1054 = torch.aten.sub.Tensor %1036, %1051, %int1 : !torch.vtensor<[2,32,10,4096],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,10,4096],f32> loc(#loc1)
%1055 = torch.aten.mul.Tensor %1054, %1053 : !torch.vtensor<[2,32,10,4096],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,10,4096],f32> loc(#loc1)
%1056 = torch.aten.view %1055, %958 : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f32> loc(#loc1)
%1057 = torch.aten.unsqueeze %22, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16> loc(#loc1)
%1058 = torch.aten.unsqueeze %1057, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16> loc(#loc1)
%1059 = torch.aten.mul.Tensor %1056, %1058 : !torch.vtensor<[2,320,64,64],f32>, !torch.vtensor<[320,1,1],f16> -> !torch.vtensor<[2,320,64,64],f32> loc(#loc1)
%1060 = torch.aten.unsqueeze %23, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16> loc(#loc1)
%1061 = torch.aten.unsqueeze %1060, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16> loc(#loc1)
%1062 = torch.aten.add.Tensor %1059, %1061, %int1 : !torch.vtensor<[2,320,64,64],f32>, !torch.vtensor<[320,1,1],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f32> loc(#loc1)
%1063 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%1064 = torch.aten.to.dtype %1063, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%1065 = torch.prim.ListConstruct %int2, %int320, %int64, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%1066 = torch.aten.broadcast_to %1064, %1065 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%1067 = torch.valsem.aten.copy %1066, %1062, %false : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f32>, !torch.bool -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%1068 = torch.aten.sigmoid %1067 : !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%1069 = torch.aten.mul.Tensor %1068, %1067 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
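// NOTE (annotation): %1070-%1117 repeat the activation fake-quantization and
// weight dequantization (scales %24, int8 codes %25, bias %26) for the second
// 3x3 conv; %1118 then adds the block input %935 as the residual connection,
// and %1119 divides by the output scale factor %4, which is 1.0 here.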
%1070 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1071 = torch.aten.detach %1070 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1072 = torch.aten.view %1069, %975 : !torch.vtensor<[2,320,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,16,64,64],f16> loc(#loc1)
%1073 = torch.aten.abs %1072 : !torch.vtensor<[2,20,16,64,64],f16> -> !torch.vtensor<[2,20,16,64,64],f16> loc(#loc1)
%values_0, %indices_1 = torch.aten.max.dim %1073, %int2, %true : !torch.vtensor<[2,20,16,64,64],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,20,1,64,64],f16>, !torch.vtensor<[2,20,1,64,64],si64> loc(#loc1)
%1074 = torch.aten.view %values_0, %978 : !torch.vtensor<[2,20,1,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,1,64,64],f16> loc(#loc1)
%1075 = torch.aten.broadcast_to %1074, %975 : !torch.vtensor<[2,20,1,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,16,64,64],f16> loc(#loc1)
%1076 = torch.aten.clone %1075, %int0 : !torch.vtensor<[2,20,16,64,64],f16>, !torch.int -> !torch.vtensor<[2,20,16,64,64],f16> loc(#loc1)
%1077 = torch.aten.view %1076, %958 : !torch.vtensor<[2,20,16,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%1078 = torch.aten.sub.Tensor %1071, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%1079 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%1080 = torch.aten.pow.Tensor_Tensor %1079, %1078 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1081 = torch.aten.neg %1080 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1082 = torch.aten.neg %1081 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1083 = torch.aten.div.Tensor %1077, %1082 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%1084 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1085 = torch.aten.detach %1084 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1086 = torch.aten.div.Tensor %1069, %1083 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%1087 = torch.aten.add.Tensor %1086, %1085, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%1088 = torch.aten.sub.Tensor %1071, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%1089 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%1090 = torch.aten.pow.Tensor_Tensor %1089, %1088 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1091 = torch.aten.neg %1090 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1092 = torch.aten.sub.Tensor %1071, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%1093 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%1094 = torch.aten.pow.Tensor_Tensor %1093, %1092 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1095 = torch.aten.sub.Tensor %1094, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%1096 = torch.aten.gt.Tensor %1087, %1095 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,320,64,64],i1> loc(#loc1)
%1097 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%1098 = torch.aten.to.dtype %1097, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%1099 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%1100 = torch.aten.broadcast_to %1098, %1099 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%1101 = torch.valsem.aten.copy %1100, %1095, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%1102 = torch.aten.where.self %1096, %1101, %1087 : !torch.vtensor<[2,320,64,64],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%1103 = torch.aten.lt.Tensor %1102, %1091 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,320,64,64],i1> loc(#loc1)
%1104 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%1105 = torch.aten.to.dtype %1104, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%1106 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%1107 = torch.aten.broadcast_to %1105, %1106 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%1108 = torch.valsem.aten.copy %1107, %1091, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%1109 = torch.aten.where.self %1103, %1108, %1102 : !torch.vtensor<[2,320,64,64],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%1110 = torch.aten.round %1109 : !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%1111 = torch.aten.sub.Tensor %1110, %1085, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%1112 = torch.aten.mul.Tensor %1111, %1083 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%1113 = torch.aten.broadcast_to %24, %1018 : !torch.vtensor<[320,20,1,3,3],f16>, !torch.list<int> -> !torch.vtensor<[320,20,16,3,3],f16> loc(#loc1)
%1114 = torch.aten.clone %1113, %int0 : !torch.vtensor<[320,20,16,3,3],f16>, !torch.int -> !torch.vtensor<[320,20,16,3,3],f16> loc(#loc1)
%1115 = torch.aten.view %1114, %1021 : !torch.vtensor<[320,20,16,3,3],f16>, !torch.list<int> -> !torch.vtensor<[320,320,3,3],f16> loc(#loc1)
%1116 = torch.aten.mul.Tensor %25, %1115 : !torch.vtensor<[320,320,3,3],si8>, !torch.vtensor<[320,320,3,3],f16> -> !torch.vtensor<[320,320,3,3],f16> loc(#loc1)
%1117 = torch.aten.convolution %1112, %1116, %26, %933, %933, %933, %false, %934, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[320,320,3,3],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%1118 = torch.aten.add.Tensor %935, %1117, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%1119 = torch.aten.div.Tensor %1118, %4 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
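// NOTE (annotation): %1120-%1145 are another inline GroupNorm over
// [2,32,10,4096], this time with eps %3 ~= 1e-6, normalizing the ResNet
// output ahead of the attention block; no per-channel affine scale/shift
// appears at this point in the dump before the cast back to f16.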
%1120 = torch.aten.view %1119, %936 : !torch.vtensor<[2,320,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,32,10,4096],f16> loc(#loc1)
%1121 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%1122 = torch.aten.to.dtype %1121, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%1123 = torch.prim.ListConstruct %int2, %int32, %int10, %int4096 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%1124 = torch.aten.broadcast_to %1122, %1123 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,10,4096],f32> loc(#loc1)
%1125 = torch.valsem.aten.copy %1124, %1120, %false : !torch.vtensor<[2,32,10,4096],f32>, !torch.vtensor<[2,32,10,4096],f16>, !torch.bool -> !torch.vtensor<[2,32,10,4096],f32> loc(#loc1)
%1126 = torch.aten.to.dtype %1125, %int7, %false, %false, %none : !torch.vtensor<[2,32,10,4096],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,10,4096],f64> loc(#loc1)
%1127 = torch.aten.sum.dim_IntList %1126, %943, %true, %none : !torch.vtensor<[2,32,10,4096],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%1128 = torch.aten.div.Scalar %1127, %int40960 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%1129 = torch.aten.sub.Tensor %1126, %1128, %float1.000000e00 : !torch.vtensor<[2,32,10,4096],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,10,4096],f64> loc(#loc1)
%1130 = torch.aten.mul.Tensor %1129, %1129 : !torch.vtensor<[2,32,10,4096],f64>, !torch.vtensor<[2,32,10,4096],f64> -> !torch.vtensor<[2,32,10,4096],f64> loc(#loc1)
%1131 = torch.aten.sum.dim_IntList %1130, %943, %true, %none : !torch.vtensor<[2,32,10,4096],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%1132 = torch.aten.div.Scalar %1131, %int40960 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%1133 = torch.aten.to.dtype %1132, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%1134 = torch.aten.sum.dim_IntList %1125, %943, %true, %none : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%1135 = torch.aten.div.Scalar %1134, %int40960 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%1136 = torch.aten.add.Tensor %1133, %3, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%1137 = torch.aten.rsqrt %1136 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%1138 = torch.aten.sub.Tensor %1120, %1135, %int1 : !torch.vtensor<[2,32,10,4096],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,10,4096],f32> loc(#loc1)
%1139 = torch.aten.mul.Tensor %1138, %1137 : !torch.vtensor<[2,32,10,4096],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,10,4096],f32> loc(#loc1)
%1140 = torch.aten.view %1139, %958 : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f32> loc(#loc1)
%1141 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%1142 = torch.aten.to.dtype %1141, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%1143 = torch.prim.ListConstruct %int2, %int320, %int64, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%1144 = torch.aten.broadcast_to %1142, %1143 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%1145 = torch.valsem.aten.copy %1144, %1140, %false : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f32>, !torch.bool -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
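// NOTE (annotation): %1146-%1188 apply the activation fake-quantization once
// more, and %1189-%1195 dequantize a 1x1 weight (scales %27, int8 codes %28,
// bias %29) for a stride-1, pad-0 pointwise convolution - consistent with a
// transformer-block input projection.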
%1146 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1147 = torch.aten.detach %1146 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1148 = torch.aten.view %1145, %975 : !torch.vtensor<[2,320,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,16,64,64],f16> loc(#loc1)
%1149 = torch.aten.abs %1148 : !torch.vtensor<[2,20,16,64,64],f16> -> !torch.vtensor<[2,20,16,64,64],f16> loc(#loc1)
%values_2, %indices_3 = torch.aten.max.dim %1149, %int2, %true : !torch.vtensor<[2,20,16,64,64],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,20,1,64,64],f16>, !torch.vtensor<[2,20,1,64,64],si64> loc(#loc1)
%1150 = torch.aten.view %values_2, %978 : !torch.vtensor<[2,20,1,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,1,64,64],f16> loc(#loc1)
%1151 = torch.aten.broadcast_to %1150, %975 : !torch.vtensor<[2,20,1,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,16,64,64],f16> loc(#loc1)
%1152 = torch.aten.clone %1151, %int0 : !torch.vtensor<[2,20,16,64,64],f16>, !torch.int -> !torch.vtensor<[2,20,16,64,64],f16> loc(#loc1)
%1153 = torch.aten.view %1152, %958 : !torch.vtensor<[2,20,16,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%1154 = torch.aten.sub.Tensor %1147, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%1155 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%1156 = torch.aten.pow.Tensor_Tensor %1155, %1154 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1157 = torch.aten.neg %1156 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1158 = torch.aten.neg %1157 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1159 = torch.aten.div.Tensor %1153, %1158 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%1160 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1161 = torch.aten.detach %1160 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1162 = torch.aten.div.Tensor %1145, %1159 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%1163 = torch.aten.add.Tensor %1162, %1161, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%1164 = torch.aten.sub.Tensor %1147, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%1165 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%1166 = torch.aten.pow.Tensor_Tensor %1165, %1164 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1167 = torch.aten.neg %1166 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1168 = torch.aten.sub.Tensor %1147, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%1169 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%1170 = torch.aten.pow.Tensor_Tensor %1169, %1168 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1171 = torch.aten.sub.Tensor %1170, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%1172 = torch.aten.gt.Tensor %1163, %1171 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,320,64,64],i1> loc(#loc1)
%1173 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%1174 = torch.aten.to.dtype %1173, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%1175 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%1176 = torch.aten.broadcast_to %1174, %1175 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%1177 = torch.valsem.aten.copy %1176, %1171, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%1178 = torch.aten.where.self %1172, %1177, %1163 : !torch.vtensor<[2,320,64,64],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%1179 = torch.aten.lt.Tensor %1178, %1167 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,320,64,64],i1> loc(#loc1)
%1180 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%1181 = torch.aten.to.dtype %1180, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%1182 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%1183 = torch.aten.broadcast_to %1181, %1182 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%1184 = torch.valsem.aten.copy %1183, %1167, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%1185 = torch.aten.where.self %1179, %1184, %1178 : !torch.vtensor<[2,320,64,64],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%1186 = torch.aten.round %1185 : !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%1187 = torch.aten.sub.Tensor %1186, %1161, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%1188 = torch.aten.mul.Tensor %1187, %1159 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%1189 = torch.prim.ListConstruct %int320, %int20, %int16, %int1, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc2)
%1190 = torch.aten.broadcast_to %27, %1189 : !torch.vtensor<[320,20,1,1,1],f16>, !torch.list<int> -> !torch.vtensor<[320,20,16,1,1],f16> loc(#loc1)
%1191 = torch.aten.clone %1190, %int0 : !torch.vtensor<[320,20,16,1,1],f16>, !torch.int -> !torch.vtensor<[320,20,16,1,1],f16> loc(#loc1)
%1192 = torch.prim.ListConstruct %int320, %int320, %int1, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc4)
%1193 = torch.aten.view %1191, %1192 : !torch.vtensor<[320,20,16,1,1],f16>, !torch.list<int> -> !torch.vtensor<[320,320,1,1],f16> loc(#loc1)
%1194 = torch.aten.mul.Tensor %28, %1193 : !torch.vtensor<[320,320,1,1],si8>, !torch.vtensor<[320,320,1,1],f16> -> !torch.vtensor<[320,320,1,1],f16> loc(#loc1)
%1195 = torch.aten.convolution %1188, %1194, %29, %933, %934, %933, %false, %934, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[320,320,1,1],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
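// NOTE (annotation): %1196-%1199 permute NCHW -> NHWC and flatten the feature
// map to [2,4096,320] tokens; %1200-%1213 then normalize them with an inline
// LayerNorm over the 320-dim axis (eps 1e-5, mean and variance via sum/320,
// rsqrt); as above, no affine parameters are applied here in this dump.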
%1196 = torch.prim.ListConstruct %int0, %int2, %int3, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc0)
%1197 = torch.aten.permute %1195, %1196 : !torch.vtensor<[2,320,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,64,64,320],f16> loc(#loc1)
%1198 = torch.prim.ListConstruct %int2, %int4096, %int320 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc5)
%1199 = torch.aten.view %1197, %1198 : !torch.vtensor<[2,64,64,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%1200 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int> loc(#loc1)
%1201 = torch.aten.sum.dim_IntList %1199, %1200, %true, %none : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,4096,1],f16> loc(#loc1)
%1202 = torch.aten.div.Scalar %1201, %int320 : !torch.vtensor<[2,4096,1],f16>, !torch.int -> !torch.vtensor<[2,4096,1],f16> loc(#loc1)
%1203 = torch.prim.ListConstruct %int2, %int4096, %int320 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%1204 = torch.aten.broadcast_to %1202, %1203 : !torch.vtensor<[2,4096,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%1205 = torch.aten.sub.Tensor %1199, %1204, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%1206 = torch.aten.mul.Tensor %1205, %1205 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%1207 = torch.aten.sum.dim_IntList %1206, %1200, %true, %none : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,4096,1],f16> loc(#loc1)
%1208 = torch.aten.div.Scalar %1207, %int320 : !torch.vtensor<[2,4096,1],f16>, !torch.int -> !torch.vtensor<[2,4096,1],f16> loc(#loc1)
%1209 = torch.aten.add.Scalar %1208, %float1.000000e-05, %int1 : !torch.vtensor<[2,4096,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,4096,1],f16> loc(#loc1)
%1210 = torch.aten.rsqrt %1209 : !torch.vtensor<[2,4096,1],f16> -> !torch.vtensor<[2,4096,1],f16> loc(#loc1)
%1211 = torch.prim.ListConstruct %int2, %int4096, %int320 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%1212 = torch.aten.broadcast_to %1210, %1211 : !torch.vtensor<[2,4096,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%1213 = torch.aten.mul.Tensor %1205, %1212 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
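// NOTE (annotation): %1214-%1258 fake-quantize the normalized tokens, this
// time grouped along the channel axis (view [2,4096,20,16], per-group max of
// |x| over the last dim), reusing the same %880/%881/%882 parameters.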
%1214 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1215 = torch.aten.detach %1214 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1216 = torch.prim.ListConstruct %int2, %int4096, %int20, %int16 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc3)
%1217 = torch.aten.view %1213, %1216 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,20,16],f16> loc(#loc1)
%1218 = torch.aten.abs %1217 : !torch.vtensor<[2,4096,20,16],f16> -> !torch.vtensor<[2,4096,20,16],f16> loc(#loc1)
%values_4, %indices_5 = torch.aten.max.dim %1218, %int3, %true : !torch.vtensor<[2,4096,20,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,4096,20,1],f16>, !torch.vtensor<[2,4096,20,1],si64> loc(#loc1)
%1219 = torch.prim.ListConstruct %int2, %int4096, %int20, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc3)
%1220 = torch.aten.view %values_4, %1219 : !torch.vtensor<[2,4096,20,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,20,1],f16> loc(#loc1)
%1221 = torch.aten.broadcast_to %1220, %1216 : !torch.vtensor<[2,4096,20,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,20,16],f16> loc(#loc1)
%1222 = torch.aten.clone %1221, %int0 : !torch.vtensor<[2,4096,20,16],f16>, !torch.int -> !torch.vtensor<[2,4096,20,16],f16> loc(#loc1)
%1223 = torch.aten.view %1222, %1198 : !torch.vtensor<[2,4096,20,16],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%1224 = torch.aten.sub.Tensor %1215, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%1225 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%1226 = torch.aten.pow.Tensor_Tensor %1225, %1224 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1227 = torch.aten.neg %1226 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1228 = torch.aten.neg %1227 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1229 = torch.aten.div.Tensor %1223, %1228 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%1230 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1231 = torch.aten.detach %1230 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1232 = torch.aten.div.Tensor %1213, %1229 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%1233 = torch.aten.add.Tensor %1232, %1231, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%1234 = torch.aten.sub.Tensor %1215, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%1235 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%1236 = torch.aten.pow.Tensor_Tensor %1235, %1234 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1237 = torch.aten.neg %1236 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1238 = torch.aten.sub.Tensor %1215, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%1239 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%1240 = torch.aten.pow.Tensor_Tensor %1239, %1238 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1241 = torch.aten.sub.Tensor %1240, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%1242 = torch.aten.gt.Tensor %1233, %1241 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,4096,320],i1> loc(#loc1)
%1243 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%1244 = torch.aten.to.dtype %1243, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%1245 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%1246 = torch.aten.broadcast_to %1244, %1245 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%1247 = torch.valsem.aten.copy %1246, %1241, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%1248 = torch.aten.where.self %1242, %1247, %1233 : !torch.vtensor<[2,4096,320],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%1249 = torch.aten.lt.Tensor %1248, %1237 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,4096,320],i1> loc(#loc1)
%1250 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%1251 = torch.aten.to.dtype %1250, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%1252 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%1253 = torch.aten.broadcast_to %1251, %1252 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%1254 = torch.valsem.aten.copy %1253, %1237, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%1255 = torch.aten.where.self %1249, %1254, %1248 : !torch.vtensor<[2,4096,320],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%1256 = torch.aten.round %1255 : !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%1257 = torch.aten.sub.Tensor %1256, %1231, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%1258 = torch.aten.mul.Tensor %1257, %1229 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
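// NOTE (annotation): %1259-%1271 dequantize a [320,320] projection weight
// (per-group scales %30 times int8 codes %31), flatten the tokens to
// [8192,320], and apply a matmul plus bias %32 - the shape and position are
// consistent with an attention query projection.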
%1259 = torch.prim.ListConstruct %int320, %int20, %int16 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc2)
%1260 = torch.aten.broadcast_to %30, %1259 : !torch.vtensor<[320,20,1],f16>, !torch.list<int> -> !torch.vtensor<[320,20,16],f16> loc(#loc1)
%1261 = torch.aten.clone %1260, %int0 : !torch.vtensor<[320,20,16],f16>, !torch.int -> !torch.vtensor<[320,20,16],f16> loc(#loc1)
%1262 = torch.prim.ListConstruct %int320, %int320 : (!torch.int, !torch.int) -> !torch.list<int> loc(#loc4)
%1263 = torch.aten.view %1261, %1262 : !torch.vtensor<[320,20,16],f16>, !torch.list<int> -> !torch.vtensor<[320,320],f16> loc(#loc1)
%1264 = torch.aten.mul.Tensor %31, %1263 : !torch.vtensor<[320,320],si8>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16> loc(#loc1)
%1265 = torch.aten.transpose.int %1264, %int0, %int1 : !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,320],f16> loc(#loc1)
%1266 = torch.prim.ListConstruct %int8192, %int320 : (!torch.int, !torch.int) -> !torch.list<int> loc(#loc3)
%1267 = torch.aten.view %1258, %1266 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16> loc(#loc1)
%1268 = torch.aten.mm %1267, %1265 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16> loc(#loc1)
%1269 = torch.aten.mul.Scalar %32, %int1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320],f16> loc(#loc1)
%1270 = torch.aten.add.Tensor %1269, %1268, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[8192,320],f16>, !torch.int -> !torch.vtensor<[8192,320],f16> loc(#loc1)
%1271 = torch.aten.view %1270, %1198 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
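// NOTE (annotation): from %1272 onward the identical quantize-and-project
// pattern repeats on the same normalized input %1213 with scales %33, int8
// codes %34, and bias %35, as expected for the matching key projection.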
%1272 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1273 = torch.aten.detach %1272 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1274 = torch.aten.view %1213, %1216 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,20,16],f16> loc(#loc1)
%1275 = torch.aten.abs %1274 : !torch.vtensor<[2,4096,20,16],f16> -> !torch.vtensor<[2,4096,20,16],f16> loc(#loc1)
%values_6, %indices_7 = torch.aten.max.dim %1275, %int3, %true : !torch.vtensor<[2,4096,20,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,4096,20,1],f16>, !torch.vtensor<[2,4096,20,1],si64> loc(#loc1)
%1276 = torch.aten.view %values_6, %1219 : !torch.vtensor<[2,4096,20,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,20,1],f16> loc(#loc1)
%1277 = torch.aten.broadcast_to %1276, %1216 : !torch.vtensor<[2,4096,20,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,20,16],f16> loc(#loc1)
%1278 = torch.aten.clone %1277, %int0 : !torch.vtensor<[2,4096,20,16],f16>, !torch.int -> !torch.vtensor<[2,4096,20,16],f16> loc(#loc1)
%1279 = torch.aten.view %1278, %1198 : !torch.vtensor<[2,4096,20,16],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%1280 = torch.aten.sub.Tensor %1273, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%1281 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%1282 = torch.aten.pow.Tensor_Tensor %1281, %1280 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1283 = torch.aten.neg %1282 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1284 = torch.aten.neg %1283 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1285 = torch.aten.div.Tensor %1279, %1284 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%1286 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1287 = torch.aten.detach %1286 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1288 = torch.aten.div.Tensor %1213, %1285 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%1289 = torch.aten.add.Tensor %1288, %1287, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%1290 = torch.aten.sub.Tensor %1273, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%1291 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%1292 = torch.aten.pow.Tensor_Tensor %1291, %1290 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1293 = torch.aten.neg %1292 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1294 = torch.aten.sub.Tensor %1273, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%1295 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%1296 = torch.aten.pow.Tensor_Tensor %1295, %1294 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1297 = torch.aten.sub.Tensor %1296, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%1298 = torch.aten.gt.Tensor %1289, %1297 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,4096,320],i1> loc(#loc1)
%1299 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%1300 = torch.aten.to.dtype %1299, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%1301 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%1302 = torch.aten.broadcast_to %1300, %1301 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%1303 = torch.valsem.aten.copy %1302, %1297, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%1304 = torch.aten.where.self %1298, %1303, %1289 : !torch.vtensor<[2,4096,320],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%1305 = torch.aten.lt.Tensor %1304, %1293 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,4096,320],i1> loc(#loc1)
%1306 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%1307 = torch.aten.to.dtype %1306, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%1308 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%1309 = torch.aten.broadcast_to %1307, %1308 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%1310 = torch.valsem.aten.copy %1309, %1293, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%1311 = torch.aten.where.self %1305, %1310, %1304 : !torch.vtensor<[2,4096,320],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%1312 = torch.aten.round %1311 : !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%1313 = torch.aten.sub.Tensor %1312, %1287, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%1314 = torch.aten.mul.Tensor %1313, %1285 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
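// Per-group weight dequantization for the next 320x320 projection: f16 scales %33
// (one per group of 16 input elements) are broadcast over the si8 weight matrix %34 and
// multiplied in, then the f16 result is transposed for the matmul; bias %35 is added.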
%1315 = torch.aten.broadcast_to %33, %1259 : !torch.vtensor<[320,20,1],f16>, !torch.list<int> -> !torch.vtensor<[320,20,16],f16> loc(#loc1)
%1316 = torch.aten.clone %1315, %int0 : !torch.vtensor<[320,20,16],f16>, !torch.int -> !torch.vtensor<[320,20,16],f16> loc(#loc1)
%1317 = torch.aten.view %1316, %1262 : !torch.vtensor<[320,20,16],f16>, !torch.list<int> -> !torch.vtensor<[320,320],f16> loc(#loc1)
%1318 = torch.aten.mul.Tensor %34, %1317 : !torch.vtensor<[320,320],si8>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16> loc(#loc1)
%1319 = torch.aten.transpose.int %1318, %int0, %int1 : !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,320],f16> loc(#loc1)
%1320 = torch.aten.view %1314, %1266 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16> loc(#loc1)
%1321 = torch.aten.mm %1320, %1319 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16> loc(#loc1)
%1322 = torch.aten.mul.Scalar %35, %int1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320],f16> loc(#loc1)
%1323 = torch.aten.add.Tensor %1322, %1321, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[8192,320],f16>, !torch.int -> !torch.vtensor<[8192,320],f16> loc(#loc1)
%1324 = torch.aten.view %1323, %1198 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
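// Same fake-quantization pattern on %1213 again; %1324 above and %1377 below are
// consistent with the key and value projections of a self-attention layer
// (%1271 being the query).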
%1325 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1326 = torch.aten.detach %1325 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1327 = torch.aten.view %1213, %1216 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,20,16],f16> loc(#loc1)
%1328 = torch.aten.abs %1327 : !torch.vtensor<[2,4096,20,16],f16> -> !torch.vtensor<[2,4096,20,16],f16> loc(#loc1)
%values_8, %indices_9 = torch.aten.max.dim %1328, %int3, %true : !torch.vtensor<[2,4096,20,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,4096,20,1],f16>, !torch.vtensor<[2,4096,20,1],si64> loc(#loc1)
%1329 = torch.aten.view %values_8, %1219 : !torch.vtensor<[2,4096,20,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,20,1],f16> loc(#loc1)
%1330 = torch.aten.broadcast_to %1329, %1216 : !torch.vtensor<[2,4096,20,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,20,16],f16> loc(#loc1)
%1331 = torch.aten.clone %1330, %int0 : !torch.vtensor<[2,4096,20,16],f16>, !torch.int -> !torch.vtensor<[2,4096,20,16],f16> loc(#loc1)
%1332 = torch.aten.view %1331, %1198 : !torch.vtensor<[2,4096,20,16],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%1333 = torch.aten.sub.Tensor %1326, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%1334 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%1335 = torch.aten.pow.Tensor_Tensor %1334, %1333 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1336 = torch.aten.neg %1335 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1337 = torch.aten.neg %1336 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1338 = torch.aten.div.Tensor %1332, %1337 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%1339 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1340 = torch.aten.detach %1339 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1341 = torch.aten.div.Tensor %1213, %1338 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%1342 = torch.aten.add.Tensor %1341, %1340, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%1343 = torch.aten.sub.Tensor %1326, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%1344 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%1345 = torch.aten.pow.Tensor_Tensor %1344, %1343 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1346 = torch.aten.neg %1345 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1347 = torch.aten.sub.Tensor %1326, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%1348 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%1349 = torch.aten.pow.Tensor_Tensor %1348, %1347 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1350 = torch.aten.sub.Tensor %1349, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%1351 = torch.aten.gt.Tensor %1342, %1350 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,4096,320],i1> loc(#loc1)
%1352 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%1353 = torch.aten.to.dtype %1352, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%1354 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%1355 = torch.aten.broadcast_to %1353, %1354 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%1356 = torch.valsem.aten.copy %1355, %1350, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%1357 = torch.aten.where.self %1351, %1356, %1342 : !torch.vtensor<[2,4096,320],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%1358 = torch.aten.lt.Tensor %1357, %1346 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,4096,320],i1> loc(#loc1)
%1359 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%1360 = torch.aten.to.dtype %1359, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%1361 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%1362 = torch.aten.broadcast_to %1360, %1361 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%1363 = torch.valsem.aten.copy %1362, %1346, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%1364 = torch.aten.where.self %1358, %1363, %1357 : !torch.vtensor<[2,4096,320],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%1365 = torch.aten.round %1364 : !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%1366 = torch.aten.sub.Tensor %1365, %1340, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%1367 = torch.aten.mul.Tensor %1366, %1338 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
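// Dequantize the si8 weights %37 with scales %36 and project (bias %38), producing %1377.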
%1368 = torch.aten.broadcast_to %36, %1259 : !torch.vtensor<[320,20,1],f16>, !torch.list<int> -> !torch.vtensor<[320,20,16],f16> loc(#loc1)
%1369 = torch.aten.clone %1368, %int0 : !torch.vtensor<[320,20,16],f16>, !torch.int -> !torch.vtensor<[320,20,16],f16> loc(#loc1)
%1370 = torch.aten.view %1369, %1262 : !torch.vtensor<[320,20,16],f16>, !torch.list<int> -> !torch.vtensor<[320,320],f16> loc(#loc1)
%1371 = torch.aten.mul.Tensor %37, %1370 : !torch.vtensor<[320,320],si8>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16> loc(#loc1)
%1372 = torch.aten.transpose.int %1371, %int0, %int1 : !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,320],f16> loc(#loc1)
%1373 = torch.aten.view %1367, %1266 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16> loc(#loc1)
%1374 = torch.aten.mm %1373, %1372 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16> loc(#loc1)
%1375 = torch.aten.mul.Scalar %38, %int1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320],f16> loc(#loc1)
%1376 = torch.aten.add.Tensor %1375, %1374, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[8192,320],f16>, !torch.int -> !torch.vtensor<[8192,320],f16> loc(#loc1)
%1377 = torch.aten.view %1376, %1198 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
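// Split the three projections into 8 heads of 40 dims and fold batch with heads:
// [2,4096,320] -> [2,4096,8,40] -> [2,8,4096,40] -> [16,4096,40].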
%1378 = torch.prim.ListConstruct %int2, %int4096, %int8, %int40 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc5)
%1379 = torch.aten.view %1271, %1378 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16> loc(#loc1)
%1380 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc0)
%1381 = torch.aten.permute %1379, %1380 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16> loc(#loc1)
%1382 = torch.aten.clone %1381, %int0 : !torch.vtensor<[2,8,4096,40],f16>, !torch.int -> !torch.vtensor<[2,8,4096,40],f16> loc(#loc1)
%1383 = torch.prim.ListConstruct %int16, %int4096, %int40 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc2)
%1384 = torch.aten.view %1382, %1383 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16> loc(#loc1)
%1385 = torch.aten.view %1324, %1378 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16> loc(#loc1)
%1386 = torch.aten.permute %1385, %1380 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16> loc(#loc1)
%1387 = torch.aten.clone %1386, %int0 : !torch.vtensor<[2,8,4096,40],f16>, !torch.int -> !torch.vtensor<[2,8,4096,40],f16> loc(#loc1)
%1388 = torch.aten.view %1387, %1383 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16> loc(#loc1)
%1389 = torch.aten.view %1377, %1378 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16> loc(#loc1)
%1390 = torch.aten.permute %1389, %1380 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16> loc(#loc1)
%1391 = torch.aten.clone %1390, %int0 : !torch.vtensor<[2,8,4096,40],f16>, !torch.int -> !torch.vtensor<[2,8,4096,40],f16> loc(#loc1)
%1392 = torch.aten.view %1391, %1383 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16> loc(#loc1)
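// Attention scores: Q (%1384) times K^T (%1393) over the 4096 spatial tokens, then
// scaled by %2, which matches 1/sqrt(40) for the 40-dim heads.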
%1393 = torch.aten.transpose.int %1388, %int-1, %int-2 : !torch.vtensor<[16,4096,40],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,40,4096],f16> loc(#loc1)
%1394 = torch.aten.broadcast_to %1384, %1383 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16> loc(#loc1)
%1395 = torch.aten.view %1394, %1383 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16> loc(#loc1)
%1396 = torch.prim.ListConstruct %int16, %int40, %int4096 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc2)
%1397 = torch.aten.broadcast_to %1393, %1396 : !torch.vtensor<[16,40,4096],f16>, !torch.list<int> -> !torch.vtensor<[16,40,4096],f16> loc(#loc1)
%1398 = torch.aten.view %1397, %1396 : !torch.vtensor<[16,40,4096],f16>, !torch.list<int> -> !torch.vtensor<[16,40,4096],f16> loc(#loc1)
%1399 = torch.aten.bmm %1395, %1398 : !torch.vtensor<[16,4096,40],f16>, !torch.vtensor<[16,40,4096],f16> -> !torch.vtensor<[16,4096,4096],f16> loc(#loc1)
%1400 = torch.prim.ListConstruct %int16, %int4096, %int4096 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc4)
%1401 = torch.aten.view %1399, %1400 : !torch.vtensor<[16,4096,4096],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,4096],f16> loc(#loc1)
%1402 = torch.aten.mul.Tensor %1401, %2 : !torch.vtensor<[16,4096,4096],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,4096,4096],f16> loc(#loc1)
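// Numerically stable softmax over the last dim: subtract the row max, exponentiate,
// normalize by the row sum.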
%values_10, %indices_11 = torch.aten.max.dim %1402, %int-1, %true : !torch.vtensor<[16,4096,4096],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,4096,1],f16>, !torch.vtensor<[16,4096,1],si64> loc(#loc1)
%1403 = torch.aten.sub.Tensor %1402, %values_10, %float1.000000e00 : !torch.vtensor<[16,4096,4096],f16>, !torch.vtensor<[16,4096,1],f16>, !torch.float -> !torch.vtensor<[16,4096,4096],f16> loc(#loc1)
%1404 = torch.aten.exp %1403 : !torch.vtensor<[16,4096,4096],f16> -> !torch.vtensor<[16,4096,4096],f16> loc(#loc1)
%1405 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int> loc(#loc1)
%1406 = torch.aten.sum.dim_IntList %1404, %1405, %true, %none : !torch.vtensor<[16,4096,4096],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[16,4096,1],f16> loc(#loc1)
%1407 = torch.aten.div.Tensor %1404, %1406 : !torch.vtensor<[16,4096,4096],f16>, !torch.vtensor<[16,4096,1],f16> -> !torch.vtensor<[16,4096,4096],f16> loc(#loc1)
%1408 = torch.aten.broadcast_to %1407, %1400 : !torch.vtensor<[16,4096,4096],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,4096],f16> loc(#loc1)
%1409 = torch.aten.view %1408, %1400 : !torch.vtensor<[16,4096,4096],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,4096],f16> loc(#loc1)
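// Apply the attention probabilities to V (%1392) and merge the heads back:
// [16,4096,40] -> [2,8,4096,40] -> [2,4096,8,40] -> [2,4096,320].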
%1410 = torch.aten.broadcast_to %1392, %1383 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16> loc(#loc1)
%1411 = torch.aten.view %1410, %1383 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16> loc(#loc1)
%1412 = torch.aten.bmm %1409, %1411 : !torch.vtensor<[16,4096,4096],f16>, !torch.vtensor<[16,4096,40],f16> -> !torch.vtensor<[16,4096,40],f16> loc(#loc1)
%1413 = torch.aten.view %1412, %1383 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16> loc(#loc1)
%1414 = torch.prim.ListConstruct %int2, %int8, %int4096, %int40 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc5)
%1415 = torch.aten.view %1413, %1414 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16> loc(#loc1)
%1416 = torch.aten.permute %1415, %1380 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16> loc(#loc1)
%1417 = torch.aten.clone %1416, %int0 : !torch.vtensor<[2,4096,8,40],f16>, !torch.int -> !torch.vtensor<[2,4096,8,40],f16> loc(#loc1)
%1418 = torch.aten.view %1417, %1198 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
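// Fake-quantize the attention output %1418 before its output projection, using the same
// per-group absmax pattern as above.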
%1419 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1420 = torch.aten.detach %1419 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1421 = torch.aten.view %1418, %1216 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,20,16],f16> loc(#loc1)
%1422 = torch.aten.abs %1421 : !torch.vtensor<[2,4096,20,16],f16> -> !torch.vtensor<[2,4096,20,16],f16> loc(#loc1)
%values_12, %indices_13 = torch.aten.max.dim %1422, %int3, %true : !torch.vtensor<[2,4096,20,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,4096,20,1],f16>, !torch.vtensor<[2,4096,20,1],si64> loc(#loc1)
%1423 = torch.aten.view %values_12, %1219 : !torch.vtensor<[2,4096,20,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,20,1],f16> loc(#loc1)
%1424 = torch.aten.broadcast_to %1423, %1216 : !torch.vtensor<[2,4096,20,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,20,16],f16> loc(#loc1)
%1425 = torch.aten.clone %1424, %int0 : !torch.vtensor<[2,4096,20,16],f16>, !torch.int -> !torch.vtensor<[2,4096,20,16],f16> loc(#loc1)
%1426 = torch.aten.view %1425, %1198 : !torch.vtensor<[2,4096,20,16],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%1427 = torch.aten.sub.Tensor %1420, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%1428 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%1429 = torch.aten.pow.Tensor_Tensor %1428, %1427 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1430 = torch.aten.neg %1429 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1431 = torch.aten.neg %1430 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1432 = torch.aten.div.Tensor %1426, %1431 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%1433 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1434 = torch.aten.detach %1433 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1435 = torch.aten.div.Tensor %1418, %1432 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%1436 = torch.aten.add.Tensor %1435, %1434, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%1437 = torch.aten.sub.Tensor %1420, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%1438 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%1439 = torch.aten.pow.Tensor_Tensor %1438, %1437 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1440 = torch.aten.neg %1439 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1441 = torch.aten.sub.Tensor %1420, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%1442 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%1443 = torch.aten.pow.Tensor_Tensor %1442, %1441 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1444 = torch.aten.sub.Tensor %1443, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%1445 = torch.aten.gt.Tensor %1436, %1444 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,4096,320],i1> loc(#loc1)
%1446 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%1447 = torch.aten.to.dtype %1446, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%1448 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%1449 = torch.aten.broadcast_to %1447, %1448 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%1450 = torch.valsem.aten.copy %1449, %1444, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%1451 = torch.aten.where.self %1445, %1450, %1436 : !torch.vtensor<[2,4096,320],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%1452 = torch.aten.lt.Tensor %1451, %1440 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,4096,320],i1> loc(#loc1)
%1453 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%1454 = torch.aten.to.dtype %1453, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%1455 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%1456 = torch.aten.broadcast_to %1454, %1455 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%1457 = torch.valsem.aten.copy %1456, %1440, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%1458 = torch.aten.where.self %1452, %1457, %1451 : !torch.vtensor<[2,4096,320],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%1459 = torch.aten.round %1458 : !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%1460 = torch.aten.sub.Tensor %1459, %1434, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%1461 = torch.aten.mul.Tensor %1460, %1432 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
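// Output projection of the attention block (scales %39, si8 weights %40, bias %41),
// followed by a residual add with the block input %1199.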
%1462 = torch.aten.broadcast_to %39, %1259 : !torch.vtensor<[320,20,1],f16>, !torch.list<int> -> !torch.vtensor<[320,20,16],f16> loc(#loc1)
%1463 = torch.aten.clone %1462, %int0 : !torch.vtensor<[320,20,16],f16>, !torch.int -> !torch.vtensor<[320,20,16],f16> loc(#loc1)
%1464 = torch.aten.view %1463, %1262 : !torch.vtensor<[320,20,16],f16>, !torch.list<int> -> !torch.vtensor<[320,320],f16> loc(#loc1)
%1465 = torch.aten.mul.Tensor %40, %1464 : !torch.vtensor<[320,320],si8>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16> loc(#loc1)
%1466 = torch.aten.transpose.int %1465, %int0, %int1 : !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,320],f16> loc(#loc1)
%1467 = torch.aten.view %1461, %1266 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16> loc(#loc1)
%1468 = torch.aten.mm %1467, %1466 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16> loc(#loc1)
%1469 = torch.aten.mul.Scalar %41, %int1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320],f16> loc(#loc1)
%1470 = torch.aten.add.Tensor %1469, %1468, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[8192,320],f16>, !torch.int -> !torch.vtensor<[8192,320],f16> loc(#loc1)
%1471 = torch.aten.view %1470, %1198 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%1472 = torch.aten.add.Tensor %1471, %1199, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
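// LayerNorm over the 320-channel dim written out long-hand: mean over dim 2, centered
// variance, rsqrt(var + 1e-5), then normalize.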
%1473 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int> loc(#loc1)
%1474 = torch.aten.sum.dim_IntList %1472, %1473, %true, %none : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,4096,1],f16> loc(#loc1)
%1475 = torch.aten.div.Scalar %1474, %int320 : !torch.vtensor<[2,4096,1],f16>, !torch.int -> !torch.vtensor<[2,4096,1],f16> loc(#loc1)
%1476 = torch.prim.ListConstruct %int2, %int4096, %int320 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%1477 = torch.aten.broadcast_to %1475, %1476 : !torch.vtensor<[2,4096,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%1478 = torch.aten.sub.Tensor %1472, %1477, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%1479 = torch.aten.mul.Tensor %1478, %1478 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%1480 = torch.aten.sum.dim_IntList %1479, %1473, %true, %none : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,4096,1],f16> loc(#loc1)
%1481 = torch.aten.div.Scalar %1480, %int320 : !torch.vtensor<[2,4096,1],f16>, !torch.int -> !torch.vtensor<[2,4096,1],f16> loc(#loc1)
%1482 = torch.aten.add.Scalar %1481, %float1.000000e-05, %int1 : !torch.vtensor<[2,4096,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,4096,1],f16> loc(#loc1)
%1483 = torch.aten.rsqrt %1482 : !torch.vtensor<[2,4096,1],f16> -> !torch.vtensor<[2,4096,1],f16> loc(#loc1)
%1484 = torch.prim.ListConstruct %int2, %int4096, %int320 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%1485 = torch.aten.broadcast_to %1483, %1484 : !torch.vtensor<[2,4096,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%1486 = torch.aten.mul.Tensor %1478, %1485 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
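// Fake-quantize the normalized activations; the projection built from %42/%43 below
// produces %1539, which is later reshaped into the cross-attention query.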
%1487 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1488 = torch.aten.detach %1487 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1489 = torch.aten.view %1486, %1216 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,20,16],f16> loc(#loc1)
%1490 = torch.aten.abs %1489 : !torch.vtensor<[2,4096,20,16],f16> -> !torch.vtensor<[2,4096,20,16],f16> loc(#loc1)
%values_14, %indices_15 = torch.aten.max.dim %1490, %int3, %true : !torch.vtensor<[2,4096,20,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,4096,20,1],f16>, !torch.vtensor<[2,4096,20,1],si64> loc(#loc1)
%1491 = torch.aten.view %values_14, %1219 : !torch.vtensor<[2,4096,20,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,20,1],f16> loc(#loc1)
%1492 = torch.aten.broadcast_to %1491, %1216 : !torch.vtensor<[2,4096,20,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,20,16],f16> loc(#loc1)
%1493 = torch.aten.clone %1492, %int0 : !torch.vtensor<[2,4096,20,16],f16>, !torch.int -> !torch.vtensor<[2,4096,20,16],f16> loc(#loc1)
%1494 = torch.aten.view %1493, %1198 : !torch.vtensor<[2,4096,20,16],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%1495 = torch.aten.sub.Tensor %1488, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%1496 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%1497 = torch.aten.pow.Tensor_Tensor %1496, %1495 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1498 = torch.aten.neg %1497 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1499 = torch.aten.neg %1498 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1500 = torch.aten.div.Tensor %1494, %1499 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%1501 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1502 = torch.aten.detach %1501 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1503 = torch.aten.div.Tensor %1486, %1500 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%1504 = torch.aten.add.Tensor %1503, %1502, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%1505 = torch.aten.sub.Tensor %1488, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%1506 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%1507 = torch.aten.pow.Tensor_Tensor %1506, %1505 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1508 = torch.aten.neg %1507 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1509 = torch.aten.sub.Tensor %1488, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%1510 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%1511 = torch.aten.pow.Tensor_Tensor %1510, %1509 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1512 = torch.aten.sub.Tensor %1511, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%1513 = torch.aten.gt.Tensor %1504, %1512 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,4096,320],i1> loc(#loc1)
%1514 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%1515 = torch.aten.to.dtype %1514, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%1516 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%1517 = torch.aten.broadcast_to %1515, %1516 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%1518 = torch.valsem.aten.copy %1517, %1512, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%1519 = torch.aten.where.self %1513, %1518, %1504 : !torch.vtensor<[2,4096,320],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%1520 = torch.aten.lt.Tensor %1519, %1508 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,4096,320],i1> loc(#loc1)
%1521 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%1522 = torch.aten.to.dtype %1521, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%1523 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%1524 = torch.aten.broadcast_to %1522, %1523 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%1525 = torch.valsem.aten.copy %1524, %1508, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%1526 = torch.aten.where.self %1520, %1525, %1519 : !torch.vtensor<[2,4096,320],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%1527 = torch.aten.round %1526 : !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%1528 = torch.aten.sub.Tensor %1527, %1502, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%1529 = torch.aten.mul.Tensor %1528, %1500 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
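// Query projection for cross-attention (scales %42, si8 weights %43, bias %44).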
%1530 = torch.aten.broadcast_to %42, %1259 : !torch.vtensor<[320,20,1],f16>, !torch.list<int> -> !torch.vtensor<[320,20,16],f16> loc(#loc1)
%1531 = torch.aten.clone %1530, %int0 : !torch.vtensor<[320,20,16],f16>, !torch.int -> !torch.vtensor<[320,20,16],f16> loc(#loc1)
%1532 = torch.aten.view %1531, %1262 : !torch.vtensor<[320,20,16],f16>, !torch.list<int> -> !torch.vtensor<[320,320],f16> loc(#loc1)
%1533 = torch.aten.mul.Tensor %43, %1532 : !torch.vtensor<[320,320],si8>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16> loc(#loc1)
%1534 = torch.aten.transpose.int %1533, %int0, %int1 : !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,320],f16> loc(#loc1)
%1535 = torch.aten.view %1529, %1266 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16> loc(#loc1)
%1536 = torch.aten.mm %1535, %1534 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16> loc(#loc1)
%1537 = torch.aten.mul.Scalar %44, %int1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320],f16> loc(#loc1)
%1538 = torch.aten.add.Tensor %1537, %1536, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[8192,320],f16>, !torch.int -> !torch.vtensor<[8192,320],f16> loc(#loc1)
%1539 = torch.aten.view %1538, %1198 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
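// Fake-quantize the text-encoder hidden states %arg2 ([2,77,768]); here the grouping is
// 48 groups of 16 along the 768 channels.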
%1540 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1541 = torch.aten.detach %1540 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1542 = torch.prim.ListConstruct %int2, %int77, %int48, %int16 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc3)
%1543 = torch.aten.view %arg2, %1542 : !torch.vtensor<[2,77,768],f16>, !torch.list<int> -> !torch.vtensor<[2,77,48,16],f16> loc(#loc1)
%1544 = torch.aten.abs %1543 : !torch.vtensor<[2,77,48,16],f16> -> !torch.vtensor<[2,77,48,16],f16> loc(#loc1)
%values_16, %indices_17 = torch.aten.max.dim %1544, %int3, %true : !torch.vtensor<[2,77,48,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,77,48,1],f16>, !torch.vtensor<[2,77,48,1],si64> loc(#loc1)
%1545 = torch.prim.ListConstruct %int2, %int77, %int48, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc3)
%1546 = torch.aten.view %values_16, %1545 : !torch.vtensor<[2,77,48,1],f16>, !torch.list<int> -> !torch.vtensor<[2,77,48,1],f16> loc(#loc1)
%1547 = torch.aten.broadcast_to %1546, %1542 : !torch.vtensor<[2,77,48,1],f16>, !torch.list<int> -> !torch.vtensor<[2,77,48,16],f16> loc(#loc1)
%1548 = torch.aten.clone %1547, %int0 : !torch.vtensor<[2,77,48,16],f16>, !torch.int -> !torch.vtensor<[2,77,48,16],f16> loc(#loc1)
%1549 = torch.prim.ListConstruct %int2, %int77, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc6)
%1550 = torch.aten.view %1548, %1549 : !torch.vtensor<[2,77,48,16],f16>, !torch.list<int> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%1551 = torch.aten.sub.Tensor %1541, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%1552 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%1553 = torch.aten.pow.Tensor_Tensor %1552, %1551 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1554 = torch.aten.neg %1553 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1555 = torch.aten.neg %1554 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1556 = torch.aten.div.Tensor %1550, %1555 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%1557 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1558 = torch.aten.detach %1557 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1559 = torch.aten.div.Tensor %arg2, %1556 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[2,77,768],f16> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%1560 = torch.aten.add.Tensor %1559, %1558, %int1 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%1561 = torch.aten.sub.Tensor %1541, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%1562 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%1563 = torch.aten.pow.Tensor_Tensor %1562, %1561 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1564 = torch.aten.neg %1563 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1565 = torch.aten.sub.Tensor %1541, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%1566 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%1567 = torch.aten.pow.Tensor_Tensor %1566, %1565 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1568 = torch.aten.sub.Tensor %1567, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%1569 = torch.aten.gt.Tensor %1560, %1568 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,77,768],i1> loc(#loc1)
%1570 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%1571 = torch.aten.to.dtype %1570, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%1572 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%1573 = torch.aten.broadcast_to %1571, %1572 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%1574 = torch.valsem.aten.copy %1573, %1568, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%1575 = torch.aten.where.self %1569, %1574, %1560 : !torch.vtensor<[2,77,768],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,77,768],f16> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%1576 = torch.aten.lt.Tensor %1575, %1564 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,77,768],i1> loc(#loc1)
%1577 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%1578 = torch.aten.to.dtype %1577, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%1579 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%1580 = torch.aten.broadcast_to %1578, %1579 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%1581 = torch.valsem.aten.copy %1580, %1564, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%1582 = torch.aten.where.self %1576, %1581, %1575 : !torch.vtensor<[2,77,768],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,77,768],f16> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%1583 = torch.aten.round %1582 : !torch.vtensor<[2,77,768],f16> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%1584 = torch.aten.sub.Tensor %1583, %1558, %int1 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%1585 = torch.aten.mul.Tensor %1584, %1556 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[2,77,768],f16> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
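// Cross-attention key projection, 768 -> 320 (scales %45, si8 weights %46), applied to
// the quantized text embeddings flattened to [154,768]; note there is no bias add here.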
%1586 = torch.prim.ListConstruct %int320, %int48, %int16 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc2)
%1587 = torch.aten.broadcast_to %45, %1586 : !torch.vtensor<[320,48,1],f16>, !torch.list<int> -> !torch.vtensor<[320,48,16],f16> loc(#loc1)
%1588 = torch.aten.clone %1587, %int0 : !torch.vtensor<[320,48,16],f16>, !torch.int -> !torch.vtensor<[320,48,16],f16> loc(#loc1)
%1589 = torch.prim.ListConstruct %int320, %int768 : (!torch.int, !torch.int) -> !torch.list<int> loc(#loc4)
%1590 = torch.aten.view %1588, %1589 : !torch.vtensor<[320,48,16],f16>, !torch.list<int> -> !torch.vtensor<[320,768],f16> loc(#loc1)
%1591 = torch.aten.mul.Tensor %46, %1590 : !torch.vtensor<[320,768],si8>, !torch.vtensor<[320,768],f16> -> !torch.vtensor<[320,768],f16> loc(#loc1)
%1592 = torch.aten.transpose.int %1591, %int0, %int1 : !torch.vtensor<[320,768],f16>, !torch.int, !torch.int -> !torch.vtensor<[768,320],f16> loc(#loc1)
%1593 = torch.prim.ListConstruct %int154, %int768 : (!torch.int, !torch.int) -> !torch.list<int> loc(#loc5)
%1594 = torch.aten.view %1585, %1593 : !torch.vtensor<[2,77,768],f16>, !torch.list<int> -> !torch.vtensor<[154,768],f16> loc(#loc1)
%1595 = torch.aten.mm %1594, %1592 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,320],f16> -> !torch.vtensor<[154,320],f16> loc(#loc1)
%1596 = torch.prim.ListConstruct %int2, %int77, %int320 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc0)
%1597 = torch.aten.view %1595, %1596 : !torch.vtensor<[154,320],f16>, !torch.list<int> -> !torch.vtensor<[2,77,320],f16> loc(#loc1)
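// Same quantize-then-project pattern for the value projection of the text embeddings
// (scales %47, si8 weights %48).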
%1598 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1599 = torch.aten.detach %1598 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1600 = torch.aten.view %arg2, %1542 : !torch.vtensor<[2,77,768],f16>, !torch.list<int> -> !torch.vtensor<[2,77,48,16],f16> loc(#loc1)
%1601 = torch.aten.abs %1600 : !torch.vtensor<[2,77,48,16],f16> -> !torch.vtensor<[2,77,48,16],f16> loc(#loc1)
%values_18, %indices_19 = torch.aten.max.dim %1601, %int3, %true : !torch.vtensor<[2,77,48,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,77,48,1],f16>, !torch.vtensor<[2,77,48,1],si64> loc(#loc1)
%1602 = torch.aten.view %values_18, %1545 : !torch.vtensor<[2,77,48,1],f16>, !torch.list<int> -> !torch.vtensor<[2,77,48,1],f16> loc(#loc1)
%1603 = torch.aten.broadcast_to %1602, %1542 : !torch.vtensor<[2,77,48,1],f16>, !torch.list<int> -> !torch.vtensor<[2,77,48,16],f16> loc(#loc1)
%1604 = torch.aten.clone %1603, %int0 : !torch.vtensor<[2,77,48,16],f16>, !torch.int -> !torch.vtensor<[2,77,48,16],f16> loc(#loc1)
%1605 = torch.aten.view %1604, %1549 : !torch.vtensor<[2,77,48,16],f16>, !torch.list<int> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%1606 = torch.aten.sub.Tensor %1599, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%1607 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%1608 = torch.aten.pow.Tensor_Tensor %1607, %1606 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1609 = torch.aten.neg %1608 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1610 = torch.aten.neg %1609 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1611 = torch.aten.div.Tensor %1605, %1610 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%1612 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1613 = torch.aten.detach %1612 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1614 = torch.aten.div.Tensor %arg2, %1611 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[2,77,768],f16> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%1615 = torch.aten.add.Tensor %1614, %1613, %int1 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%1616 = torch.aten.sub.Tensor %1599, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%1617 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%1618 = torch.aten.pow.Tensor_Tensor %1617, %1616 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1619 = torch.aten.neg %1618 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1620 = torch.aten.sub.Tensor %1599, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%1621 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%1622 = torch.aten.pow.Tensor_Tensor %1621, %1620 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1623 = torch.aten.sub.Tensor %1622, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%1624 = torch.aten.gt.Tensor %1615, %1623 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,77,768],i1> loc(#loc1)
%1625 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%1626 = torch.aten.to.dtype %1625, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%1627 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%1628 = torch.aten.broadcast_to %1626, %1627 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%1629 = torch.valsem.aten.copy %1628, %1623, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%1630 = torch.aten.where.self %1624, %1629, %1615 : !torch.vtensor<[2,77,768],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,77,768],f16> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%1631 = torch.aten.lt.Tensor %1630, %1619 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,77,768],i1> loc(#loc1)
%1632 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%1633 = torch.aten.to.dtype %1632, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%1634 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%1635 = torch.aten.broadcast_to %1633, %1634 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%1636 = torch.valsem.aten.copy %1635, %1619, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%1637 = torch.aten.where.self %1631, %1636, %1630 : !torch.vtensor<[2,77,768],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,77,768],f16> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%1638 = torch.aten.round %1637 : !torch.vtensor<[2,77,768],f16> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%1639 = torch.aten.sub.Tensor %1638, %1613, %int1 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%1640 = torch.aten.mul.Tensor %1639, %1611 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[2,77,768],f16> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%1641 = torch.aten.broadcast_to %47, %1586 : !torch.vtensor<[320,48,1],f16>, !torch.list<int> -> !torch.vtensor<[320,48,16],f16> loc(#loc1)
%1642 = torch.aten.clone %1641, %int0 : !torch.vtensor<[320,48,16],f16>, !torch.int -> !torch.vtensor<[320,48,16],f16> loc(#loc1)
%1643 = torch.aten.view %1642, %1589 : !torch.vtensor<[320,48,16],f16>, !torch.list<int> -> !torch.vtensor<[320,768],f16> loc(#loc1)
%1644 = torch.aten.mul.Tensor %48, %1643 : !torch.vtensor<[320,768],si8>, !torch.vtensor<[320,768],f16> -> !torch.vtensor<[320,768],f16> loc(#loc1)
%1645 = torch.aten.transpose.int %1644, %int0, %int1 : !torch.vtensor<[320,768],f16>, !torch.int, !torch.int -> !torch.vtensor<[768,320],f16> loc(#loc1)
%1646 = torch.aten.view %1640, %1593 : !torch.vtensor<[2,77,768],f16>, !torch.list<int> -> !torch.vtensor<[154,768],f16> loc(#loc1)
%1647 = torch.aten.mm %1646, %1645 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,320],f16> -> !torch.vtensor<[154,320],f16> loc(#loc1)
%1648 = torch.aten.view %1647, %1596 : !torch.vtensor<[154,320],f16>, !torch.list<int> -> !torch.vtensor<[2,77,320],f16> loc(#loc1)
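// Reshape Q (4096 spatial tokens) and K/V (77 text tokens) into 8 heads of 40 dims:
// [16,4096,40] for Q and [16,77,40] for K and V.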
%1649 = torch.aten.view %1539, %1378 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16> loc(#loc1)
%1650 = torch.aten.permute %1649, %1380 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16> loc(#loc1)
%1651 = torch.aten.clone %1650, %int0 : !torch.vtensor<[2,8,4096,40],f16>, !torch.int -> !torch.vtensor<[2,8,4096,40],f16> loc(#loc1)
%1652 = torch.aten.view %1651, %1383 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16> loc(#loc1)
%1653 = torch.prim.ListConstruct %int2, %int77, %int8, %int40 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc5)
%1654 = torch.aten.view %1597, %1653 : !torch.vtensor<[2,77,320],f16>, !torch.list<int> -> !torch.vtensor<[2,77,8,40],f16> loc(#loc1)
%1655 = torch.aten.permute %1654, %1380 : !torch.vtensor<[2,77,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,40],f16> loc(#loc1)
%1656 = torch.aten.clone %1655, %int0 : !torch.vtensor<[2,8,77,40],f16>, !torch.int -> !torch.vtensor<[2,8,77,40],f16> loc(#loc1)
%1657 = torch.prim.ListConstruct %int16, %int77, %int40 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc2)
%1658 = torch.aten.view %1656, %1657 : !torch.vtensor<[2,8,77,40],f16>, !torch.list<int> -> !torch.vtensor<[16,77,40],f16> loc(#loc1)
%1659 = torch.aten.view %1648, %1653 : !torch.vtensor<[2,77,320],f16>, !torch.list<int> -> !torch.vtensor<[2,77,8,40],f16> loc(#loc1)
%1660 = torch.aten.permute %1659, %1380 : !torch.vtensor<[2,77,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,40],f16> loc(#loc1)
%1661 = torch.aten.clone %1660, %int0 : !torch.vtensor<[2,8,77,40],f16>, !torch.int -> !torch.vtensor<[2,8,77,40],f16> loc(#loc1)
%1662 = torch.aten.view %1661, %1657 : !torch.vtensor<[2,8,77,40],f16>, !torch.list<int> -> !torch.vtensor<[16,77,40],f16> loc(#loc1)
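// Cross-attention: Q·K^T -> [16,4096,77] scaled by %2, stable softmax over the text
// tokens, multiply with V, and merge heads back to [2,4096,320].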
%1663 = torch.aten.transpose.int %1658, %int-1, %int-2 : !torch.vtensor<[16,77,40],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,40,77],f16> loc(#loc1)
%1664 = torch.aten.broadcast_to %1652, %1383 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16> loc(#loc1)
%1665 = torch.aten.view %1664, %1383 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16> loc(#loc1)
%1666 = torch.prim.ListConstruct %int16, %int40, %int77 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc2)
%1667 = torch.aten.broadcast_to %1663, %1666 : !torch.vtensor<[16,40,77],f16>, !torch.list<int> -> !torch.vtensor<[16,40,77],f16> loc(#loc1)
%1668 = torch.aten.view %1667, %1666 : !torch.vtensor<[16,40,77],f16>, !torch.list<int> -> !torch.vtensor<[16,40,77],f16> loc(#loc1)
%1669 = torch.aten.bmm %1665, %1668 : !torch.vtensor<[16,4096,40],f16>, !torch.vtensor<[16,40,77],f16> -> !torch.vtensor<[16,4096,77],f16> loc(#loc1)
%1670 = torch.prim.ListConstruct %int16, %int4096, %int77 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc4)
%1671 = torch.aten.view %1669, %1670 : !torch.vtensor<[16,4096,77],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,77],f16> loc(#loc1)
%1672 = torch.aten.mul.Tensor %1671, %2 : !torch.vtensor<[16,4096,77],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,4096,77],f16> loc(#loc1)
%values_20, %indices_21 = torch.aten.max.dim %1672, %int-1, %true : !torch.vtensor<[16,4096,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,4096,1],f16>, !torch.vtensor<[16,4096,1],si64> loc(#loc1)
%1673 = torch.aten.sub.Tensor %1672, %values_20, %float1.000000e00 : !torch.vtensor<[16,4096,77],f16>, !torch.vtensor<[16,4096,1],f16>, !torch.float -> !torch.vtensor<[16,4096,77],f16> loc(#loc1)
%1674 = torch.aten.exp %1673 : !torch.vtensor<[16,4096,77],f16> -> !torch.vtensor<[16,4096,77],f16> loc(#loc1)
%1675 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int> loc(#loc1)
%1676 = torch.aten.sum.dim_IntList %1674, %1675, %true, %none : !torch.vtensor<[16,4096,77],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[16,4096,1],f16> loc(#loc1)
%1677 = torch.aten.div.Tensor %1674, %1676 : !torch.vtensor<[16,4096,77],f16>, !torch.vtensor<[16,4096,1],f16> -> !torch.vtensor<[16,4096,77],f16> loc(#loc1)
%1678 = torch.aten.broadcast_to %1677, %1670 : !torch.vtensor<[16,4096,77],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,77],f16> loc(#loc1)
%1679 = torch.aten.view %1678, %1670 : !torch.vtensor<[16,4096,77],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,77],f16> loc(#loc1)
%1680 = torch.aten.broadcast_to %1662, %1657 : !torch.vtensor<[16,77,40],f16>, !torch.list<int> -> !torch.vtensor<[16,77,40],f16> loc(#loc1)
%1681 = torch.aten.view %1680, %1657 : !torch.vtensor<[16,77,40],f16>, !torch.list<int> -> !torch.vtensor<[16,77,40],f16> loc(#loc1)
%1682 = torch.aten.bmm %1679, %1681 : !torch.vtensor<[16,4096,77],f16>, !torch.vtensor<[16,77,40],f16> -> !torch.vtensor<[16,4096,40],f16> loc(#loc1)
%1683 = torch.aten.view %1682, %1383 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16> loc(#loc1)
%1684 = torch.aten.view %1683, %1414 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16> loc(#loc1)
%1685 = torch.aten.permute %1684, %1380 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16> loc(#loc1)
%1686 = torch.aten.clone %1685, %int0 : !torch.vtensor<[2,4096,8,40],f16>, !torch.int -> !torch.vtensor<[2,4096,8,40],f16> loc(#loc1)
%1687 = torch.aten.view %1686, %1198 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
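// Fake-quantize the cross-attention output ahead of its output projection.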
%1688 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1689 = torch.aten.detach %1688 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1690 = torch.aten.view %1687, %1216 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,20,16],f16> loc(#loc1)
%1691 = torch.aten.abs %1690 : !torch.vtensor<[2,4096,20,16],f16> -> !torch.vtensor<[2,4096,20,16],f16> loc(#loc1)
%values_22, %indices_23 = torch.aten.max.dim %1691, %int3, %true : !torch.vtensor<[2,4096,20,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,4096,20,1],f16>, !torch.vtensor<[2,4096,20,1],si64> loc(#loc1)
%1692 = torch.aten.view %values_22, %1219 : !torch.vtensor<[2,4096,20,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,20,1],f16> loc(#loc1)
%1693 = torch.aten.broadcast_to %1692, %1216 : !torch.vtensor<[2,4096,20,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,20,16],f16> loc(#loc1)
%1694 = torch.aten.clone %1693, %int0 : !torch.vtensor<[2,4096,20,16],f16>, !torch.int -> !torch.vtensor<[2,4096,20,16],f16> loc(#loc1)
%1695 = torch.aten.view %1694, %1198 : !torch.vtensor<[2,4096,20,16],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%1696 = torch.aten.sub.Tensor %1689, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%1697 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%1698 = torch.aten.pow.Tensor_Tensor %1697, %1696 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1699 = torch.aten.neg %1698 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1700 = torch.aten.neg %1699 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1701 = torch.aten.div.Tensor %1695, %1700 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%1702 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1703 = torch.aten.detach %1702 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1704 = torch.aten.div.Tensor %1687, %1701 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%1705 = torch.aten.add.Tensor %1704, %1703, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%1706 = torch.aten.sub.Tensor %1689, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%1707 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%1708 = torch.aten.pow.Tensor_Tensor %1707, %1706 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1709 = torch.aten.neg %1708 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1710 = torch.aten.sub.Tensor %1689, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%1711 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%1712 = torch.aten.pow.Tensor_Tensor %1711, %1710 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1713 = torch.aten.sub.Tensor %1712, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%1714 = torch.aten.gt.Tensor %1705, %1713 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,4096,320],i1> loc(#loc1)
%1715 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%1716 = torch.aten.to.dtype %1715, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%1717 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%1718 = torch.aten.broadcast_to %1716, %1717 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%1719 = torch.valsem.aten.copy %1718, %1713, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%1720 = torch.aten.where.self %1714, %1719, %1705 : !torch.vtensor<[2,4096,320],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%1721 = torch.aten.lt.Tensor %1720, %1709 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,4096,320],i1> loc(#loc1)
%1722 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%1723 = torch.aten.to.dtype %1722, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%1724 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%1725 = torch.aten.broadcast_to %1723, %1724 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%1726 = torch.valsem.aten.copy %1725, %1709, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%1727 = torch.aten.where.self %1721, %1726, %1720 : !torch.vtensor<[2,4096,320],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%1728 = torch.aten.round %1727 : !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%1729 = torch.aten.sub.Tensor %1728, %1703, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%1730 = torch.aten.mul.Tensor %1729, %1701 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
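// Attention output projection with an on-the-fly dequantized weight: the
// per-group scales %49 ([320,20,1]) are broadcast across 16-element groups,
// reshaped to [320,320], and multiplied into the si8 weight %50; the f16
// result is transposed and applied as x @ W^T plus bias %51.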
%1731 = torch.aten.broadcast_to %49, %1259 : !torch.vtensor<[320,20,1],f16>, !torch.list<int> -> !torch.vtensor<[320,20,16],f16> loc(#loc1)
%1732 = torch.aten.clone %1731, %int0 : !torch.vtensor<[320,20,16],f16>, !torch.int -> !torch.vtensor<[320,20,16],f16> loc(#loc1)
%1733 = torch.aten.view %1732, %1262 : !torch.vtensor<[320,20,16],f16>, !torch.list<int> -> !torch.vtensor<[320,320],f16> loc(#loc1)
%1734 = torch.aten.mul.Tensor %50, %1733 : !torch.vtensor<[320,320],si8>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16> loc(#loc1)
%1735 = torch.aten.transpose.int %1734, %int0, %int1 : !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,320],f16> loc(#loc1)
%1736 = torch.aten.view %1730, %1266 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16> loc(#loc1)
%1737 = torch.aten.mm %1736, %1735 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16> loc(#loc1)
%1738 = torch.aten.mul.Scalar %51, %int1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320],f16> loc(#loc1)
%1739 = torch.aten.add.Tensor %1738, %1737, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[8192,320],f16>, !torch.int -> !torch.vtensor<[8192,320],f16> loc(#loc1)
%1740 = torch.aten.view %1739, %1198 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%1741 = torch.aten.add.Tensor %1740, %1472, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
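// Inline LayerNorm over the last (320-wide) dimension after the residual add:
// mean and biased variance via sum/div, then (x - mean) * rsqrt(var + 1e-5).
// No affine scale/shift ops appear in this expansion.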
%1742 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int> loc(#loc1)
%1743 = torch.aten.sum.dim_IntList %1741, %1742, %true, %none : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,4096,1],f16> loc(#loc1)
%1744 = torch.aten.div.Scalar %1743, %int320 : !torch.vtensor<[2,4096,1],f16>, !torch.int -> !torch.vtensor<[2,4096,1],f16> loc(#loc1)
%1745 = torch.prim.ListConstruct %int2, %int4096, %int320 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%1746 = torch.aten.broadcast_to %1744, %1745 : !torch.vtensor<[2,4096,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%1747 = torch.aten.sub.Tensor %1741, %1746, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%1748 = torch.aten.mul.Tensor %1747, %1747 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%1749 = torch.aten.sum.dim_IntList %1748, %1742, %true, %none : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,4096,1],f16> loc(#loc1)
%1750 = torch.aten.div.Scalar %1749, %int320 : !torch.vtensor<[2,4096,1],f16>, !torch.int -> !torch.vtensor<[2,4096,1],f16> loc(#loc1)
%1751 = torch.aten.add.Scalar %1750, %float1.000000e-05, %int1 : !torch.vtensor<[2,4096,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,4096,1],f16> loc(#loc1)
%1752 = torch.aten.rsqrt %1751 : !torch.vtensor<[2,4096,1],f16> -> !torch.vtensor<[2,4096,1],f16> loc(#loc1)
%1753 = torch.prim.ListConstruct %int2, %int4096, %int320 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%1754 = torch.aten.broadcast_to %1752, %1753 : !torch.vtensor<[2,4096,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%1755 = torch.aten.mul.Tensor %1747, %1754 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
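// Feed-forward entry in GEGLU form: a dequantized 320 -> 2560 projection
// (scales %52, si8 weight %53, bias %54), after which the [2,4096,2560]
// result is split into two 1280-wide halves and the GELU of the second half
// gates the first elementwise.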
%1756 = torch.prim.ListConstruct %int2560, %int20, %int16 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc2)
%1757 = torch.aten.broadcast_to %52, %1756 : !torch.vtensor<[2560,20,1],f16>, !torch.list<int> -> !torch.vtensor<[2560,20,16],f16> loc(#loc1)
%1758 = torch.aten.clone %1757, %int0 : !torch.vtensor<[2560,20,16],f16>, !torch.int -> !torch.vtensor<[2560,20,16],f16> loc(#loc1)
%1759 = torch.prim.ListConstruct %int2560, %int320 : (!torch.int, !torch.int) -> !torch.list<int> loc(#loc4)
%1760 = torch.aten.view %1758, %1759 : !torch.vtensor<[2560,20,16],f16>, !torch.list<int> -> !torch.vtensor<[2560,320],f16> loc(#loc1)
%1761 = torch.aten.mul.Tensor %53, %1760 : !torch.vtensor<[2560,320],si8>, !torch.vtensor<[2560,320],f16> -> !torch.vtensor<[2560,320],f16> loc(#loc1)
%1762 = torch.aten.transpose.int %1761, %int0, %int1 : !torch.vtensor<[2560,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,2560],f16> loc(#loc1)
%1763 = torch.aten.view %1755, %1266 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16> loc(#loc1)
%1764 = torch.aten.mm %1763, %1762 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,2560],f16> -> !torch.vtensor<[8192,2560],f16> loc(#loc1)
%1765 = torch.aten.mul.Scalar %54, %int1 : !torch.vtensor<[2560],f16>, !torch.int -> !torch.vtensor<[2560],f16> loc(#loc1)
%1766 = torch.aten.add.Tensor %1765, %1764, %int1 : !torch.vtensor<[2560],f16>, !torch.vtensor<[8192,2560],f16>, !torch.int -> !torch.vtensor<[8192,2560],f16> loc(#loc1)
%1767 = torch.prim.ListConstruct %int2, %int4096, %int2560 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc3)
%1768 = torch.aten.view %1766, %1767 : !torch.vtensor<[8192,2560],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,2560],f16> loc(#loc1)
%1769 = torch.aten.slice.Tensor %1768, %int-1, %int0, %int1280, %int1 : !torch.vtensor<[2,4096,2560],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,4096,1280],f16> loc(#loc1)
%1770 = torch.aten.slice.Tensor %1768, %int-1, %int1280, %int2560, %int1 : !torch.vtensor<[2,4096,2560],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,4096,1280],f16> loc(#loc1)
%1771 = torch.aten.gelu %1770, %str : !torch.vtensor<[2,4096,1280],f16>, !torch.str -> !torch.vtensor<[2,4096,1280],f16> loc(#loc1)
%1772 = torch.aten.mul.Tensor %1769, %1771 : !torch.vtensor<[2,4096,1280],f16>, !torch.vtensor<[2,4096,1280],f16> -> !torch.vtensor<[2,4096,1280],f16> loc(#loc1)
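// Feed-forward output projection with the same dequantization pattern
// (scales %55, si8 weight %56, bias %57), mapping 1280 -> 320, followed by
// the residual add onto %1741.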
%1773 = torch.prim.ListConstruct %int320, %int80, %int16 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc2)
%1774 = torch.aten.broadcast_to %55, %1773 : !torch.vtensor<[320,80,1],f16>, !torch.list<int> -> !torch.vtensor<[320,80,16],f16> loc(#loc1)
%1775 = torch.aten.clone %1774, %int0 : !torch.vtensor<[320,80,16],f16>, !torch.int -> !torch.vtensor<[320,80,16],f16> loc(#loc1)
%1776 = torch.prim.ListConstruct %int320, %int1280 : (!torch.int, !torch.int) -> !torch.list<int> loc(#loc4)
%1777 = torch.aten.view %1775, %1776 : !torch.vtensor<[320,80,16],f16>, !torch.list<int> -> !torch.vtensor<[320,1280],f16> loc(#loc1)
%1778 = torch.aten.mul.Tensor %56, %1777 : !torch.vtensor<[320,1280],si8>, !torch.vtensor<[320,1280],f16> -> !torch.vtensor<[320,1280],f16> loc(#loc1)
%1779 = torch.aten.transpose.int %1778, %int0, %int1 : !torch.vtensor<[320,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,320],f16> loc(#loc1)
%1780 = torch.prim.ListConstruct %int8192, %int1280 : (!torch.int, !torch.int) -> !torch.list<int> loc(#loc3)
%1781 = torch.aten.view %1772, %1780 : !torch.vtensor<[2,4096,1280],f16>, !torch.list<int> -> !torch.vtensor<[8192,1280],f16> loc(#loc1)
%1782 = torch.aten.mm %1781, %1779 : !torch.vtensor<[8192,1280],f16>, !torch.vtensor<[1280,320],f16> -> !torch.vtensor<[8192,320],f16> loc(#loc1)
%1783 = torch.aten.mul.Scalar %57, %int1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320],f16> loc(#loc1)
%1784 = torch.aten.add.Tensor %1783, %1782, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[8192,320],f16>, !torch.int -> !torch.vtensor<[8192,320],f16> loc(#loc1)
%1785 = torch.aten.view %1784, %1198 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%1786 = torch.aten.add.Tensor %1785, %1741, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
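// The [2,4096,320] token sequence is reshaped to a [2,64,64,320] feature map
// and permuted back to NCHW [2,320,64,64] for the convolutional path.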
%1787 = torch.prim.ListConstruct %int2, %int64, %int64, %int320 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc5)
%1788 = torch.aten.view %1786, %1787 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[2,64,64,320],f16> loc(#loc1)
%1789 = torch.prim.ListConstruct %int0, %int3, %int1, %int2 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc0)
%1790 = torch.aten.permute %1788, %1789 : !torch.vtensor<[2,64,64,320],f16>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
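// Same simulated quantize-dequantize pattern as above, now per channel group
// on the feature map: [2,320,64,64] viewed as [2,20,16,64,64] with the
// abs-max reduced over the 16-channel axis (dim 2).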
%1791 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1792 = torch.aten.detach %1791 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1793 = torch.aten.view %1790, %975 : !torch.vtensor<[2,320,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,16,64,64],f16> loc(#loc1)
%1794 = torch.aten.abs %1793 : !torch.vtensor<[2,20,16,64,64],f16> -> !torch.vtensor<[2,20,16,64,64],f16> loc(#loc1)
%values_24, %indices_25 = torch.aten.max.dim %1794, %int2, %true : !torch.vtensor<[2,20,16,64,64],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,20,1,64,64],f16>, !torch.vtensor<[2,20,1,64,64],si64> loc(#loc1)
%1795 = torch.aten.view %values_24, %978 : !torch.vtensor<[2,20,1,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,1,64,64],f16> loc(#loc1)
%1796 = torch.aten.broadcast_to %1795, %975 : !torch.vtensor<[2,20,1,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,16,64,64],f16> loc(#loc1)
%1797 = torch.aten.clone %1796, %int0 : !torch.vtensor<[2,20,16,64,64],f16>, !torch.int -> !torch.vtensor<[2,20,16,64,64],f16> loc(#loc1)
%1798 = torch.aten.view %1797, %958 : !torch.vtensor<[2,20,16,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%1799 = torch.aten.sub.Tensor %1792, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%1800 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%1801 = torch.aten.pow.Tensor_Tensor %1800, %1799 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1802 = torch.aten.neg %1801 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1803 = torch.aten.neg %1802 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1804 = torch.aten.div.Tensor %1798, %1803 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%1805 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1806 = torch.aten.detach %1805 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1807 = torch.aten.div.Tensor %1790, %1804 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%1808 = torch.aten.add.Tensor %1807, %1806, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%1809 = torch.aten.sub.Tensor %1792, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%1810 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%1811 = torch.aten.pow.Tensor_Tensor %1810, %1809 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1812 = torch.aten.neg %1811 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1813 = torch.aten.sub.Tensor %1792, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%1814 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%1815 = torch.aten.pow.Tensor_Tensor %1814, %1813 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1816 = torch.aten.sub.Tensor %1815, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%1817 = torch.aten.gt.Tensor %1808, %1816 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,320,64,64],i1> loc(#loc1)
%1818 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%1819 = torch.aten.to.dtype %1818, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%1820 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%1821 = torch.aten.broadcast_to %1819, %1820 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%1822 = torch.valsem.aten.copy %1821, %1816, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%1823 = torch.aten.where.self %1817, %1822, %1808 : !torch.vtensor<[2,320,64,64],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%1824 = torch.aten.lt.Tensor %1823, %1812 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,320,64,64],i1> loc(#loc1)
%1825 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%1826 = torch.aten.to.dtype %1825, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%1827 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%1828 = torch.aten.broadcast_to %1826, %1827 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%1829 = torch.valsem.aten.copy %1828, %1812, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%1830 = torch.aten.where.self %1824, %1829, %1823 : !torch.vtensor<[2,320,64,64],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%1831 = torch.aten.round %1830 : !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%1832 = torch.aten.sub.Tensor %1831, %1806, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%1833 = torch.aten.mul.Tensor %1832, %1804 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
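// Dequantized 1x1 convolution (scales %58, si8 weight %59, bias %60) closing
// the transformer block, then a residual add with %1119, presumably the
// feature map saved before the block.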
%1834 = torch.aten.broadcast_to %58, %1189 : !torch.vtensor<[320,20,1,1,1],f16>, !torch.list<int> -> !torch.vtensor<[320,20,16,1,1],f16> loc(#loc1)
%1835 = torch.aten.clone %1834, %int0 : !torch.vtensor<[320,20,16,1,1],f16>, !torch.int -> !torch.vtensor<[320,20,16,1,1],f16> loc(#loc1)
%1836 = torch.aten.view %1835, %1192 : !torch.vtensor<[320,20,16,1,1],f16>, !torch.list<int> -> !torch.vtensor<[320,320,1,1],f16> loc(#loc1)
%1837 = torch.aten.mul.Tensor %59, %1836 : !torch.vtensor<[320,320,1,1],si8>, !torch.vtensor<[320,320,1,1],f16> -> !torch.vtensor<[320,320,1,1],f16> loc(#loc1)
%1838 = torch.aten.convolution %1833, %1837, %60, %933, %934, %933, %false, %934, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[320,320,1,1],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%1839 = torch.aten.add.Tensor %1838, %1119, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
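// GroupNorm with 32 groups: the map is viewed as [2,32,10,4096] and per-group
// mean/variance over 10*4096 = 40960 elements are accumulated in f32/f64
// before rsqrt(var + eps) with eps %5 (~1e-5); per-channel affine %61/%62
// follows.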
%1840 = torch.aten.clone %1839, %int0 : !torch.vtensor<[2,320,64,64],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%1841 = torch.aten.view %1840, %936 : !torch.vtensor<[2,320,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,32,10,4096],f16> loc(#loc1)
%1842 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%1843 = torch.aten.to.dtype %1842, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%1844 = torch.prim.ListConstruct %int2, %int32, %int10, %int4096 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%1845 = torch.aten.broadcast_to %1843, %1844 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,10,4096],f32> loc(#loc1)
%1846 = torch.valsem.aten.copy %1845, %1841, %false : !torch.vtensor<[2,32,10,4096],f32>, !torch.vtensor<[2,32,10,4096],f16>, !torch.bool -> !torch.vtensor<[2,32,10,4096],f32> loc(#loc1)
%1847 = torch.aten.to.dtype %1846, %int7, %false, %false, %none : !torch.vtensor<[2,32,10,4096],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,10,4096],f64> loc(#loc1)
%1848 = torch.aten.sum.dim_IntList %1847, %943, %true, %none : !torch.vtensor<[2,32,10,4096],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%1849 = torch.aten.div.Scalar %1848, %int40960 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%1850 = torch.aten.sub.Tensor %1847, %1849, %float1.000000e00 : !torch.vtensor<[2,32,10,4096],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,10,4096],f64> loc(#loc1)
%1851 = torch.aten.mul.Tensor %1850, %1850 : !torch.vtensor<[2,32,10,4096],f64>, !torch.vtensor<[2,32,10,4096],f64> -> !torch.vtensor<[2,32,10,4096],f64> loc(#loc1)
%1852 = torch.aten.sum.dim_IntList %1851, %943, %true, %none : !torch.vtensor<[2,32,10,4096],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%1853 = torch.aten.div.Scalar %1852, %int40960 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%1854 = torch.aten.to.dtype %1853, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%1855 = torch.aten.sum.dim_IntList %1846, %943, %true, %none : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%1856 = torch.aten.div.Scalar %1855, %int40960 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%1857 = torch.aten.add.Tensor %1854, %5, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%1858 = torch.aten.rsqrt %1857 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%1859 = torch.aten.sub.Tensor %1841, %1856, %int1 : !torch.vtensor<[2,32,10,4096],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,10,4096],f32> loc(#loc1)
%1860 = torch.aten.mul.Tensor %1859, %1858 : !torch.vtensor<[2,32,10,4096],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,10,4096],f32> loc(#loc1)
%1861 = torch.aten.view %1860, %958 : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f32> loc(#loc1)
%1862 = torch.aten.unsqueeze %61, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16> loc(#loc1)
%1863 = torch.aten.unsqueeze %1862, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16> loc(#loc1)
%1864 = torch.aten.mul.Tensor %1861, %1863 : !torch.vtensor<[2,320,64,64],f32>, !torch.vtensor<[320,1,1],f16> -> !torch.vtensor<[2,320,64,64],f32> loc(#loc1)
%1865 = torch.aten.unsqueeze %62, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16> loc(#loc1)
%1866 = torch.aten.unsqueeze %1865, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16> loc(#loc1)
%1867 = torch.aten.add.Tensor %1864, %1866, %int1 : !torch.vtensor<[2,320,64,64],f32>, !torch.vtensor<[320,1,1],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f32> loc(#loc1)
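// Cast back to f16, then SiLU: x * sigmoid(x).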
%1868 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%1869 = torch.aten.to.dtype %1868, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%1870 = torch.prim.ListConstruct %int2, %int320, %int64, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%1871 = torch.aten.broadcast_to %1869, %1870 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%1872 = torch.valsem.aten.copy %1871, %1867, %false : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f32>, !torch.bool -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%1873 = torch.aten.sigmoid %1872 : !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%1874 = torch.aten.mul.Tensor %1873, %1872 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
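// Per-channel-group quantize-dequantize again, preparing the activation for
// the first 3x3 convolution of what looks like a ResNet block.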
%1875 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1876 = torch.aten.detach %1875 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1877 = torch.aten.view %1874, %975 : !torch.vtensor<[2,320,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,16,64,64],f16> loc(#loc1)
%1878 = torch.aten.abs %1877 : !torch.vtensor<[2,20,16,64,64],f16> -> !torch.vtensor<[2,20,16,64,64],f16> loc(#loc1)
%values_26, %indices_27 = torch.aten.max.dim %1878, %int2, %true : !torch.vtensor<[2,20,16,64,64],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,20,1,64,64],f16>, !torch.vtensor<[2,20,1,64,64],si64> loc(#loc1)
%1879 = torch.aten.view %values_26, %978 : !torch.vtensor<[2,20,1,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,1,64,64],f16> loc(#loc1)
%1880 = torch.aten.broadcast_to %1879, %975 : !torch.vtensor<[2,20,1,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,16,64,64],f16> loc(#loc1)
%1881 = torch.aten.clone %1880, %int0 : !torch.vtensor<[2,20,16,64,64],f16>, !torch.int -> !torch.vtensor<[2,20,16,64,64],f16> loc(#loc1)
%1882 = torch.aten.view %1881, %958 : !torch.vtensor<[2,20,16,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%1883 = torch.aten.sub.Tensor %1876, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%1884 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%1885 = torch.aten.pow.Tensor_Tensor %1884, %1883 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1886 = torch.aten.neg %1885 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1887 = torch.aten.neg %1886 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1888 = torch.aten.div.Tensor %1882, %1887 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%1889 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1890 = torch.aten.detach %1889 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1891 = torch.aten.div.Tensor %1874, %1888 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%1892 = torch.aten.add.Tensor %1891, %1890, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%1893 = torch.aten.sub.Tensor %1876, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%1894 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%1895 = torch.aten.pow.Tensor_Tensor %1894, %1893 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1896 = torch.aten.neg %1895 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1897 = torch.aten.sub.Tensor %1876, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%1898 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%1899 = torch.aten.pow.Tensor_Tensor %1898, %1897 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1900 = torch.aten.sub.Tensor %1899, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%1901 = torch.aten.gt.Tensor %1892, %1900 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,320,64,64],i1> loc(#loc1)
%1902 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%1903 = torch.aten.to.dtype %1902, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%1904 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%1905 = torch.aten.broadcast_to %1903, %1904 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%1906 = torch.valsem.aten.copy %1905, %1900, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%1907 = torch.aten.where.self %1901, %1906, %1892 : !torch.vtensor<[2,320,64,64],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%1908 = torch.aten.lt.Tensor %1907, %1896 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,320,64,64],i1> loc(#loc1)
%1909 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%1910 = torch.aten.to.dtype %1909, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%1911 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%1912 = torch.aten.broadcast_to %1910, %1911 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%1913 = torch.valsem.aten.copy %1912, %1896, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%1914 = torch.aten.where.self %1908, %1913, %1907 : !torch.vtensor<[2,320,64,64],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%1915 = torch.aten.round %1914 : !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%1916 = torch.aten.sub.Tensor %1915, %1890, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%1917 = torch.aten.mul.Tensor %1916, %1888 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
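// Dequantized 3x3 convolution (scales %63, si8 weight %64, bias %65).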
%1918 = torch.aten.broadcast_to %63, %1018 : !torch.vtensor<[320,20,1,3,3],f16>, !torch.list<int> -> !torch.vtensor<[320,20,16,3,3],f16> loc(#loc1)
%1919 = torch.aten.clone %1918, %int0 : !torch.vtensor<[320,20,16,3,3],f16>, !torch.int -> !torch.vtensor<[320,20,16,3,3],f16> loc(#loc1)
%1920 = torch.aten.view %1919, %1021 : !torch.vtensor<[320,20,16,3,3],f16>, !torch.list<int> -> !torch.vtensor<[320,320,3,3],f16> loc(#loc1)
%1921 = torch.aten.mul.Tensor %64, %1920 : !torch.vtensor<[320,320,3,3],si8>, !torch.vtensor<[320,320,3,3],f16> -> !torch.vtensor<[320,320,3,3],f16> loc(#loc1)
%1922 = torch.aten.convolution %1917, %1921, %65, %933, %933, %933, %false, %934, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[320,320,3,3],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
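// Timestep-embedding injection: SiLU over %932 (a [2,1280] embedding), a
// dense 1280 -> 320 layer (%66, %67), and the [2,320] result unsqueezed to
// [2,320,1,1] and broadcast-added onto the conv output.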
%1923 = torch.aten.sigmoid %932 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16> loc(#loc1)
%1924 = torch.aten.mul.Tensor %1923, %932 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16> loc(#loc1)
%1925 = torch.aten.transpose.int %66, %int0, %int1 : !torch.vtensor<[320,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,320],f16> loc(#loc1)
%1926 = torch.aten.mm %1924, %1925 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,320],f16> -> !torch.vtensor<[2,320],f16> loc(#loc1)
%1927 = torch.aten.mul.Scalar %67, %int1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320],f16> loc(#loc1)
%1928 = torch.aten.add.Tensor %1927, %1926, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[2,320],f16>, !torch.int -> !torch.vtensor<[2,320],f16> loc(#loc1)
%1929 = torch.aten.slice.Tensor %1928, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,320],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,320],f16> loc(#loc1)
%1930 = torch.aten.slice.Tensor %1929, %int1, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,320],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,320],f16> loc(#loc1)
%1931 = torch.aten.unsqueeze %1930, %int2 : !torch.vtensor<[2,320],f16>, !torch.int -> !torch.vtensor<[2,320,1],f16> loc(#loc1)
%1932 = torch.aten.unsqueeze %1931, %int3 : !torch.vtensor<[2,320,1],f16>, !torch.int -> !torch.vtensor<[2,320,1,1],f16> loc(#loc1)
%1933 = torch.aten.add.Tensor %1922, %1932, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,1,1],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
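// Second GroupNorm of the block (same 32-group statistics pattern, eps %5)
// with per-channel affine %68/%69, then the f16 cast and SiLU.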
%1934 = torch.aten.view %1933, %936 : !torch.vtensor<[2,320,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,32,10,4096],f16> loc(#loc1)
%1935 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%1936 = torch.aten.to.dtype %1935, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%1937 = torch.prim.ListConstruct %int2, %int32, %int10, %int4096 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%1938 = torch.aten.broadcast_to %1936, %1937 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,10,4096],f32> loc(#loc1)
%1939 = torch.valsem.aten.copy %1938, %1934, %false : !torch.vtensor<[2,32,10,4096],f32>, !torch.vtensor<[2,32,10,4096],f16>, !torch.bool -> !torch.vtensor<[2,32,10,4096],f32> loc(#loc1)
%1940 = torch.aten.to.dtype %1939, %int7, %false, %false, %none : !torch.vtensor<[2,32,10,4096],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,10,4096],f64> loc(#loc1)
%1941 = torch.aten.sum.dim_IntList %1940, %943, %true, %none : !torch.vtensor<[2,32,10,4096],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%1942 = torch.aten.div.Scalar %1941, %int40960 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%1943 = torch.aten.sub.Tensor %1940, %1942, %float1.000000e00 : !torch.vtensor<[2,32,10,4096],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,10,4096],f64> loc(#loc1)
%1944 = torch.aten.mul.Tensor %1943, %1943 : !torch.vtensor<[2,32,10,4096],f64>, !torch.vtensor<[2,32,10,4096],f64> -> !torch.vtensor<[2,32,10,4096],f64> loc(#loc1)
%1945 = torch.aten.sum.dim_IntList %1944, %943, %true, %none : !torch.vtensor<[2,32,10,4096],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%1946 = torch.aten.div.Scalar %1945, %int40960 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%1947 = torch.aten.to.dtype %1946, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%1948 = torch.aten.sum.dim_IntList %1939, %943, %true, %none : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%1949 = torch.aten.div.Scalar %1948, %int40960 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%1950 = torch.aten.add.Tensor %1947, %5, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%1951 = torch.aten.rsqrt %1950 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%1952 = torch.aten.sub.Tensor %1934, %1949, %int1 : !torch.vtensor<[2,32,10,4096],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,10,4096],f32> loc(#loc1)
%1953 = torch.aten.mul.Tensor %1952, %1951 : !torch.vtensor<[2,32,10,4096],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,10,4096],f32> loc(#loc1)
%1954 = torch.aten.view %1953, %958 : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f32> loc(#loc1)
%1955 = torch.aten.unsqueeze %68, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16> loc(#loc1)
%1956 = torch.aten.unsqueeze %1955, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16> loc(#loc1)
%1957 = torch.aten.mul.Tensor %1954, %1956 : !torch.vtensor<[2,320,64,64],f32>, !torch.vtensor<[320,1,1],f16> -> !torch.vtensor<[2,320,64,64],f32> loc(#loc1)
%1958 = torch.aten.unsqueeze %69, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16> loc(#loc1)
%1959 = torch.aten.unsqueeze %1958, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16> loc(#loc1)
%1960 = torch.aten.add.Tensor %1957, %1959, %int1 : !torch.vtensor<[2,320,64,64],f32>, !torch.vtensor<[320,1,1],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f32> loc(#loc1)
%1961 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%1962 = torch.aten.to.dtype %1961, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%1963 = torch.prim.ListConstruct %int2, %int320, %int64, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%1964 = torch.aten.broadcast_to %1962, %1963 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%1965 = torch.valsem.aten.copy %1964, %1960, %false : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f32>, !torch.bool -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%1966 = torch.aten.sigmoid %1965 : !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%1967 = torch.aten.mul.Tensor %1966, %1965 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
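// Quantize-dequantize once more, ahead of the second 3x3 convolution.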
%1968 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1969 = torch.aten.detach %1968 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1970 = torch.aten.view %1967, %975 : !torch.vtensor<[2,320,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,16,64,64],f16> loc(#loc1)
%1971 = torch.aten.abs %1970 : !torch.vtensor<[2,20,16,64,64],f16> -> !torch.vtensor<[2,20,16,64,64],f16> loc(#loc1)
%values_28, %indices_29 = torch.aten.max.dim %1971, %int2, %true : !torch.vtensor<[2,20,16,64,64],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,20,1,64,64],f16>, !torch.vtensor<[2,20,1,64,64],si64> loc(#loc1)
%1972 = torch.aten.view %values_28, %978 : !torch.vtensor<[2,20,1,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,1,64,64],f16> loc(#loc1)
%1973 = torch.aten.broadcast_to %1972, %975 : !torch.vtensor<[2,20,1,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,16,64,64],f16> loc(#loc1)
%1974 = torch.aten.clone %1973, %int0 : !torch.vtensor<[2,20,16,64,64],f16>, !torch.int -> !torch.vtensor<[2,20,16,64,64],f16> loc(#loc1)
%1975 = torch.aten.view %1974, %958 : !torch.vtensor<[2,20,16,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%1976 = torch.aten.sub.Tensor %1969, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%1977 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%1978 = torch.aten.pow.Tensor_Tensor %1977, %1976 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1979 = torch.aten.neg %1978 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1980 = torch.aten.neg %1979 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1981 = torch.aten.div.Tensor %1975, %1980 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%1982 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1983 = torch.aten.detach %1982 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1984 = torch.aten.div.Tensor %1967, %1981 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%1985 = torch.aten.add.Tensor %1984, %1983, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%1986 = torch.aten.sub.Tensor %1969, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%1987 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%1988 = torch.aten.pow.Tensor_Tensor %1987, %1986 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1989 = torch.aten.neg %1988 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1990 = torch.aten.sub.Tensor %1969, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%1991 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%1992 = torch.aten.pow.Tensor_Tensor %1991, %1990 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1993 = torch.aten.sub.Tensor %1992, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%1994 = torch.aten.gt.Tensor %1985, %1993 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,320,64,64],i1> loc(#loc1)
%1995 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%1996 = torch.aten.to.dtype %1995, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%1997 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%1998 = torch.aten.broadcast_to %1996, %1997 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%1999 = torch.valsem.aten.copy %1998, %1993, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%2000 = torch.aten.where.self %1994, %1999, %1985 : !torch.vtensor<[2,320,64,64],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%2001 = torch.aten.lt.Tensor %2000, %1989 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,320,64,64],i1> loc(#loc1)
%2002 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%2003 = torch.aten.to.dtype %2002, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%2004 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%2005 = torch.aten.broadcast_to %2003, %2004 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%2006 = torch.valsem.aten.copy %2005, %1989, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%2007 = torch.aten.where.self %2001, %2006, %2000 : !torch.vtensor<[2,320,64,64],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%2008 = torch.aten.round %2007 : !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%2009 = torch.aten.sub.Tensor %2008, %1983, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%2010 = torch.aten.mul.Tensor %2009, %1981 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
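// Dequantized second 3x3 convolution (scales %70, si8 weight %71, bias %72),
// the skip connection adding %1839, and a division by %4 (the literal 1.0),
// presumably a no-op output scale factor.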
%2011 = torch.aten.broadcast_to %70, %1018 : !torch.vtensor<[320,20,1,3,3],f16>, !torch.list<int> -> !torch.vtensor<[320,20,16,3,3],f16> loc(#loc1)
%2012 = torch.aten.clone %2011, %int0 : !torch.vtensor<[320,20,16,3,3],f16>, !torch.int -> !torch.vtensor<[320,20,16,3,3],f16> loc(#loc1)
%2013 = torch.aten.view %2012, %1021 : !torch.vtensor<[320,20,16,3,3],f16>, !torch.list<int> -> !torch.vtensor<[320,320,3,3],f16> loc(#loc1)
%2014 = torch.aten.mul.Tensor %71, %2013 : !torch.vtensor<[320,320,3,3],si8>, !torch.vtensor<[320,320,3,3],f16> -> !torch.vtensor<[320,320,3,3],f16> loc(#loc1)
%2015 = torch.aten.convolution %2010, %2014, %72, %933, %933, %933, %false, %934, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[320,320,3,3],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%2016 = torch.aten.add.Tensor %1839, %2015, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%2017 = torch.aten.div.Tensor %2016, %4 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
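// GroupNorm at the entry of the next attention block: the same 32-group
// pattern but with eps %3 (~1e-6) and no affine ops visible before the cast
// back to f16.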
%2018 = torch.aten.clone %2017, %int0 : !torch.vtensor<[2,320,64,64],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%2019 = torch.aten.view %2018, %936 : !torch.vtensor<[2,320,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,32,10,4096],f16> loc(#loc1)
%2020 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%2021 = torch.aten.to.dtype %2020, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%2022 = torch.prim.ListConstruct %int2, %int32, %int10, %int4096 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%2023 = torch.aten.broadcast_to %2021, %2022 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,10,4096],f32> loc(#loc1)
%2024 = torch.valsem.aten.copy %2023, %2019, %false : !torch.vtensor<[2,32,10,4096],f32>, !torch.vtensor<[2,32,10,4096],f16>, !torch.bool -> !torch.vtensor<[2,32,10,4096],f32> loc(#loc1)
%2025 = torch.aten.to.dtype %2024, %int7, %false, %false, %none : !torch.vtensor<[2,32,10,4096],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,10,4096],f64> loc(#loc1)
%2026 = torch.aten.sum.dim_IntList %2025, %943, %true, %none : !torch.vtensor<[2,32,10,4096],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%2027 = torch.aten.div.Scalar %2026, %int40960 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%2028 = torch.aten.sub.Tensor %2025, %2027, %float1.000000e00 : !torch.vtensor<[2,32,10,4096],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,10,4096],f64> loc(#loc1)
%2029 = torch.aten.mul.Tensor %2028, %2028 : !torch.vtensor<[2,32,10,4096],f64>, !torch.vtensor<[2,32,10,4096],f64> -> !torch.vtensor<[2,32,10,4096],f64> loc(#loc1)
%2030 = torch.aten.sum.dim_IntList %2029, %943, %true, %none : !torch.vtensor<[2,32,10,4096],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%2031 = torch.aten.div.Scalar %2030, %int40960 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%2032 = torch.aten.to.dtype %2031, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%2033 = torch.aten.sum.dim_IntList %2024, %943, %true, %none : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%2034 = torch.aten.div.Scalar %2033, %int40960 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%2035 = torch.aten.add.Tensor %2032, %3, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%2036 = torch.aten.rsqrt %2035 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%2037 = torch.aten.sub.Tensor %2019, %2034, %int1 : !torch.vtensor<[2,32,10,4096],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,10,4096],f32> loc(#loc1)
%2038 = torch.aten.mul.Tensor %2037, %2036 : !torch.vtensor<[2,32,10,4096],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,10,4096],f32> loc(#loc1)
%2039 = torch.aten.view %2038, %958 : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f32> loc(#loc1)
%2040 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%2041 = torch.aten.to.dtype %2040, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%2042 = torch.prim.ListConstruct %int2, %int320, %int64, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%2043 = torch.aten.broadcast_to %2041, %2042 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%2044 = torch.valsem.aten.copy %2043, %2039, %false : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f32>, !torch.bool -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
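// Simulated quantization of the normalized map, feeding the 1x1 input
// projection below.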
%2045 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2046 = torch.aten.detach %2045 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2047 = torch.aten.view %2044, %975 : !torch.vtensor<[2,320,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,16,64,64],f16> loc(#loc1)
%2048 = torch.aten.abs %2047 : !torch.vtensor<[2,20,16,64,64],f16> -> !torch.vtensor<[2,20,16,64,64],f16> loc(#loc1)
%values_30, %indices_31 = torch.aten.max.dim %2048, %int2, %true : !torch.vtensor<[2,20,16,64,64],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,20,1,64,64],f16>, !torch.vtensor<[2,20,1,64,64],si64> loc(#loc1)
%2049 = torch.aten.view %values_30, %978 : !torch.vtensor<[2,20,1,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,1,64,64],f16> loc(#loc1)
%2050 = torch.aten.broadcast_to %2049, %975 : !torch.vtensor<[2,20,1,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,16,64,64],f16> loc(#loc1)
%2051 = torch.aten.clone %2050, %int0 : !torch.vtensor<[2,20,16,64,64],f16>, !torch.int -> !torch.vtensor<[2,20,16,64,64],f16> loc(#loc1)
%2052 = torch.aten.view %2051, %958 : !torch.vtensor<[2,20,16,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%2053 = torch.aten.sub.Tensor %2046, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%2054 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%2055 = torch.aten.pow.Tensor_Tensor %2054, %2053 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2056 = torch.aten.neg %2055 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2057 = torch.aten.neg %2056 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2058 = torch.aten.div.Tensor %2052, %2057 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%2059 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2060 = torch.aten.detach %2059 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2061 = torch.aten.div.Tensor %2044, %2058 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%2062 = torch.aten.add.Tensor %2061, %2060, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%2063 = torch.aten.sub.Tensor %2046, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%2064 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%2065 = torch.aten.pow.Tensor_Tensor %2064, %2063 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2066 = torch.aten.neg %2065 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2067 = torch.aten.sub.Tensor %2046, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%2068 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%2069 = torch.aten.pow.Tensor_Tensor %2068, %2067 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2070 = torch.aten.sub.Tensor %2069, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%2071 = torch.aten.gt.Tensor %2062, %2070 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,320,64,64],i1> loc(#loc1)
%2072 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%2073 = torch.aten.to.dtype %2072, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%2074 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%2075 = torch.aten.broadcast_to %2073, %2074 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%2076 = torch.valsem.aten.copy %2075, %2070, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%2077 = torch.aten.where.self %2071, %2076, %2062 : !torch.vtensor<[2,320,64,64],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%2078 = torch.aten.lt.Tensor %2077, %2066 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,320,64,64],i1> loc(#loc1)
%2079 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%2080 = torch.aten.to.dtype %2079, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%2081 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%2082 = torch.aten.broadcast_to %2080, %2081 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%2083 = torch.valsem.aten.copy %2082, %2066, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%2084 = torch.aten.where.self %2078, %2083, %2077 : !torch.vtensor<[2,320,64,64],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%2085 = torch.aten.round %2084 : !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%2086 = torch.aten.sub.Tensor %2085, %2060, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%2087 = torch.aten.mul.Tensor %2086, %2058 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
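// Dequantized 1x1 convolution (scales %73, si8 weight %74, bias %75), then
// an NCHW -> NHWC permute and a flatten to a [2,4096,320] token sequence.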
%2088 = torch.aten.broadcast_to %73, %1189 : !torch.vtensor<[320,20,1,1,1],f16>, !torch.list<int> -> !torch.vtensor<[320,20,16,1,1],f16> loc(#loc1)
%2089 = torch.aten.clone %2088, %int0 : !torch.vtensor<[320,20,16,1,1],f16>, !torch.int -> !torch.vtensor<[320,20,16,1,1],f16> loc(#loc1)
%2090 = torch.aten.view %2089, %1192 : !torch.vtensor<[320,20,16,1,1],f16>, !torch.list<int> -> !torch.vtensor<[320,320,1,1],f16> loc(#loc1)
%2091 = torch.aten.mul.Tensor %74, %2090 : !torch.vtensor<[320,320,1,1],si8>, !torch.vtensor<[320,320,1,1],f16> -> !torch.vtensor<[320,320,1,1],f16> loc(#loc1)
%2092 = torch.aten.convolution %2087, %2091, %75, %933, %934, %933, %false, %934, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[320,320,1,1],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%2093 = torch.aten.permute %2092, %1196 : !torch.vtensor<[2,320,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,64,64,320],f16> loc(#loc1)
%2094 = torch.aten.view %2093, %1198 : !torch.vtensor<[2,64,64,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
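// Inline LayerNorm over the 320-wide channel dimension (eps 1e-5), matching
// the pattern in the earlier transformer block.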
%2095 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int> loc(#loc1)
%2096 = torch.aten.sum.dim_IntList %2094, %2095, %true, %none : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,4096,1],f16> loc(#loc1)
%2097 = torch.aten.div.Scalar %2096, %int320 : !torch.vtensor<[2,4096,1],f16>, !torch.int -> !torch.vtensor<[2,4096,1],f16> loc(#loc1)
%2098 = torch.prim.ListConstruct %int2, %int4096, %int320 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%2099 = torch.aten.broadcast_to %2097, %2098 : !torch.vtensor<[2,4096,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2100 = torch.aten.sub.Tensor %2094, %2099, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2101 = torch.aten.mul.Tensor %2100, %2100 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2102 = torch.aten.sum.dim_IntList %2101, %2095, %true, %none : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,4096,1],f16> loc(#loc1)
%2103 = torch.aten.div.Scalar %2102, %int320 : !torch.vtensor<[2,4096,1],f16>, !torch.int -> !torch.vtensor<[2,4096,1],f16> loc(#loc1)
%2104 = torch.aten.add.Scalar %2103, %float1.000000e-05, %int1 : !torch.vtensor<[2,4096,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,4096,1],f16> loc(#loc1)
%2105 = torch.aten.rsqrt %2104 : !torch.vtensor<[2,4096,1],f16> -> !torch.vtensor<[2,4096,1],f16> loc(#loc1)
%2106 = torch.prim.ListConstruct %int2, %int4096, %int320 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%2107 = torch.aten.broadcast_to %2105, %2106 : !torch.vtensor<[2,4096,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2108 = torch.aten.mul.Tensor %2100, %2107 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
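    // Note: %2096-%2108 compute a LayerNorm by hand over the channel dim:
    // mean, centered difference, variance, rsqrt(var + 1e-5), normalize.
    // What follows looks like per-group activation fake-quantization: the 320 channels
    // are viewed as 20 groups of 16, abs + max per group gives the dynamic range, and
    // %880/%881/%882 (defined earlier, presumably the bit-width, zero point, and base 2)
    // turn that range into a per-group scale via 2^(b-1).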
%2109 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2110 = torch.aten.detach %2109 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2111 = torch.aten.view %2108, %1216 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,20,16],f16> loc(#loc1)
%2112 = torch.aten.abs %2111 : !torch.vtensor<[2,4096,20,16],f16> -> !torch.vtensor<[2,4096,20,16],f16> loc(#loc1)
%values_32, %indices_33 = torch.aten.max.dim %2112, %int3, %true : !torch.vtensor<[2,4096,20,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,4096,20,1],f16>, !torch.vtensor<[2,4096,20,1],si64> loc(#loc1)
%2113 = torch.aten.view %values_32, %1219 : !torch.vtensor<[2,4096,20,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,20,1],f16> loc(#loc1)
%2114 = torch.aten.broadcast_to %2113, %1216 : !torch.vtensor<[2,4096,20,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,20,16],f16> loc(#loc1)
%2115 = torch.aten.clone %2114, %int0 : !torch.vtensor<[2,4096,20,16],f16>, !torch.int -> !torch.vtensor<[2,4096,20,16],f16> loc(#loc1)
%2116 = torch.aten.view %2115, %1198 : !torch.vtensor<[2,4096,20,16],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2117 = torch.aten.sub.Tensor %2110, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%2118 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%2119 = torch.aten.pow.Tensor_Tensor %2118, %2117 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2120 = torch.aten.neg %2119 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2121 = torch.aten.neg %2120 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2122 = torch.aten.div.Tensor %2116, %2121 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2123 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2124 = torch.aten.detach %2123 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2125 = torch.aten.div.Tensor %2108, %2122 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2126 = torch.aten.add.Tensor %2125, %2124, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2127 = torch.aten.sub.Tensor %2110, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%2128 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%2129 = torch.aten.pow.Tensor_Tensor %2128, %2127 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2130 = torch.aten.neg %2129 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2131 = torch.aten.sub.Tensor %2110, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%2132 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%2133 = torch.aten.pow.Tensor_Tensor %2132, %2131 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2134 = torch.aten.sub.Tensor %2133, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%2135 = torch.aten.gt.Tensor %2126, %2134 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,4096,320],i1> loc(#loc1)
%2136 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%2137 = torch.aten.to.dtype %2136, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%2138 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%2139 = torch.aten.broadcast_to %2137, %2138 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%2140 = torch.valsem.aten.copy %2139, %2134, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%2141 = torch.aten.where.self %2135, %2140, %2126 : !torch.vtensor<[2,4096,320],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2142 = torch.aten.lt.Tensor %2141, %2130 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,4096,320],i1> loc(#loc1)
%2143 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%2144 = torch.aten.to.dtype %2143, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%2145 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%2146 = torch.aten.broadcast_to %2144, %2145 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%2147 = torch.valsem.aten.copy %2146, %2130, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%2148 = torch.aten.where.self %2142, %2147, %2141 : !torch.vtensor<[2,4096,320],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2149 = torch.aten.round %2148 : !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2150 = torch.aten.sub.Tensor %2149, %2124, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2151 = torch.aten.mul.Tensor %2150, %2122 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
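    // Note: %2149-%2151 dequantize again (round after the gt/lt + where clamping above,
    // subtract the zero point, multiply by the per-group scale %2122).
    // The ops below apply a quantized linear layer: per-group f16 scales (%76) are
    // broadcast over an si8 weight (%77), transposed, and fed to a matmul with the bias
    // (%78) added via mul.Scalar/add.Tensor, most likely the query projection of a
    // self-attention block.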
%2152 = torch.aten.broadcast_to %76, %1259 : !torch.vtensor<[320,20,1],f16>, !torch.list<int> -> !torch.vtensor<[320,20,16],f16> loc(#loc1)
%2153 = torch.aten.clone %2152, %int0 : !torch.vtensor<[320,20,16],f16>, !torch.int -> !torch.vtensor<[320,20,16],f16> loc(#loc1)
%2154 = torch.aten.view %2153, %1262 : !torch.vtensor<[320,20,16],f16>, !torch.list<int> -> !torch.vtensor<[320,320],f16> loc(#loc1)
%2155 = torch.aten.mul.Tensor %77, %2154 : !torch.vtensor<[320,320],si8>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16> loc(#loc1)
%2156 = torch.aten.transpose.int %2155, %int0, %int1 : !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,320],f16> loc(#loc1)
%2157 = torch.aten.view %2151, %1266 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16> loc(#loc1)
%2158 = torch.aten.mm %2157, %2156 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16> loc(#loc1)
%2159 = torch.aten.mul.Scalar %78, %int1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320],f16> loc(#loc1)
%2160 = torch.aten.add.Tensor %2159, %2158, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[8192,320],f16>, !torch.int -> !torch.vtensor<[8192,320],f16> loc(#loc1)
%2161 = torch.aten.view %2160, %1198 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
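    // Note: the same fake-quant + quantized-linear pattern now repeats on the identical
    // normalized input %2108, presumably producing the key projection.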
%2162 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2163 = torch.aten.detach %2162 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2164 = torch.aten.view %2108, %1216 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,20,16],f16> loc(#loc1)
%2165 = torch.aten.abs %2164 : !torch.vtensor<[2,4096,20,16],f16> -> !torch.vtensor<[2,4096,20,16],f16> loc(#loc1)
%values_34, %indices_35 = torch.aten.max.dim %2165, %int3, %true : !torch.vtensor<[2,4096,20,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,4096,20,1],f16>, !torch.vtensor<[2,4096,20,1],si64> loc(#loc1)
%2166 = torch.aten.view %values_34, %1219 : !torch.vtensor<[2,4096,20,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,20,1],f16> loc(#loc1)
%2167 = torch.aten.broadcast_to %2166, %1216 : !torch.vtensor<[2,4096,20,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,20,16],f16> loc(#loc1)
%2168 = torch.aten.clone %2167, %int0 : !torch.vtensor<[2,4096,20,16],f16>, !torch.int -> !torch.vtensor<[2,4096,20,16],f16> loc(#loc1)
%2169 = torch.aten.view %2168, %1198 : !torch.vtensor<[2,4096,20,16],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2170 = torch.aten.sub.Tensor %2163, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%2171 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%2172 = torch.aten.pow.Tensor_Tensor %2171, %2170 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2173 = torch.aten.neg %2172 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2174 = torch.aten.neg %2173 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2175 = torch.aten.div.Tensor %2169, %2174 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2176 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2177 = torch.aten.detach %2176 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2178 = torch.aten.div.Tensor %2108, %2175 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2179 = torch.aten.add.Tensor %2178, %2177, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2180 = torch.aten.sub.Tensor %2163, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%2181 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%2182 = torch.aten.pow.Tensor_Tensor %2181, %2180 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2183 = torch.aten.neg %2182 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2184 = torch.aten.sub.Tensor %2163, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%2185 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%2186 = torch.aten.pow.Tensor_Tensor %2185, %2184 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2187 = torch.aten.sub.Tensor %2186, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%2188 = torch.aten.gt.Tensor %2179, %2187 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,4096,320],i1> loc(#loc1)
%2189 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%2190 = torch.aten.to.dtype %2189, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%2191 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%2192 = torch.aten.broadcast_to %2190, %2191 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%2193 = torch.valsem.aten.copy %2192, %2187, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%2194 = torch.aten.where.self %2188, %2193, %2179 : !torch.vtensor<[2,4096,320],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2195 = torch.aten.lt.Tensor %2194, %2183 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,4096,320],i1> loc(#loc1)
%2196 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%2197 = torch.aten.to.dtype %2196, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%2198 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%2199 = torch.aten.broadcast_to %2197, %2198 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%2200 = torch.valsem.aten.copy %2199, %2183, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%2201 = torch.aten.where.self %2195, %2200, %2194 : !torch.vtensor<[2,4096,320],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2202 = torch.aten.round %2201 : !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2203 = torch.aten.sub.Tensor %2202, %2177, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2204 = torch.aten.mul.Tensor %2203, %2175 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2205 = torch.aten.broadcast_to %79, %1259 : !torch.vtensor<[320,20,1],f16>, !torch.list<int> -> !torch.vtensor<[320,20,16],f16> loc(#loc1)
%2206 = torch.aten.clone %2205, %int0 : !torch.vtensor<[320,20,16],f16>, !torch.int -> !torch.vtensor<[320,20,16],f16> loc(#loc1)
%2207 = torch.aten.view %2206, %1262 : !torch.vtensor<[320,20,16],f16>, !torch.list<int> -> !torch.vtensor<[320,320],f16> loc(#loc1)
%2208 = torch.aten.mul.Tensor %80, %2207 : !torch.vtensor<[320,320],si8>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16> loc(#loc1)
%2209 = torch.aten.transpose.int %2208, %int0, %int1 : !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,320],f16> loc(#loc1)
%2210 = torch.aten.view %2204, %1266 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16> loc(#loc1)
%2211 = torch.aten.mm %2210, %2209 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16> loc(#loc1)
%2212 = torch.aten.mul.Scalar %81, %int1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320],f16> loc(#loc1)
%2213 = torch.aten.add.Tensor %2212, %2211, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[8192,320],f16>, !torch.int -> !torch.vtensor<[8192,320],f16> loc(#loc1)
%2214 = torch.aten.view %2213, %1198 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
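    // Note: third repetition of the pattern on %2108, presumably the value projection.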
%2215 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2216 = torch.aten.detach %2215 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2217 = torch.aten.view %2108, %1216 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,20,16],f16> loc(#loc1)
%2218 = torch.aten.abs %2217 : !torch.vtensor<[2,4096,20,16],f16> -> !torch.vtensor<[2,4096,20,16],f16> loc(#loc1)
%values_36, %indices_37 = torch.aten.max.dim %2218, %int3, %true : !torch.vtensor<[2,4096,20,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,4096,20,1],f16>, !torch.vtensor<[2,4096,20,1],si64> loc(#loc1)
%2219 = torch.aten.view %values_36, %1219 : !torch.vtensor<[2,4096,20,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,20,1],f16> loc(#loc1)
%2220 = torch.aten.broadcast_to %2219, %1216 : !torch.vtensor<[2,4096,20,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,20,16],f16> loc(#loc1)
%2221 = torch.aten.clone %2220, %int0 : !torch.vtensor<[2,4096,20,16],f16>, !torch.int -> !torch.vtensor<[2,4096,20,16],f16> loc(#loc1)
%2222 = torch.aten.view %2221, %1198 : !torch.vtensor<[2,4096,20,16],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2223 = torch.aten.sub.Tensor %2216, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%2224 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%2225 = torch.aten.pow.Tensor_Tensor %2224, %2223 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2226 = torch.aten.neg %2225 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2227 = torch.aten.neg %2226 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2228 = torch.aten.div.Tensor %2222, %2227 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2229 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2230 = torch.aten.detach %2229 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2231 = torch.aten.div.Tensor %2108, %2228 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2232 = torch.aten.add.Tensor %2231, %2230, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2233 = torch.aten.sub.Tensor %2216, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%2234 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%2235 = torch.aten.pow.Tensor_Tensor %2234, %2233 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2236 = torch.aten.neg %2235 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2237 = torch.aten.sub.Tensor %2216, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%2238 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%2239 = torch.aten.pow.Tensor_Tensor %2238, %2237 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2240 = torch.aten.sub.Tensor %2239, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%2241 = torch.aten.gt.Tensor %2232, %2240 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,4096,320],i1> loc(#loc1)
%2242 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%2243 = torch.aten.to.dtype %2242, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%2244 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%2245 = torch.aten.broadcast_to %2243, %2244 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%2246 = torch.valsem.aten.copy %2245, %2240, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%2247 = torch.aten.where.self %2241, %2246, %2232 : !torch.vtensor<[2,4096,320],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2248 = torch.aten.lt.Tensor %2247, %2236 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,4096,320],i1> loc(#loc1)
%2249 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%2250 = torch.aten.to.dtype %2249, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%2251 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%2252 = torch.aten.broadcast_to %2250, %2251 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%2253 = torch.valsem.aten.copy %2252, %2236, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%2254 = torch.aten.where.self %2248, %2253, %2247 : !torch.vtensor<[2,4096,320],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2255 = torch.aten.round %2254 : !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2256 = torch.aten.sub.Tensor %2255, %2230, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2257 = torch.aten.mul.Tensor %2256, %2228 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2258 = torch.aten.broadcast_to %82, %1259 : !torch.vtensor<[320,20,1],f16>, !torch.list<int> -> !torch.vtensor<[320,20,16],f16> loc(#loc1)
%2259 = torch.aten.clone %2258, %int0 : !torch.vtensor<[320,20,16],f16>, !torch.int -> !torch.vtensor<[320,20,16],f16> loc(#loc1)
%2260 = torch.aten.view %2259, %1262 : !torch.vtensor<[320,20,16],f16>, !torch.list<int> -> !torch.vtensor<[320,320],f16> loc(#loc1)
%2261 = torch.aten.mul.Tensor %83, %2260 : !torch.vtensor<[320,320],si8>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16> loc(#loc1)
%2262 = torch.aten.transpose.int %2261, %int0, %int1 : !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,320],f16> loc(#loc1)
%2263 = torch.aten.view %2257, %1266 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16> loc(#loc1)
%2264 = torch.aten.mm %2263, %2262 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16> loc(#loc1)
%2265 = torch.aten.mul.Scalar %84, %int1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320],f16> loc(#loc1)
%2266 = torch.aten.add.Tensor %2265, %2264, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[8192,320],f16>, !torch.int -> !torch.vtensor<[8192,320],f16> loc(#loc1)
%2267 = torch.aten.view %2266, %1198 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
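    // Note: %2268 onward reshape the three projections from [2,4096,320] to
    // [16,4096,40], i.e. 8 heads of width 40 folded into the batch dimension.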
%2268 = torch.aten.view %2161, %1378 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16> loc(#loc1)
%2269 = torch.aten.permute %2268, %1380 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16> loc(#loc1)
%2270 = torch.aten.clone %2269, %int0 : !torch.vtensor<[2,8,4096,40],f16>, !torch.int -> !torch.vtensor<[2,8,4096,40],f16> loc(#loc1)
%2271 = torch.aten.view %2270, %1383 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16> loc(#loc1)
%2272 = torch.aten.view %2214, %1378 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16> loc(#loc1)
%2273 = torch.aten.permute %2272, %1380 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16> loc(#loc1)
%2274 = torch.aten.clone %2273, %int0 : !torch.vtensor<[2,8,4096,40],f16>, !torch.int -> !torch.vtensor<[2,8,4096,40],f16> loc(#loc1)
%2275 = torch.aten.view %2274, %1383 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16> loc(#loc1)
%2276 = torch.aten.view %2267, %1378 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16> loc(#loc1)
%2277 = torch.aten.permute %2276, %1380 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16> loc(#loc1)
%2278 = torch.aten.clone %2277, %int0 : !torch.vtensor<[2,8,4096,40],f16>, !torch.int -> !torch.vtensor<[2,8,4096,40],f16> loc(#loc1)
%2279 = torch.aten.view %2278, %1383 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16> loc(#loc1)
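    // Note: K is transposed to [16,40,4096] below; the broadcast_to/view pairs at
    // unchanged shapes look like no-op expands left over from the matmul decomposition.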
%2280 = torch.aten.transpose.int %2275, %int-1, %int-2 : !torch.vtensor<[16,4096,40],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,40,4096],f16> loc(#loc1)
%2281 = torch.aten.broadcast_to %2271, %1383 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16> loc(#loc1)
%2282 = torch.aten.view %2281, %1383 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16> loc(#loc1)
%2283 = torch.aten.broadcast_to %2280, %1396 : !torch.vtensor<[16,40,4096],f16>, !torch.list<int> -> !torch.vtensor<[16,40,4096],f16> loc(#loc1)
%2284 = torch.aten.view %2283, %1396 : !torch.vtensor<[16,40,4096],f16>, !torch.list<int> -> !torch.vtensor<[16,40,4096],f16> loc(#loc1)
%2285 = torch.aten.bmm %2282, %2284 : !torch.vtensor<[16,4096,40],f16>, !torch.vtensor<[16,40,4096],f16> -> !torch.vtensor<[16,4096,4096],f16> loc(#loc1)
%2286 = torch.aten.view %2285, %1400 : !torch.vtensor<[16,4096,4096],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,4096],f16> loc(#loc1)
%2287 = torch.aten.mul.Tensor %2286, %2 : !torch.vtensor<[16,4096,4096],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,4096,4096],f16> loc(#loc1)
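    // Note: %2280-%2287 form the attention scores: Q @ K^T via bmm, scaled by %2
    // (0.15811... = 1/sqrt(40), matching the head width).
    // The max/sub/exp/sum/div sequence below is a numerically stable softmax:
    // subtracting the per-row max before exp avoids f16 overflow.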
%values_38, %indices_39 = torch.aten.max.dim %2287, %int-1, %true : !torch.vtensor<[16,4096,4096],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,4096,1],f16>, !torch.vtensor<[16,4096,1],si64> loc(#loc1)
%2288 = torch.aten.sub.Tensor %2287, %values_38, %float1.000000e00 : !torch.vtensor<[16,4096,4096],f16>, !torch.vtensor<[16,4096,1],f16>, !torch.float -> !torch.vtensor<[16,4096,4096],f16> loc(#loc1)
%2289 = torch.aten.exp %2288 : !torch.vtensor<[16,4096,4096],f16> -> !torch.vtensor<[16,4096,4096],f16> loc(#loc1)
%2290 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int> loc(#loc1)
%2291 = torch.aten.sum.dim_IntList %2289, %2290, %true, %none : !torch.vtensor<[16,4096,4096],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[16,4096,1],f16> loc(#loc1)
%2292 = torch.aten.div.Tensor %2289, %2291 : !torch.vtensor<[16,4096,4096],f16>, !torch.vtensor<[16,4096,1],f16> -> !torch.vtensor<[16,4096,4096],f16> loc(#loc1)
%2293 = torch.aten.broadcast_to %2292, %1400 : !torch.vtensor<[16,4096,4096],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,4096],f16> loc(#loc1)
%2294 = torch.aten.view %2293, %1400 : !torch.vtensor<[16,4096,4096],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,4096],f16> loc(#loc1)
%2295 = torch.aten.broadcast_to %2279, %1383 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16> loc(#loc1)
%2296 = torch.aten.view %2295, %1383 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16> loc(#loc1)
%2297 = torch.aten.bmm %2294, %2296 : !torch.vtensor<[16,4096,4096],f16>, !torch.vtensor<[16,4096,40],f16> -> !torch.vtensor<[16,4096,40],f16> loc(#loc1)
%2298 = torch.aten.view %2297, %1383 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16> loc(#loc1)
%2299 = torch.aten.view %2298, %1414 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16> loc(#loc1)
%2300 = torch.aten.permute %2299, %1380 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16> loc(#loc1)
%2301 = torch.aten.clone %2300, %int0 : !torch.vtensor<[2,4096,8,40],f16>, !torch.int -> !torch.vtensor<[2,4096,8,40],f16> loc(#loc1)
%2302 = torch.aten.view %2301, %1198 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
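    // Note: %2297-%2302 multiply the softmax probabilities into V and restore the
    // [2,4096,320] layout. The block below fake-quantizes this attention output and
    // applies one more quantized linear (%85/%86/%87), presumably the attention's
    // output projection.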
%2303 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2304 = torch.aten.detach %2303 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2305 = torch.aten.view %2302, %1216 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,20,16],f16> loc(#loc1)
%2306 = torch.aten.abs %2305 : !torch.vtensor<[2,4096,20,16],f16> -> !torch.vtensor<[2,4096,20,16],f16> loc(#loc1)
%values_40, %indices_41 = torch.aten.max.dim %2306, %int3, %true : !torch.vtensor<[2,4096,20,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,4096,20,1],f16>, !torch.vtensor<[2,4096,20,1],si64> loc(#loc1)
%2307 = torch.aten.view %values_40, %1219 : !torch.vtensor<[2,4096,20,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,20,1],f16> loc(#loc1)
%2308 = torch.aten.broadcast_to %2307, %1216 : !torch.vtensor<[2,4096,20,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,20,16],f16> loc(#loc1)
%2309 = torch.aten.clone %2308, %int0 : !torch.vtensor<[2,4096,20,16],f16>, !torch.int -> !torch.vtensor<[2,4096,20,16],f16> loc(#loc1)
%2310 = torch.aten.view %2309, %1198 : !torch.vtensor<[2,4096,20,16],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2311 = torch.aten.sub.Tensor %2304, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%2312 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%2313 = torch.aten.pow.Tensor_Tensor %2312, %2311 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2314 = torch.aten.neg %2313 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2315 = torch.aten.neg %2314 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2316 = torch.aten.div.Tensor %2310, %2315 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2317 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2318 = torch.aten.detach %2317 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2319 = torch.aten.div.Tensor %2302, %2316 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2320 = torch.aten.add.Tensor %2319, %2318, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2321 = torch.aten.sub.Tensor %2304, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%2322 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%2323 = torch.aten.pow.Tensor_Tensor %2322, %2321 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2324 = torch.aten.neg %2323 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2325 = torch.aten.sub.Tensor %2304, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%2326 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%2327 = torch.aten.pow.Tensor_Tensor %2326, %2325 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2328 = torch.aten.sub.Tensor %2327, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%2329 = torch.aten.gt.Tensor %2320, %2328 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,4096,320],i1> loc(#loc1)
%2330 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%2331 = torch.aten.to.dtype %2330, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%2332 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%2333 = torch.aten.broadcast_to %2331, %2332 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%2334 = torch.valsem.aten.copy %2333, %2328, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%2335 = torch.aten.where.self %2329, %2334, %2320 : !torch.vtensor<[2,4096,320],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2336 = torch.aten.lt.Tensor %2335, %2324 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,4096,320],i1> loc(#loc1)
%2337 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%2338 = torch.aten.to.dtype %2337, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%2339 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%2340 = torch.aten.broadcast_to %2338, %2339 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%2341 = torch.valsem.aten.copy %2340, %2324, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%2342 = torch.aten.where.self %2336, %2341, %2335 : !torch.vtensor<[2,4096,320],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2343 = torch.aten.round %2342 : !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2344 = torch.aten.sub.Tensor %2343, %2318, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2345 = torch.aten.mul.Tensor %2344, %2316 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2346 = torch.aten.broadcast_to %85, %1259 : !torch.vtensor<[320,20,1],f16>, !torch.list<int> -> !torch.vtensor<[320,20,16],f16> loc(#loc1)
%2347 = torch.aten.clone %2346, %int0 : !torch.vtensor<[320,20,16],f16>, !torch.int -> !torch.vtensor<[320,20,16],f16> loc(#loc1)
%2348 = torch.aten.view %2347, %1262 : !torch.vtensor<[320,20,16],f16>, !torch.list<int> -> !torch.vtensor<[320,320],f16> loc(#loc1)
%2349 = torch.aten.mul.Tensor %86, %2348 : !torch.vtensor<[320,320],si8>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16> loc(#loc1)
%2350 = torch.aten.transpose.int %2349, %int0, %int1 : !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,320],f16> loc(#loc1)
%2351 = torch.aten.view %2345, %1266 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16> loc(#loc1)
%2352 = torch.aten.mm %2351, %2350 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16> loc(#loc1)
%2353 = torch.aten.mul.Scalar %87, %int1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320],f16> loc(#loc1)
%2354 = torch.aten.add.Tensor %2353, %2352, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[8192,320],f16>, !torch.int -> !torch.vtensor<[8192,320],f16> loc(#loc1)
%2355 = torch.aten.view %2354, %1198 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
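    // Note: %2356 is the residual connection: the projected attention output is added
    // back onto the pre-attention tokens %2094. %2357-%2370 then repeat the manual
    // LayerNorm on the result.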
%2356 = torch.aten.add.Tensor %2355, %2094, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2357 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int> loc(#loc1)
%2358 = torch.aten.sum.dim_IntList %2356, %2357, %true, %none : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,4096,1],f16> loc(#loc1)
%2359 = torch.aten.div.Scalar %2358, %int320 : !torch.vtensor<[2,4096,1],f16>, !torch.int -> !torch.vtensor<[2,4096,1],f16> loc(#loc1)
%2360 = torch.prim.ListConstruct %int2, %int4096, %int320 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%2361 = torch.aten.broadcast_to %2359, %2360 : !torch.vtensor<[2,4096,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2362 = torch.aten.sub.Tensor %2356, %2361, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2363 = torch.aten.mul.Tensor %2362, %2362 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2364 = torch.aten.sum.dim_IntList %2363, %2357, %true, %none : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,4096,1],f16> loc(#loc1)
%2365 = torch.aten.div.Scalar %2364, %int320 : !torch.vtensor<[2,4096,1],f16>, !torch.int -> !torch.vtensor<[2,4096,1],f16> loc(#loc1)
%2366 = torch.aten.add.Scalar %2365, %float1.000000e-05, %int1 : !torch.vtensor<[2,4096,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,4096,1],f16> loc(#loc1)
%2367 = torch.aten.rsqrt %2366 : !torch.vtensor<[2,4096,1],f16> -> !torch.vtensor<[2,4096,1],f16> loc(#loc1)
%2368 = torch.prim.ListConstruct %int2, %int4096, %int320 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%2369 = torch.aten.broadcast_to %2367, %2368 : !torch.vtensor<[2,4096,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2370 = torch.aten.mul.Tensor %2362, %2369 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
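    // Note: the normalized tokens are fake-quantized once more and projected to queries
    // (%88/%89/%90). The key/value inputs that follow come from %arg2, the [2,77,768]
    // text-encoder states, so this is presumably the cross-attention half of the block.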
%2371 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2372 = torch.aten.detach %2371 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2373 = torch.aten.view %2370, %1216 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,20,16],f16> loc(#loc1)
%2374 = torch.aten.abs %2373 : !torch.vtensor<[2,4096,20,16],f16> -> !torch.vtensor<[2,4096,20,16],f16> loc(#loc1)
%values_42, %indices_43 = torch.aten.max.dim %2374, %int3, %true : !torch.vtensor<[2,4096,20,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,4096,20,1],f16>, !torch.vtensor<[2,4096,20,1],si64> loc(#loc1)
%2375 = torch.aten.view %values_42, %1219 : !torch.vtensor<[2,4096,20,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,20,1],f16> loc(#loc1)
%2376 = torch.aten.broadcast_to %2375, %1216 : !torch.vtensor<[2,4096,20,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,20,16],f16> loc(#loc1)
%2377 = torch.aten.clone %2376, %int0 : !torch.vtensor<[2,4096,20,16],f16>, !torch.int -> !torch.vtensor<[2,4096,20,16],f16> loc(#loc1)
%2378 = torch.aten.view %2377, %1198 : !torch.vtensor<[2,4096,20,16],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2379 = torch.aten.sub.Tensor %2372, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%2380 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%2381 = torch.aten.pow.Tensor_Tensor %2380, %2379 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2382 = torch.aten.neg %2381 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2383 = torch.aten.neg %2382 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2384 = torch.aten.div.Tensor %2378, %2383 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2385 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2386 = torch.aten.detach %2385 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2387 = torch.aten.div.Tensor %2370, %2384 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2388 = torch.aten.add.Tensor %2387, %2386, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2389 = torch.aten.sub.Tensor %2372, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%2390 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%2391 = torch.aten.pow.Tensor_Tensor %2390, %2389 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2392 = torch.aten.neg %2391 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2393 = torch.aten.sub.Tensor %2372, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%2394 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%2395 = torch.aten.pow.Tensor_Tensor %2394, %2393 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2396 = torch.aten.sub.Tensor %2395, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%2397 = torch.aten.gt.Tensor %2388, %2396 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,4096,320],i1> loc(#loc1)
%2398 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%2399 = torch.aten.to.dtype %2398, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%2400 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%2401 = torch.aten.broadcast_to %2399, %2400 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%2402 = torch.valsem.aten.copy %2401, %2396, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%2403 = torch.aten.where.self %2397, %2402, %2388 : !torch.vtensor<[2,4096,320],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2404 = torch.aten.lt.Tensor %2403, %2392 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,4096,320],i1> loc(#loc1)
%2405 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%2406 = torch.aten.to.dtype %2405, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%2407 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%2408 = torch.aten.broadcast_to %2406, %2407 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%2409 = torch.valsem.aten.copy %2408, %2392, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%2410 = torch.aten.where.self %2404, %2409, %2403 : !torch.vtensor<[2,4096,320],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2411 = torch.aten.round %2410 : !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2412 = torch.aten.sub.Tensor %2411, %2386, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2413 = torch.aten.mul.Tensor %2412, %2384 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2414 = torch.aten.broadcast_to %88, %1259 : !torch.vtensor<[320,20,1],f16>, !torch.list<int> -> !torch.vtensor<[320,20,16],f16> loc(#loc1)
%2415 = torch.aten.clone %2414, %int0 : !torch.vtensor<[320,20,16],f16>, !torch.int -> !torch.vtensor<[320,20,16],f16> loc(#loc1)
%2416 = torch.aten.view %2415, %1262 : !torch.vtensor<[320,20,16],f16>, !torch.list<int> -> !torch.vtensor<[320,320],f16> loc(#loc1)
%2417 = torch.aten.mul.Tensor %89, %2416 : !torch.vtensor<[320,320],si8>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16> loc(#loc1)
%2418 = torch.aten.transpose.int %2417, %int0, %int1 : !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,320],f16> loc(#loc1)
%2419 = torch.aten.view %2413, %1266 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16> loc(#loc1)
%2420 = torch.aten.mm %2419, %2418 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16> loc(#loc1)
%2421 = torch.aten.mul.Scalar %90, %int1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320],f16> loc(#loc1)
%2422 = torch.aten.add.Tensor %2421, %2420, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[8192,320],f16>, !torch.int -> !torch.vtensor<[8192,320],f16> loc(#loc1)
%2423 = torch.aten.view %2422, %1198 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
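    // Note: the same fake-quant recipe is now applied to the encoder hidden states
    // %arg2 ([2,77,768], viewed as 48 groups of 16) before a 768 -> 320 quantized
    // projection (%91/%92), giving the cross-attention keys. No bias is added here.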
%2424 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2425 = torch.aten.detach %2424 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2426 = torch.aten.view %arg2, %1542 : !torch.vtensor<[2,77,768],f16>, !torch.list<int> -> !torch.vtensor<[2,77,48,16],f16> loc(#loc1)
%2427 = torch.aten.abs %2426 : !torch.vtensor<[2,77,48,16],f16> -> !torch.vtensor<[2,77,48,16],f16> loc(#loc1)
%values_44, %indices_45 = torch.aten.max.dim %2427, %int3, %true : !torch.vtensor<[2,77,48,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,77,48,1],f16>, !torch.vtensor<[2,77,48,1],si64> loc(#loc1)
%2428 = torch.aten.view %values_44, %1545 : !torch.vtensor<[2,77,48,1],f16>, !torch.list<int> -> !torch.vtensor<[2,77,48,1],f16> loc(#loc1)
%2429 = torch.aten.broadcast_to %2428, %1542 : !torch.vtensor<[2,77,48,1],f16>, !torch.list<int> -> !torch.vtensor<[2,77,48,16],f16> loc(#loc1)
%2430 = torch.aten.clone %2429, %int0 : !torch.vtensor<[2,77,48,16],f16>, !torch.int -> !torch.vtensor<[2,77,48,16],f16> loc(#loc1)
%2431 = torch.aten.view %2430, %1549 : !torch.vtensor<[2,77,48,16],f16>, !torch.list<int> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%2432 = torch.aten.sub.Tensor %2425, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%2433 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%2434 = torch.aten.pow.Tensor_Tensor %2433, %2432 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2435 = torch.aten.neg %2434 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2436 = torch.aten.neg %2435 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2437 = torch.aten.div.Tensor %2431, %2436 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%2438 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2439 = torch.aten.detach %2438 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2440 = torch.aten.div.Tensor %arg2, %2437 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[2,77,768],f16> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%2441 = torch.aten.add.Tensor %2440, %2439, %int1 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%2442 = torch.aten.sub.Tensor %2425, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%2443 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%2444 = torch.aten.pow.Tensor_Tensor %2443, %2442 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2445 = torch.aten.neg %2444 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2446 = torch.aten.sub.Tensor %2425, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%2447 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%2448 = torch.aten.pow.Tensor_Tensor %2447, %2446 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2449 = torch.aten.sub.Tensor %2448, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%2450 = torch.aten.gt.Tensor %2441, %2449 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,77,768],i1> loc(#loc1)
%2451 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%2452 = torch.aten.to.dtype %2451, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%2453 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%2454 = torch.aten.broadcast_to %2452, %2453 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%2455 = torch.valsem.aten.copy %2454, %2449, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%2456 = torch.aten.where.self %2450, %2455, %2441 : !torch.vtensor<[2,77,768],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,77,768],f16> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%2457 = torch.aten.lt.Tensor %2456, %2445 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,77,768],i1> loc(#loc1)
%2458 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%2459 = torch.aten.to.dtype %2458, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%2460 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%2461 = torch.aten.broadcast_to %2459, %2460 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%2462 = torch.valsem.aten.copy %2461, %2445, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%2463 = torch.aten.where.self %2457, %2462, %2456 : !torch.vtensor<[2,77,768],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,77,768],f16> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%2464 = torch.aten.round %2463 : !torch.vtensor<[2,77,768],f16> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%2465 = torch.aten.sub.Tensor %2464, %2439, %int1 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%2466 = torch.aten.mul.Tensor %2465, %2437 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[2,77,768],f16> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%2467 = torch.aten.broadcast_to %91, %1586 : !torch.vtensor<[320,48,1],f16>, !torch.list<int> -> !torch.vtensor<[320,48,16],f16> loc(#loc1)
%2468 = torch.aten.clone %2467, %int0 : !torch.vtensor<[320,48,16],f16>, !torch.int -> !torch.vtensor<[320,48,16],f16> loc(#loc1)
%2469 = torch.aten.view %2468, %1589 : !torch.vtensor<[320,48,16],f16>, !torch.list<int> -> !torch.vtensor<[320,768],f16> loc(#loc1)
%2470 = torch.aten.mul.Tensor %92, %2469 : !torch.vtensor<[320,768],si8>, !torch.vtensor<[320,768],f16> -> !torch.vtensor<[320,768],f16> loc(#loc1)
%2471 = torch.aten.transpose.int %2470, %int0, %int1 : !torch.vtensor<[320,768],f16>, !torch.int, !torch.int -> !torch.vtensor<[768,320],f16> loc(#loc1)
%2472 = torch.aten.view %2466, %1593 : !torch.vtensor<[2,77,768],f16>, !torch.list<int> -> !torch.vtensor<[154,768],f16> loc(#loc1)
%2473 = torch.aten.mm %2472, %2471 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,320],f16> -> !torch.vtensor<[154,320],f16> loc(#loc1)
%2474 = torch.aten.view %2473, %1596 : !torch.vtensor<[154,320],f16>, !torch.list<int> -> !torch.vtensor<[2,77,320],f16> loc(#loc1)
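    // Note: the value path repeats the identical quantization and 768 -> 320
    // projection on %arg2 with weights %93/%94, again without a bias.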
%2475 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2476 = torch.aten.detach %2475 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2477 = torch.aten.view %arg2, %1542 : !torch.vtensor<[2,77,768],f16>, !torch.list<int> -> !torch.vtensor<[2,77,48,16],f16> loc(#loc1)
%2478 = torch.aten.abs %2477 : !torch.vtensor<[2,77,48,16],f16> -> !torch.vtensor<[2,77,48,16],f16> loc(#loc1)
%values_46, %indices_47 = torch.aten.max.dim %2478, %int3, %true : !torch.vtensor<[2,77,48,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,77,48,1],f16>, !torch.vtensor<[2,77,48,1],si64> loc(#loc1)
%2479 = torch.aten.view %values_46, %1545 : !torch.vtensor<[2,77,48,1],f16>, !torch.list<int> -> !torch.vtensor<[2,77,48,1],f16> loc(#loc1)
%2480 = torch.aten.broadcast_to %2479, %1542 : !torch.vtensor<[2,77,48,1],f16>, !torch.list<int> -> !torch.vtensor<[2,77,48,16],f16> loc(#loc1)
%2481 = torch.aten.clone %2480, %int0 : !torch.vtensor<[2,77,48,16],f16>, !torch.int -> !torch.vtensor<[2,77,48,16],f16> loc(#loc1)
%2482 = torch.aten.view %2481, %1549 : !torch.vtensor<[2,77,48,16],f16>, !torch.list<int> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%2483 = torch.aten.sub.Tensor %2476, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%2484 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%2485 = torch.aten.pow.Tensor_Tensor %2484, %2483 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2486 = torch.aten.neg %2485 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2487 = torch.aten.neg %2486 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2488 = torch.aten.div.Tensor %2482, %2487 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%2489 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2490 = torch.aten.detach %2489 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2491 = torch.aten.div.Tensor %arg2, %2488 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[2,77,768],f16> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%2492 = torch.aten.add.Tensor %2491, %2490, %int1 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%2493 = torch.aten.sub.Tensor %2476, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%2494 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%2495 = torch.aten.pow.Tensor_Tensor %2494, %2493 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2496 = torch.aten.neg %2495 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2497 = torch.aten.sub.Tensor %2476, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%2498 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%2499 = torch.aten.pow.Tensor_Tensor %2498, %2497 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2500 = torch.aten.sub.Tensor %2499, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%2501 = torch.aten.gt.Tensor %2492, %2500 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,77,768],i1> loc(#loc1)
%2502 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%2503 = torch.aten.to.dtype %2502, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%2504 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%2505 = torch.aten.broadcast_to %2503, %2504 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%2506 = torch.valsem.aten.copy %2505, %2500, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%2507 = torch.aten.where.self %2501, %2506, %2492 : !torch.vtensor<[2,77,768],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,77,768],f16> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%2508 = torch.aten.lt.Tensor %2507, %2496 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,77,768],i1> loc(#loc1)
%2509 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%2510 = torch.aten.to.dtype %2509, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%2511 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%2512 = torch.aten.broadcast_to %2510, %2511 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%2513 = torch.valsem.aten.copy %2512, %2496, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%2514 = torch.aten.where.self %2508, %2513, %2507 : !torch.vtensor<[2,77,768],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,77,768],f16> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%2515 = torch.aten.round %2514 : !torch.vtensor<[2,77,768],f16> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%2516 = torch.aten.sub.Tensor %2515, %2490, %int1 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%2517 = torch.aten.mul.Tensor %2516, %2488 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[2,77,768],f16> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
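// The ops above appear to fake-quantize the encoder hidden states (%arg2). Below, an si8 weight (%94)
// is dequantized with per-group scales (%93) and applied as a 768 -> 320 projection over the 77 text
// tokens -- likely the cross-attention value (to_v) path.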
%2518 = torch.aten.broadcast_to %93, %1586 : !torch.vtensor<[320,48,1],f16>, !torch.list<int> -> !torch.vtensor<[320,48,16],f16> loc(#loc1)
%2519 = torch.aten.clone %2518, %int0 : !torch.vtensor<[320,48,16],f16>, !torch.int -> !torch.vtensor<[320,48,16],f16> loc(#loc1)
%2520 = torch.aten.view %2519, %1589 : !torch.vtensor<[320,48,16],f16>, !torch.list<int> -> !torch.vtensor<[320,768],f16> loc(#loc1)
%2521 = torch.aten.mul.Tensor %94, %2520 : !torch.vtensor<[320,768],si8>, !torch.vtensor<[320,768],f16> -> !torch.vtensor<[320,768],f16> loc(#loc1)
%2522 = torch.aten.transpose.int %2521, %int0, %int1 : !torch.vtensor<[320,768],f16>, !torch.int, !torch.int -> !torch.vtensor<[768,320],f16> loc(#loc1)
%2523 = torch.aten.view %2517, %1593 : !torch.vtensor<[2,77,768],f16>, !torch.list<int> -> !torch.vtensor<[154,768],f16> loc(#loc1)
%2524 = torch.aten.mm %2523, %2522 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,320],f16> -> !torch.vtensor<[154,320],f16> loc(#loc1)
%2525 = torch.aten.view %2524, %1596 : !torch.vtensor<[154,320],f16>, !torch.list<int> -> !torch.vtensor<[2,77,320],f16> loc(#loc1)
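// Split into 8 attention heads of dim 40 (batch*heads = 16): Q from the 4096 spatial tokens (%2423),
// K (%2474) and V (%2525) from the 77 text tokens.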
%2526 = torch.aten.view %2423, %1378 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16> loc(#loc1)
%2527 = torch.aten.permute %2526, %1380 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16> loc(#loc1)
%2528 = torch.aten.clone %2527, %int0 : !torch.vtensor<[2,8,4096,40],f16>, !torch.int -> !torch.vtensor<[2,8,4096,40],f16> loc(#loc1)
%2529 = torch.aten.view %2528, %1383 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16> loc(#loc1)
%2530 = torch.aten.view %2474, %1653 : !torch.vtensor<[2,77,320],f16>, !torch.list<int> -> !torch.vtensor<[2,77,8,40],f16> loc(#loc1)
%2531 = torch.aten.permute %2530, %1380 : !torch.vtensor<[2,77,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,40],f16> loc(#loc1)
%2532 = torch.aten.clone %2531, %int0 : !torch.vtensor<[2,8,77,40],f16>, !torch.int -> !torch.vtensor<[2,8,77,40],f16> loc(#loc1)
%2533 = torch.aten.view %2532, %1657 : !torch.vtensor<[2,8,77,40],f16>, !torch.list<int> -> !torch.vtensor<[16,77,40],f16> loc(#loc1)
%2534 = torch.aten.view %2525, %1653 : !torch.vtensor<[2,77,320],f16>, !torch.list<int> -> !torch.vtensor<[2,77,8,40],f16> loc(#loc1)
%2535 = torch.aten.permute %2534, %1380 : !torch.vtensor<[2,77,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,40],f16> loc(#loc1)
%2536 = torch.aten.clone %2535, %int0 : !torch.vtensor<[2,8,77,40],f16>, !torch.int -> !torch.vtensor<[2,8,77,40],f16> loc(#loc1)
%2537 = torch.aten.view %2536, %1657 : !torch.vtensor<[2,8,77,40],f16>, !torch.list<int> -> !torch.vtensor<[16,77,40],f16> loc(#loc1)
%2538 = torch.aten.transpose.int %2533, %int-1, %int-2 : !torch.vtensor<[16,77,40],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,40,77],f16> loc(#loc1)
%2539 = torch.aten.broadcast_to %2529, %1383 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16> loc(#loc1)
%2540 = torch.aten.view %2539, %1383 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16> loc(#loc1)
%2541 = torch.aten.broadcast_to %2538, %1666 : !torch.vtensor<[16,40,77],f16>, !torch.list<int> -> !torch.vtensor<[16,40,77],f16> loc(#loc1)
%2542 = torch.aten.view %2541, %1666 : !torch.vtensor<[16,40,77],f16>, !torch.list<int> -> !torch.vtensor<[16,40,77],f16> loc(#loc1)
%2543 = torch.aten.bmm %2540, %2542 : !torch.vtensor<[16,4096,40],f16>, !torch.vtensor<[16,40,77],f16> -> !torch.vtensor<[16,4096,77],f16> loc(#loc1)
%2544 = torch.aten.view %2543, %1670 : !torch.vtensor<[16,4096,77],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,77],f16> loc(#loc1)
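// Scaled dot-product attention scores: Q @ K^T scaled by %2 = 0.15811... = 1/sqrt(40),
// then a numerically stable softmax (subtract row max, exp, normalize).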
%2545 = torch.aten.mul.Tensor %2544, %2 : !torch.vtensor<[16,4096,77],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,4096,77],f16> loc(#loc1)
%values_48, %indices_49 = torch.aten.max.dim %2545, %int-1, %true : !torch.vtensor<[16,4096,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,4096,1],f16>, !torch.vtensor<[16,4096,1],si64> loc(#loc1)
%2546 = torch.aten.sub.Tensor %2545, %values_48, %float1.000000e00 : !torch.vtensor<[16,4096,77],f16>, !torch.vtensor<[16,4096,1],f16>, !torch.float -> !torch.vtensor<[16,4096,77],f16> loc(#loc1)
%2547 = torch.aten.exp %2546 : !torch.vtensor<[16,4096,77],f16> -> !torch.vtensor<[16,4096,77],f16> loc(#loc1)
%2548 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int> loc(#loc1)
%2549 = torch.aten.sum.dim_IntList %2547, %2548, %true, %none : !torch.vtensor<[16,4096,77],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[16,4096,1],f16> loc(#loc1)
%2550 = torch.aten.div.Tensor %2547, %2549 : !torch.vtensor<[16,4096,77],f16>, !torch.vtensor<[16,4096,1],f16> -> !torch.vtensor<[16,4096,77],f16> loc(#loc1)
%2551 = torch.aten.broadcast_to %2550, %1670 : !torch.vtensor<[16,4096,77],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,77],f16> loc(#loc1)
%2552 = torch.aten.view %2551, %1670 : !torch.vtensor<[16,4096,77],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,77],f16> loc(#loc1)
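// Weight the values with the attention probabilities and merge the 8 heads back to [2,4096,320].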
%2553 = torch.aten.broadcast_to %2537, %1657 : !torch.vtensor<[16,77,40],f16>, !torch.list<int> -> !torch.vtensor<[16,77,40],f16> loc(#loc1)
%2554 = torch.aten.view %2553, %1657 : !torch.vtensor<[16,77,40],f16>, !torch.list<int> -> !torch.vtensor<[16,77,40],f16> loc(#loc1)
%2555 = torch.aten.bmm %2552, %2554 : !torch.vtensor<[16,4096,77],f16>, !torch.vtensor<[16,77,40],f16> -> !torch.vtensor<[16,4096,40],f16> loc(#loc1)
%2556 = torch.aten.view %2555, %1383 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16> loc(#loc1)
%2557 = torch.aten.view %2556, %1414 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16> loc(#loc1)
%2558 = torch.aten.permute %2557, %1380 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16> loc(#loc1)
%2559 = torch.aten.clone %2558, %int0 : !torch.vtensor<[2,4096,8,40],f16>, !torch.int -> !torch.vtensor<[2,4096,8,40],f16> loc(#loc1)
%2560 = torch.aten.view %2559, %1198 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
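// Fake-quant (quantize-dequantize) of the attention output: a per-group max-abs over 16-channel
// groups gives the scale; %880/%881 presumably hold the bit-width n and zero-point, %882 the base 2,
// so values are clamped to [-2^(n-1), 2^(n-1)-1], rounded, then rescaled.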
%2561 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2562 = torch.aten.detach %2561 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2563 = torch.aten.view %2560, %1216 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,20,16],f16> loc(#loc1)
%2564 = torch.aten.abs %2563 : !torch.vtensor<[2,4096,20,16],f16> -> !torch.vtensor<[2,4096,20,16],f16> loc(#loc1)
%values_50, %indices_51 = torch.aten.max.dim %2564, %int3, %true : !torch.vtensor<[2,4096,20,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,4096,20,1],f16>, !torch.vtensor<[2,4096,20,1],si64> loc(#loc1)
%2565 = torch.aten.view %values_50, %1219 : !torch.vtensor<[2,4096,20,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,20,1],f16> loc(#loc1)
%2566 = torch.aten.broadcast_to %2565, %1216 : !torch.vtensor<[2,4096,20,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,20,16],f16> loc(#loc1)
%2567 = torch.aten.clone %2566, %int0 : !torch.vtensor<[2,4096,20,16],f16>, !torch.int -> !torch.vtensor<[2,4096,20,16],f16> loc(#loc1)
%2568 = torch.aten.view %2567, %1198 : !torch.vtensor<[2,4096,20,16],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2569 = torch.aten.sub.Tensor %2562, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%2570 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%2571 = torch.aten.pow.Tensor_Tensor %2570, %2569 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2572 = torch.aten.neg %2571 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2573 = torch.aten.neg %2572 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2574 = torch.aten.div.Tensor %2568, %2573 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2575 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2576 = torch.aten.detach %2575 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2577 = torch.aten.div.Tensor %2560, %2574 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2578 = torch.aten.add.Tensor %2577, %2576, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2579 = torch.aten.sub.Tensor %2562, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%2580 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%2581 = torch.aten.pow.Tensor_Tensor %2580, %2579 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2582 = torch.aten.neg %2581 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2583 = torch.aten.sub.Tensor %2562, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%2584 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%2585 = torch.aten.pow.Tensor_Tensor %2584, %2583 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2586 = torch.aten.sub.Tensor %2585, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%2587 = torch.aten.gt.Tensor %2578, %2586 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,4096,320],i1> loc(#loc1)
%2588 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%2589 = torch.aten.to.dtype %2588, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%2590 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%2591 = torch.aten.broadcast_to %2589, %2590 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%2592 = torch.valsem.aten.copy %2591, %2586, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%2593 = torch.aten.where.self %2587, %2592, %2578 : !torch.vtensor<[2,4096,320],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2594 = torch.aten.lt.Tensor %2593, %2582 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,4096,320],i1> loc(#loc1)
%2595 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%2596 = torch.aten.to.dtype %2595, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%2597 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%2598 = torch.aten.broadcast_to %2596, %2597 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%2599 = torch.valsem.aten.copy %2598, %2582, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%2600 = torch.aten.where.self %2594, %2599, %2593 : !torch.vtensor<[2,4096,320],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2601 = torch.aten.round %2600 : !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2602 = torch.aten.sub.Tensor %2601, %2576, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2603 = torch.aten.mul.Tensor %2602, %2574 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
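// Dequantize the si8 out-projection weight (%96 * scales %95), apply the 320 -> 320 linear with
// bias %97, and add the residual (%2356).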
%2604 = torch.aten.broadcast_to %95, %1259 : !torch.vtensor<[320,20,1],f16>, !torch.list<int> -> !torch.vtensor<[320,20,16],f16> loc(#loc1)
%2605 = torch.aten.clone %2604, %int0 : !torch.vtensor<[320,20,16],f16>, !torch.int -> !torch.vtensor<[320,20,16],f16> loc(#loc1)
%2606 = torch.aten.view %2605, %1262 : !torch.vtensor<[320,20,16],f16>, !torch.list<int> -> !torch.vtensor<[320,320],f16> loc(#loc1)
%2607 = torch.aten.mul.Tensor %96, %2606 : !torch.vtensor<[320,320],si8>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16> loc(#loc1)
%2608 = torch.aten.transpose.int %2607, %int0, %int1 : !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,320],f16> loc(#loc1)
%2609 = torch.aten.view %2603, %1266 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16> loc(#loc1)
%2610 = torch.aten.mm %2609, %2608 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16> loc(#loc1)
%2611 = torch.aten.mul.Scalar %97, %int1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320],f16> loc(#loc1)
%2612 = torch.aten.add.Tensor %2611, %2610, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[8192,320],f16>, !torch.int -> !torch.vtensor<[8192,320],f16> loc(#loc1)
%2613 = torch.aten.view %2612, %1198 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2614 = torch.aten.add.Tensor %2613, %2356, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
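// LayerNorm over the 320-dim feature axis, spelled out: mean, centered variance, rsqrt(var + 1e-5).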
%2615 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int> loc(#loc1)
%2616 = torch.aten.sum.dim_IntList %2614, %2615, %true, %none : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,4096,1],f16> loc(#loc1)
%2617 = torch.aten.div.Scalar %2616, %int320 : !torch.vtensor<[2,4096,1],f16>, !torch.int -> !torch.vtensor<[2,4096,1],f16> loc(#loc1)
%2618 = torch.prim.ListConstruct %int2, %int4096, %int320 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%2619 = torch.aten.broadcast_to %2617, %2618 : !torch.vtensor<[2,4096,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2620 = torch.aten.sub.Tensor %2614, %2619, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2621 = torch.aten.mul.Tensor %2620, %2620 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2622 = torch.aten.sum.dim_IntList %2621, %2615, %true, %none : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,4096,1],f16> loc(#loc1)
%2623 = torch.aten.div.Scalar %2622, %int320 : !torch.vtensor<[2,4096,1],f16>, !torch.int -> !torch.vtensor<[2,4096,1],f16> loc(#loc1)
%2624 = torch.aten.add.Scalar %2623, %float1.000000e-05, %int1 : !torch.vtensor<[2,4096,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,4096,1],f16> loc(#loc1)
%2625 = torch.aten.rsqrt %2624 : !torch.vtensor<[2,4096,1],f16> -> !torch.vtensor<[2,4096,1],f16> loc(#loc1)
%2626 = torch.prim.ListConstruct %int2, %int4096, %int320 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%2627 = torch.aten.broadcast_to %2625, %2626 : !torch.vtensor<[2,4096,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2628 = torch.aten.mul.Tensor %2620, %2627 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
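// Feed-forward with a GEGLU gate: a quantized 320 -> 2560 projection is split into two 1280-wide
// halves, and the GELU of the second half gates the first.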
%2629 = torch.aten.broadcast_to %98, %1756 : !torch.vtensor<[2560,20,1],f16>, !torch.list<int> -> !torch.vtensor<[2560,20,16],f16> loc(#loc1)
%2630 = torch.aten.clone %2629, %int0 : !torch.vtensor<[2560,20,16],f16>, !torch.int -> !torch.vtensor<[2560,20,16],f16> loc(#loc1)
%2631 = torch.aten.view %2630, %1759 : !torch.vtensor<[2560,20,16],f16>, !torch.list<int> -> !torch.vtensor<[2560,320],f16> loc(#loc1)
%2632 = torch.aten.mul.Tensor %99, %2631 : !torch.vtensor<[2560,320],si8>, !torch.vtensor<[2560,320],f16> -> !torch.vtensor<[2560,320],f16> loc(#loc1)
%2633 = torch.aten.transpose.int %2632, %int0, %int1 : !torch.vtensor<[2560,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,2560],f16> loc(#loc1)
%2634 = torch.aten.view %2628, %1266 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16> loc(#loc1)
%2635 = torch.aten.mm %2634, %2633 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,2560],f16> -> !torch.vtensor<[8192,2560],f16> loc(#loc1)
%2636 = torch.aten.mul.Scalar %100, %int1 : !torch.vtensor<[2560],f16>, !torch.int -> !torch.vtensor<[2560],f16> loc(#loc1)
%2637 = torch.aten.add.Tensor %2636, %2635, %int1 : !torch.vtensor<[2560],f16>, !torch.vtensor<[8192,2560],f16>, !torch.int -> !torch.vtensor<[8192,2560],f16> loc(#loc1)
%2638 = torch.aten.view %2637, %1767 : !torch.vtensor<[8192,2560],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,2560],f16> loc(#loc1)
%2639 = torch.aten.slice.Tensor %2638, %int-1, %int0, %int1280, %int1 : !torch.vtensor<[2,4096,2560],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,4096,1280],f16> loc(#loc1)
%2640 = torch.aten.slice.Tensor %2638, %int-1, %int1280, %int2560, %int1 : !torch.vtensor<[2,4096,2560],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,4096,1280],f16> loc(#loc1)
%2641 = torch.aten.gelu %2640, %str : !torch.vtensor<[2,4096,1280],f16>, !torch.str -> !torch.vtensor<[2,4096,1280],f16> loc(#loc1)
%2642 = torch.aten.mul.Tensor %2639, %2641 : !torch.vtensor<[2,4096,1280],f16>, !torch.vtensor<[2,4096,1280],f16> -> !torch.vtensor<[2,4096,1280],f16> loc(#loc1)
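// Quantized 1280 -> 320 down-projection plus residual add, closing this transformer block's
// feed-forward.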
%2643 = torch.aten.broadcast_to %101, %1773 : !torch.vtensor<[320,80,1],f16>, !torch.list<int> -> !torch.vtensor<[320,80,16],f16> loc(#loc1)
%2644 = torch.aten.clone %2643, %int0 : !torch.vtensor<[320,80,16],f16>, !torch.int -> !torch.vtensor<[320,80,16],f16> loc(#loc1)
%2645 = torch.aten.view %2644, %1776 : !torch.vtensor<[320,80,16],f16>, !torch.list<int> -> !torch.vtensor<[320,1280],f16> loc(#loc1)
%2646 = torch.aten.mul.Tensor %102, %2645 : !torch.vtensor<[320,1280],si8>, !torch.vtensor<[320,1280],f16> -> !torch.vtensor<[320,1280],f16> loc(#loc1)
%2647 = torch.aten.transpose.int %2646, %int0, %int1 : !torch.vtensor<[320,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,320],f16> loc(#loc1)
%2648 = torch.aten.view %2642, %1780 : !torch.vtensor<[2,4096,1280],f16>, !torch.list<int> -> !torch.vtensor<[8192,1280],f16> loc(#loc1)
%2649 = torch.aten.mm %2648, %2647 : !torch.vtensor<[8192,1280],f16>, !torch.vtensor<[1280,320],f16> -> !torch.vtensor<[8192,320],f16> loc(#loc1)
%2650 = torch.aten.mul.Scalar %103, %int1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320],f16> loc(#loc1)
%2651 = torch.aten.add.Tensor %2650, %2649, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[8192,320],f16>, !torch.int -> !torch.vtensor<[8192,320],f16> loc(#loc1)
%2652 = torch.aten.view %2651, %1198 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2653 = torch.aten.add.Tensor %2652, %2614, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
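// Back to NCHW spatial layout: [2,4096,320] -> [2,64,64,320] -> [2,320,64,64].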
%2654 = torch.aten.view %2653, %1787 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[2,64,64,320],f16> loc(#loc1)
%2655 = torch.aten.permute %2654, %1789 : !torch.vtensor<[2,64,64,320],f16>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
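// Same fake-quant pattern as above, applied to the spatial features (20 groups of 16 channels)
// ahead of the transformer's output projection conv.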
%2656 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2657 = torch.aten.detach %2656 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2658 = torch.aten.view %2655, %975 : !torch.vtensor<[2,320,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,16,64,64],f16> loc(#loc1)
%2659 = torch.aten.abs %2658 : !torch.vtensor<[2,20,16,64,64],f16> -> !torch.vtensor<[2,20,16,64,64],f16> loc(#loc1)
%values_52, %indices_53 = torch.aten.max.dim %2659, %int2, %true : !torch.vtensor<[2,20,16,64,64],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,20,1,64,64],f16>, !torch.vtensor<[2,20,1,64,64],si64> loc(#loc1)
%2660 = torch.aten.view %values_52, %978 : !torch.vtensor<[2,20,1,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,1,64,64],f16> loc(#loc1)
%2661 = torch.aten.broadcast_to %2660, %975 : !torch.vtensor<[2,20,1,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,16,64,64],f16> loc(#loc1)
%2662 = torch.aten.clone %2661, %int0 : !torch.vtensor<[2,20,16,64,64],f16>, !torch.int -> !torch.vtensor<[2,20,16,64,64],f16> loc(#loc1)
%2663 = torch.aten.view %2662, %958 : !torch.vtensor<[2,20,16,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%2664 = torch.aten.sub.Tensor %2657, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%2665 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%2666 = torch.aten.pow.Tensor_Tensor %2665, %2664 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2667 = torch.aten.neg %2666 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2668 = torch.aten.neg %2667 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2669 = torch.aten.div.Tensor %2663, %2668 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%2670 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2671 = torch.aten.detach %2670 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2672 = torch.aten.div.Tensor %2655, %2669 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%2673 = torch.aten.add.Tensor %2672, %2671, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%2674 = torch.aten.sub.Tensor %2657, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%2675 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%2676 = torch.aten.pow.Tensor_Tensor %2675, %2674 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2677 = torch.aten.neg %2676 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2678 = torch.aten.sub.Tensor %2657, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%2679 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%2680 = torch.aten.pow.Tensor_Tensor %2679, %2678 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2681 = torch.aten.sub.Tensor %2680, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%2682 = torch.aten.gt.Tensor %2673, %2681 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,320,64,64],i1> loc(#loc1)
%2683 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%2684 = torch.aten.to.dtype %2683, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%2685 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%2686 = torch.aten.broadcast_to %2684, %2685 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%2687 = torch.valsem.aten.copy %2686, %2681, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%2688 = torch.aten.where.self %2682, %2687, %2673 : !torch.vtensor<[2,320,64,64],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%2689 = torch.aten.lt.Tensor %2688, %2677 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,320,64,64],i1> loc(#loc1)
%2690 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%2691 = torch.aten.to.dtype %2690, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%2692 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%2693 = torch.aten.broadcast_to %2691, %2692 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%2694 = torch.valsem.aten.copy %2693, %2677, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%2695 = torch.aten.where.self %2689, %2694, %2688 : !torch.vtensor<[2,320,64,64],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%2696 = torch.aten.round %2695 : !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%2697 = torch.aten.sub.Tensor %2696, %2671, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%2698 = torch.aten.mul.Tensor %2697, %2669 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
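// Dequantized 1x1 convolution (presumably proj_out), followed by the residual add with the
// pre-transformer hidden states (%2017).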
%2699 = torch.aten.broadcast_to %104, %1189 : !torch.vtensor<[320,20,1,1,1],f16>, !torch.list<int> -> !torch.vtensor<[320,20,16,1,1],f16> loc(#loc1)
%2700 = torch.aten.clone %2699, %int0 : !torch.vtensor<[320,20,16,1,1],f16>, !torch.int -> !torch.vtensor<[320,20,16,1,1],f16> loc(#loc1)
%2701 = torch.aten.view %2700, %1192 : !torch.vtensor<[320,20,16,1,1],f16>, !torch.list<int> -> !torch.vtensor<[320,320,1,1],f16> loc(#loc1)
%2702 = torch.aten.mul.Tensor %105, %2701 : !torch.vtensor<[320,320,1,1],si8>, !torch.vtensor<[320,320,1,1],f16> -> !torch.vtensor<[320,320,1,1],f16> loc(#loc1)
%2703 = torch.aten.convolution %2698, %2702, %106, %933, %934, %933, %false, %934, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[320,320,1,1],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%2704 = torch.aten.add.Tensor %2703, %2017, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
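// Fake-quant once more before the downsampling convolution.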
%2705 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2706 = torch.aten.detach %2705 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2707 = torch.aten.view %2704, %975 : !torch.vtensor<[2,320,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,16,64,64],f16> loc(#loc1)
%2708 = torch.aten.abs %2707 : !torch.vtensor<[2,20,16,64,64],f16> -> !torch.vtensor<[2,20,16,64,64],f16> loc(#loc1)
%values_54, %indices_55 = torch.aten.max.dim %2708, %int2, %true : !torch.vtensor<[2,20,16,64,64],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,20,1,64,64],f16>, !torch.vtensor<[2,20,1,64,64],si64> loc(#loc1)
%2709 = torch.aten.view %values_54, %978 : !torch.vtensor<[2,20,1,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,1,64,64],f16> loc(#loc1)
%2710 = torch.aten.broadcast_to %2709, %975 : !torch.vtensor<[2,20,1,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,16,64,64],f16> loc(#loc1)
%2711 = torch.aten.clone %2710, %int0 : !torch.vtensor<[2,20,16,64,64],f16>, !torch.int -> !torch.vtensor<[2,20,16,64,64],f16> loc(#loc1)
%2712 = torch.aten.view %2711, %958 : !torch.vtensor<[2,20,16,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%2713 = torch.aten.sub.Tensor %2706, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%2714 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%2715 = torch.aten.pow.Tensor_Tensor %2714, %2713 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2716 = torch.aten.neg %2715 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2717 = torch.aten.neg %2716 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2718 = torch.aten.div.Tensor %2712, %2717 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%2719 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2720 = torch.aten.detach %2719 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2721 = torch.aten.div.Tensor %2704, %2718 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%2722 = torch.aten.add.Tensor %2721, %2720, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%2723 = torch.aten.sub.Tensor %2706, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%2724 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%2725 = torch.aten.pow.Tensor_Tensor %2724, %2723 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2726 = torch.aten.neg %2725 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2727 = torch.aten.sub.Tensor %2706, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%2728 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%2729 = torch.aten.pow.Tensor_Tensor %2728, %2727 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2730 = torch.aten.sub.Tensor %2729, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%2731 = torch.aten.gt.Tensor %2722, %2730 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,320,64,64],i1> loc(#loc1)
%2732 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%2733 = torch.aten.to.dtype %2732, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%2734 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%2735 = torch.aten.broadcast_to %2733, %2734 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%2736 = torch.valsem.aten.copy %2735, %2730, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%2737 = torch.aten.where.self %2731, %2736, %2722 : !torch.vtensor<[2,320,64,64],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%2738 = torch.aten.lt.Tensor %2737, %2726 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,320,64,64],i1> loc(#loc1)
%2739 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%2740 = torch.aten.to.dtype %2739, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%2741 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%2742 = torch.aten.broadcast_to %2740, %2741 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%2743 = torch.valsem.aten.copy %2742, %2726, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%2744 = torch.aten.where.self %2738, %2743, %2737 : !torch.vtensor<[2,320,64,64],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%2745 = torch.aten.round %2744 : !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%2746 = torch.aten.sub.Tensor %2745, %2720, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%2747 = torch.aten.mul.Tensor %2746, %2718 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
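// Dequantized 3x3 convolution with stride [2,2] (%2752): spatial downsample 64x64 -> 32x32
// at 320 channels.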
%2748 = torch.aten.broadcast_to %107, %1018 : !torch.vtensor<[320,20,1,3,3],f16>, !torch.list<int> -> !torch.vtensor<[320,20,16,3,3],f16> loc(#loc1)
%2749 = torch.aten.clone %2748, %int0 : !torch.vtensor<[320,20,16,3,3],f16>, !torch.int -> !torch.vtensor<[320,20,16,3,3],f16> loc(#loc1)
%2750 = torch.aten.view %2749, %1021 : !torch.vtensor<[320,20,16,3,3],f16>, !torch.list<int> -> !torch.vtensor<[320,320,3,3],f16> loc(#loc1)
%2751 = torch.aten.mul.Tensor %108, %2750 : !torch.vtensor<[320,320,3,3],si8>, !torch.vtensor<[320,320,3,3],f16> -> !torch.vtensor<[320,320,3,3],f16> loc(#loc1)
%2752 = torch.prim.ListConstruct %int2, %int2 : (!torch.int, !torch.int) -> !torch.list<int> loc(#loc0)
%2753 = torch.aten.convolution %2747, %2751, %109, %2752, %933, %933, %false, %934, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[320,320,3,3],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,320,32,32],f16> loc(#loc1)
%2754 = torch.aten.clone %2753, %int0 : !torch.vtensor<[2,320,32,32],f16>, !torch.int -> !torch.vtensor<[2,320,32,32],f16> loc(#loc1)
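// GroupNorm with 32 groups: [2,320,32,32] viewed as [2,32,10,1024]; dtype codes int6/int7 are
// f32/f64, so the statistics are accumulated in f64 (over dims %943, presumably [2,3]) before
// casting back.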
%2755 = torch.prim.ListConstruct %int2, %int32, %int10, %int1024 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc3)
%2756 = torch.aten.view %2754, %2755 : !torch.vtensor<[2,320,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,32,10,1024],f16> loc(#loc1)
%2757 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%2758 = torch.aten.to.dtype %2757, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%2759 = torch.prim.ListConstruct %int2, %int32, %int10, %int1024 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%2760 = torch.aten.broadcast_to %2758, %2759 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,10,1024],f32> loc(#loc1)
%2761 = torch.valsem.aten.copy %2760, %2756, %false : !torch.vtensor<[2,32,10,1024],f32>, !torch.vtensor<[2,32,10,1024],f16>, !torch.bool -> !torch.vtensor<[2,32,10,1024],f32> loc(#loc1)
%2762 = torch.aten.to.dtype %2761, %int7, %false, %false, %none : !torch.vtensor<[2,32,10,1024],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,10,1024],f64> loc(#loc1)
%2763 = torch.aten.sum.dim_IntList %2762, %943, %true, %none : !torch.vtensor<[2,32,10,1024],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%2764 = torch.aten.div.Scalar %2763, %int10240 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%2765 = torch.aten.sub.Tensor %2762, %2764, %float1.000000e00 : !torch.vtensor<[2,32,10,1024],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,10,1024],f64> loc(#loc1)
%2766 = torch.aten.mul.Tensor %2765, %2765 : !torch.vtensor<[2,32,10,1024],f64>, !torch.vtensor<[2,32,10,1024],f64> -> !torch.vtensor<[2,32,10,1024],f64> loc(#loc1)
%2767 = torch.aten.sum.dim_IntList %2766, %943, %true, %none : !torch.vtensor<[2,32,10,1024],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%2768 = torch.aten.div.Scalar %2767, %int10240 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%2769 = torch.aten.to.dtype %2768, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%2770 = torch.aten.sum.dim_IntList %2761, %943, %true, %none : !torch.vtensor<[2,32,10,1024],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%2771 = torch.aten.div.Scalar %2770, %int10240 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%2772 = torch.aten.add.Tensor %2769, %5, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%2773 = torch.aten.rsqrt %2772 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%2774 = torch.aten.sub.Tensor %2756, %2771, %int1 : !torch.vtensor<[2,32,10,1024],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,10,1024],f32> loc(#loc1)
%2775 = torch.aten.mul.Tensor %2774, %2773 : !torch.vtensor<[2,32,10,1024],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,10,1024],f32> loc(#loc1)
%2776 = torch.prim.ListConstruct %int2, %int320, %int32, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc3)
%2777 = torch.aten.view %2775, %2776 : !torch.vtensor<[2,32,10,1024],f32>, !torch.list<int> -> !torch.vtensor<[2,320,32,32],f32> loc(#loc1)
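// Per-channel affine (gamma %110, beta %111), cast back to f16 (dtype code int5), then
// SiLU: x * sigmoid(x).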
%2778 = torch.aten.unsqueeze %110, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16> loc(#loc1)
%2779 = torch.aten.unsqueeze %2778, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16> loc(#loc1)
%2780 = torch.aten.mul.Tensor %2777, %2779 : !torch.vtensor<[2,320,32,32],f32>, !torch.vtensor<[320,1,1],f16> -> !torch.vtensor<[2,320,32,32],f32> loc(#loc1)
%2781 = torch.aten.unsqueeze %111, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16> loc(#loc1)
%2782 = torch.aten.unsqueeze %2781, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16> loc(#loc1)
%2783 = torch.aten.add.Tensor %2780, %2782, %int1 : !torch.vtensor<[2,320,32,32],f32>, !torch.vtensor<[320,1,1],f16>, !torch.int -> !torch.vtensor<[2,320,32,32],f32> loc(#loc1)
%2784 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%2785 = torch.aten.to.dtype %2784, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%2786 = torch.prim.ListConstruct %int2, %int320, %int32, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%2787 = torch.aten.broadcast_to %2785, %2786 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,320,32,32],f16> loc(#loc1)
%2788 = torch.valsem.aten.copy %2787, %2783, %false : !torch.vtensor<[2,320,32,32],f16>, !torch.vtensor<[2,320,32,32],f32>, !torch.bool -> !torch.vtensor<[2,320,32,32],f16> loc(#loc1)
%2789 = torch.aten.sigmoid %2788 : !torch.vtensor<[2,320,32,32],f16> -> !torch.vtensor<[2,320,32,32],f16> loc(#loc1)
%2790 = torch.aten.mul.Tensor %2789, %2788 : !torch.vtensor<[2,320,32,32],f16>, !torch.vtensor<[2,320,32,32],f16> -> !torch.vtensor<[2,320,32,32],f16> loc(#loc1)
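// Fake-quant of the SiLU output (20 groups of 16 channels at 32x32 resolution).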
%2791 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2792 = torch.aten.detach %2791 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2793 = torch.prim.ListConstruct %int2, %int20, %int16, %int32, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc3)
%2794 = torch.aten.view %2790, %2793 : !torch.vtensor<[2,320,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,20,16,32,32],f16> loc(#loc1)
%2795 = torch.aten.abs %2794 : !torch.vtensor<[2,20,16,32,32],f16> -> !torch.vtensor<[2,20,16,32,32],f16> loc(#loc1)
%values_56, %indices_57 = torch.aten.max.dim %2795, %int2, %true : !torch.vtensor<[2,20,16,32,32],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,20,1,32,32],f16>, !torch.vtensor<[2,20,1,32,32],si64> loc(#loc1)
%2796 = torch.prim.ListConstruct %int2, %int20, %int1, %int32, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc3)
%2797 = torch.aten.view %values_56, %2796 : !torch.vtensor<[2,20,1,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,20,1,32,32],f16> loc(#loc1)
%2798 = torch.aten.broadcast_to %2797, %2793 : !torch.vtensor<[2,20,1,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,20,16,32,32],f16> loc(#loc1)
%2799 = torch.aten.clone %2798, %int0 : !torch.vtensor<[2,20,16,32,32],f16>, !torch.int -> !torch.vtensor<[2,20,16,32,32],f16> loc(#loc1)
%2800 = torch.aten.view %2799, %2776 : !torch.vtensor<[2,20,16,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,320,32,32],f16> loc(#loc1)
%2801 = torch.aten.sub.Tensor %2792, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%2802 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%2803 = torch.aten.pow.Tensor_Tensor %2802, %2801 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2804 = torch.aten.neg %2803 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2805 = torch.aten.neg %2804 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2806 = torch.aten.div.Tensor %2800, %2805 : !torch.vtensor<[2,320,32,32],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,320,32,32],f16> loc(#loc1)
%2807 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2808 = torch.aten.detach %2807 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2809 = torch.aten.div.Tensor %2790, %2806 : !torch.vtensor<[2,320,32,32],f16>, !torch.vtensor<[2,320,32,32],f16> -> !torch.vtensor<[2,320,32,32],f16> loc(#loc1)
%2810 = torch.aten.add.Tensor %2809, %2808, %int1 : !torch.vtensor<[2,320,32,32],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,320,32,32],f16> loc(#loc1)
%2811 = torch.aten.sub.Tensor %2792, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%2812 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%2813 = torch.aten.pow.Tensor_Tensor %2812, %2811 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2814 = torch.aten.neg %2813 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2815 = torch.aten.sub.Tensor %2792, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%2816 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%2817 = torch.aten.pow.Tensor_Tensor %2816, %2815 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2818 = torch.aten.sub.Tensor %2817, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%2819 = torch.aten.gt.Tensor %2810, %2818 : !torch.vtensor<[2,320,32,32],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,320,32,32],i1> loc(#loc1)
%2820 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%2821 = torch.aten.to.dtype %2820, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%2822 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%2823 = torch.aten.broadcast_to %2821, %2822 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%2824 = torch.valsem.aten.copy %2823, %2818, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%2825 = torch.aten.where.self %2819, %2824, %2810 : !torch.vtensor<[2,320,32,32],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,320,32,32],f16> -> !torch.vtensor<[2,320,32,32],f16> loc(#loc1)
%2826 = torch.aten.lt.Tensor %2825, %2814 : !torch.vtensor<[2,320,32,32],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,320,32,32],i1> loc(#loc1)
%2827 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%2828 = torch.aten.to.dtype %2827, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%2829 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%2830 = torch.aten.broadcast_to %2828, %2829 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%2831 = torch.valsem.aten.copy %2830, %2814, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%2832 = torch.aten.where.self %2826, %2831, %2825 : !torch.vtensor<[2,320,32,32],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,320,32,32],f16> -> !torch.vtensor<[2,320,32,32],f16> loc(#loc1)
%2833 = torch.aten.round %2832 : !torch.vtensor<[2,320,32,32],f16> -> !torch.vtensor<[2,320,32,32],f16> loc(#loc1)
%2834 = torch.aten.sub.Tensor %2833, %2808, %int1 : !torch.vtensor<[2,320,32,32],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,320,32,32],f16> loc(#loc1)
%2835 = torch.aten.mul.Tensor %2834, %2806 : !torch.vtensor<[2,320,32,32],f16>, !torch.vtensor<[2,320,32,32],f16> -> !torch.vtensor<[2,320,32,32],f16> loc(#loc1)
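// Dequantized 3x3 convolution expanding 320 -> 640 channels, presumably the first conv of the
// next ResNet block.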
%2836 = torch.prim.ListConstruct %int640, %int20, %int16, %int3, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc2)
%2837 = torch.aten.broadcast_to %112, %2836 : !torch.vtensor<[640,20,1,3,3],f16>, !torch.list<int> -> !torch.vtensor<[640,20,16,3,3],f16> loc(#loc1)
%2838 = torch.aten.clone %2837, %int0 : !torch.vtensor<[640,20,16,3,3],f16>, !torch.int -> !torch.vtensor<[640,20,16,3,3],f16> loc(#loc1)
%2839 = torch.prim.ListConstruct %int640, %int320, %int3, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc4)
%2840 = torch.aten.view %2838, %2839 : !torch.vtensor<[640,20,16,3,3],f16>, !torch.list<int> -> !torch.vtensor<[640,320,3,3],f16> loc(#loc1)
%2841 = torch.aten.mul.Tensor %113, %2840 : !torch.vtensor<[640,320,3,3],si8>, !torch.vtensor<[640,320,3,3],f16> -> !torch.vtensor<[640,320,3,3],f16> loc(#loc1)
%2842 = torch.aten.convolution %2835, %2841, %114, %933, %933, %933, %false, %934, %int1 : !torch.vtensor<[2,320,32,32],f16>, !torch.vtensor<[640,320,3,3],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
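// Time-embedding injection: SiLU on %932 ([2,1280]), a 1280 -> 640 linear, then broadcast-add
// over the spatial dims.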
%2843 = torch.aten.sigmoid %932 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16> loc(#loc1)
%2844 = torch.aten.mul.Tensor %2843, %932 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16> loc(#loc1)
%2845 = torch.aten.transpose.int %115, %int0, %int1 : !torch.vtensor<[640,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,640],f16> loc(#loc1)
%2846 = torch.aten.mm %2844, %2845 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,640],f16> -> !torch.vtensor<[2,640],f16> loc(#loc1)
%2847 = torch.aten.mul.Scalar %116, %int1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640],f16> loc(#loc1)
%2848 = torch.aten.add.Tensor %2847, %2846, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2,640],f16>, !torch.int -> !torch.vtensor<[2,640],f16> loc(#loc1)
%2849 = torch.aten.slice.Tensor %2848, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,640],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,640],f16> loc(#loc1)
%2850 = torch.aten.slice.Tensor %2849, %int1, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,640],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,640],f16> loc(#loc1)
%2851 = torch.aten.unsqueeze %2850, %int2 : !torch.vtensor<[2,640],f16>, !torch.int -> !torch.vtensor<[2,640,1],f16> loc(#loc1)
%2852 = torch.aten.unsqueeze %2851, %int3 : !torch.vtensor<[2,640,1],f16>, !torch.int -> !torch.vtensor<[2,640,1,1],f16> loc(#loc1)
%2853 = torch.aten.add.Tensor %2842, %2852, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,1,1],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
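// Second GroupNorm, again 32 groups but now 20 channels each (20480 elements per group),
// with f64 accumulation.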
%2854 = torch.prim.ListConstruct %int2, %int32, %int20, %int1024 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc3)
%2855 = torch.aten.view %2853, %2854 : !torch.vtensor<[2,640,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,32,20,1024],f16> loc(#loc1)
%2856 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%2857 = torch.aten.to.dtype %2856, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%2858 = torch.prim.ListConstruct %int2, %int32, %int20, %int1024 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%2859 = torch.aten.broadcast_to %2857, %2858 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,20,1024],f32> loc(#loc1)
%2860 = torch.valsem.aten.copy %2859, %2855, %false : !torch.vtensor<[2,32,20,1024],f32>, !torch.vtensor<[2,32,20,1024],f16>, !torch.bool -> !torch.vtensor<[2,32,20,1024],f32> loc(#loc1)
%2861 = torch.aten.to.dtype %2860, %int7, %false, %false, %none : !torch.vtensor<[2,32,20,1024],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,20,1024],f64> loc(#loc1)
%2862 = torch.aten.sum.dim_IntList %2861, %943, %true, %none : !torch.vtensor<[2,32,20,1024],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%2863 = torch.aten.div.Scalar %2862, %int20480 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%2864 = torch.aten.sub.Tensor %2861, %2863, %float1.000000e00 : !torch.vtensor<[2,32,20,1024],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,20,1024],f64> loc(#loc1)
%2865 = torch.aten.mul.Tensor %2864, %2864 : !torch.vtensor<[2,32,20,1024],f64>, !torch.vtensor<[2,32,20,1024],f64> -> !torch.vtensor<[2,32,20,1024],f64> loc(#loc1)
%2866 = torch.aten.sum.dim_IntList %2865, %943, %true, %none : !torch.vtensor<[2,32,20,1024],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%2867 = torch.aten.div.Scalar %2866, %int20480 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%2868 = torch.aten.to.dtype %2867, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%2869 = torch.aten.sum.dim_IntList %2860, %943, %true, %none : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%2870 = torch.aten.div.Scalar %2869, %int20480 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%2871 = torch.aten.add.Tensor %2868, %5, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%2872 = torch.aten.rsqrt %2871 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%2873 = torch.aten.sub.Tensor %2855, %2870, %int1 : !torch.vtensor<[2,32,20,1024],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,20,1024],f32> loc(#loc1)
%2874 = torch.aten.mul.Tensor %2873, %2872 : !torch.vtensor<[2,32,20,1024],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,20,1024],f32> loc(#loc1)
%2875 = torch.prim.ListConstruct %int2, %int640, %int32, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc3)
%2876 = torch.aten.view %2874, %2875 : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f32> loc(#loc1)
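// Affine (%117, %118) and SiLU, as in the previous normalization.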
%2877 = torch.aten.unsqueeze %117, %int-1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640,1],f16> loc(#loc1)
%2878 = torch.aten.unsqueeze %2877, %int-1 : !torch.vtensor<[640,1],f16>, !torch.int -> !torch.vtensor<[640,1,1],f16> loc(#loc1)
%2879 = torch.aten.mul.Tensor %2876, %2878 : !torch.vtensor<[2,640,32,32],f32>, !torch.vtensor<[640,1,1],f16> -> !torch.vtensor<[2,640,32,32],f32> loc(#loc1)
%2880 = torch.aten.unsqueeze %118, %int-1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640,1],f16> loc(#loc1)
%2881 = torch.aten.unsqueeze %2880, %int-1 : !torch.vtensor<[640,1],f16>, !torch.int -> !torch.vtensor<[640,1,1],f16> loc(#loc1)
%2882 = torch.aten.add.Tensor %2879, %2881, %int1 : !torch.vtensor<[2,640,32,32],f32>, !torch.vtensor<[640,1,1],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f32> loc(#loc1)
%2883 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%2884 = torch.aten.to.dtype %2883, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%2885 = torch.prim.ListConstruct %int2, %int640, %int32, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%2886 = torch.aten.broadcast_to %2884, %2885 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%2887 = torch.valsem.aten.copy %2886, %2882, %false : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f32>, !torch.bool -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%2888 = torch.aten.sigmoid %2887 : !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%2889 = torch.aten.mul.Tensor %2888, %2887 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
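// The fake-quant pattern repeats for the 640-channel activations (40 groups of 16 channels).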
%2890 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2891 = torch.aten.detach %2890 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2892 = torch.prim.ListConstruct %int2, %int40, %int16, %int32, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc3)
%2893 = torch.aten.view %2889, %2892 : !torch.vtensor<[2,640,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,40,16,32,32],f16> loc(#loc1)
%2894 = torch.aten.abs %2893 : !torch.vtensor<[2,40,16,32,32],f16> -> !torch.vtensor<[2,40,16,32,32],f16> loc(#loc1)
%values_58, %indices_59 = torch.aten.max.dim %2894, %int2, %true : !torch.vtensor<[2,40,16,32,32],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,40,1,32,32],f16>, !torch.vtensor<[2,40,1,32,32],si64> loc(#loc1)
%2895 = torch.prim.ListConstruct %int2, %int40, %int1, %int32, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc3)
%2896 = torch.aten.view %values_58, %2895 : !torch.vtensor<[2,40,1,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,40,1,32,32],f16> loc(#loc1)
%2897 = torch.aten.broadcast_to %2896, %2892 : !torch.vtensor<[2,40,1,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,40,16,32,32],f16> loc(#loc1)
%2898 = torch.aten.clone %2897, %int0 : !torch.vtensor<[2,40,16,32,32],f16>, !torch.int -> !torch.vtensor<[2,40,16,32,32],f16> loc(#loc1)
%2899 = torch.aten.view %2898, %2875 : !torch.vtensor<[2,40,16,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%2900 = torch.aten.sub.Tensor %2891, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%2901 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%2902 = torch.aten.pow.Tensor_Tensor %2901, %2900 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2903 = torch.aten.neg %2902 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2904 = torch.aten.neg %2903 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2905 = torch.aten.div.Tensor %2899, %2904 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%2906 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2907 = torch.aten.detach %2906 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2908 = torch.aten.div.Tensor %2889, %2905 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%2909 = torch.aten.add.Tensor %2908, %2907, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%2910 = torch.aten.sub.Tensor %2891, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%2911 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%2912 = torch.aten.pow.Tensor_Tensor %2911, %2910 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2913 = torch.aten.neg %2912 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2914 = torch.aten.sub.Tensor %2891, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%2915 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%2916 = torch.aten.pow.Tensor_Tensor %2915, %2914 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2917 = torch.aten.sub.Tensor %2916, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%2918 = torch.aten.gt.Tensor %2909, %2917 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,640,32,32],i1> loc(#loc1)
%2919 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%2920 = torch.aten.to.dtype %2919, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%2921 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%2922 = torch.aten.broadcast_to %2920, %2921 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%2923 = torch.valsem.aten.copy %2922, %2917, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%2924 = torch.aten.where.self %2918, %2923, %2909 : !torch.vtensor<[2,640,32,32],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%2925 = torch.aten.lt.Tensor %2924, %2913 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,640,32,32],i1> loc(#loc1)
%2926 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%2927 = torch.aten.to.dtype %2926, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%2928 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%2929 = torch.aten.broadcast_to %2927, %2928 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%2930 = torch.valsem.aten.copy %2929, %2913, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%2931 = torch.aten.where.self %2925, %2930, %2924 : !torch.vtensor<[2,640,32,32],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%2932 = torch.aten.round %2931 : !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%2933 = torch.aten.sub.Tensor %2932, %2907, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%2934 = torch.aten.mul.Tensor %2933, %2905 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
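    // Weight path: per-group f16 scales [640,40,1,3,3] are broadcast over
    // the group dimension, reshaped to [640,640,3,3], and multiplied with
    // the si8 tensor %120 -- i.e. the quantized 3x3 conv weights are
    // dequantized on the fly before the convolution below.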
%2935 = torch.prim.ListConstruct %int640, %int40, %int16, %int3, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc2)
%2936 = torch.aten.broadcast_to %119, %2935 : !torch.vtensor<[640,40,1,3,3],f16>, !torch.list<int> -> !torch.vtensor<[640,40,16,3,3],f16> loc(#loc1)
%2937 = torch.aten.clone %2936, %int0 : !torch.vtensor<[640,40,16,3,3],f16>, !torch.int -> !torch.vtensor<[640,40,16,3,3],f16> loc(#loc1)
%2938 = torch.prim.ListConstruct %int640, %int640, %int3, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc4)
%2939 = torch.aten.view %2937, %2938 : !torch.vtensor<[640,40,16,3,3],f16>, !torch.list<int> -> !torch.vtensor<[640,640,3,3],f16> loc(#loc1)
%2940 = torch.aten.mul.Tensor %120, %2939 : !torch.vtensor<[640,640,3,3],si8>, !torch.vtensor<[640,640,3,3],f16> -> !torch.vtensor<[640,640,3,3],f16> loc(#loc1)
%2941 = torch.aten.convolution %2934, %2940, %121, %933, %933, %933, %false, %934, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[640,640,3,3],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
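    // The same fake-quantization pattern is now applied to the residual
    // branch input %2753 ([2,320,32,32]), reusing shape lists built earlier
    // (%2793, %2796, %2776).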
%2942 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2943 = torch.aten.detach %2942 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2944 = torch.aten.view %2753, %2793 : !torch.vtensor<[2,320,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,20,16,32,32],f16> loc(#loc1)
%2945 = torch.aten.abs %2944 : !torch.vtensor<[2,20,16,32,32],f16> -> !torch.vtensor<[2,20,16,32,32],f16> loc(#loc1)
%values_60, %indices_61 = torch.aten.max.dim %2945, %int2, %true : !torch.vtensor<[2,20,16,32,32],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,20,1,32,32],f16>, !torch.vtensor<[2,20,1,32,32],si64> loc(#loc1)
%2946 = torch.aten.view %values_60, %2796 : !torch.vtensor<[2,20,1,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,20,1,32,32],f16> loc(#loc1)
%2947 = torch.aten.broadcast_to %2946, %2793 : !torch.vtensor<[2,20,1,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,20,16,32,32],f16> loc(#loc1)
%2948 = torch.aten.clone %2947, %int0 : !torch.vtensor<[2,20,16,32,32],f16>, !torch.int -> !torch.vtensor<[2,20,16,32,32],f16> loc(#loc1)
%2949 = torch.aten.view %2948, %2776 : !torch.vtensor<[2,20,16,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,320,32,32],f16> loc(#loc1)
%2950 = torch.aten.sub.Tensor %2943, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%2951 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%2952 = torch.aten.pow.Tensor_Tensor %2951, %2950 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2953 = torch.aten.neg %2952 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2954 = torch.aten.neg %2953 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2955 = torch.aten.div.Tensor %2949, %2954 : !torch.vtensor<[2,320,32,32],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,320,32,32],f16> loc(#loc1)
%2956 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2957 = torch.aten.detach %2956 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2958 = torch.aten.div.Tensor %2753, %2955 : !torch.vtensor<[2,320,32,32],f16>, !torch.vtensor<[2,320,32,32],f16> -> !torch.vtensor<[2,320,32,32],f16> loc(#loc1)
%2959 = torch.aten.add.Tensor %2958, %2957, %int1 : !torch.vtensor<[2,320,32,32],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,320,32,32],f16> loc(#loc1)
%2960 = torch.aten.sub.Tensor %2943, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%2961 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%2962 = torch.aten.pow.Tensor_Tensor %2961, %2960 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2963 = torch.aten.neg %2962 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2964 = torch.aten.sub.Tensor %2943, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%2965 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%2966 = torch.aten.pow.Tensor_Tensor %2965, %2964 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2967 = torch.aten.sub.Tensor %2966, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%2968 = torch.aten.gt.Tensor %2959, %2967 : !torch.vtensor<[2,320,32,32],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,320,32,32],i1> loc(#loc1)
%2969 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%2970 = torch.aten.to.dtype %2969, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%2971 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%2972 = torch.aten.broadcast_to %2970, %2971 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%2973 = torch.valsem.aten.copy %2972, %2967, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%2974 = torch.aten.where.self %2968, %2973, %2959 : !torch.vtensor<[2,320,32,32],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,320,32,32],f16> -> !torch.vtensor<[2,320,32,32],f16> loc(#loc1)
%2975 = torch.aten.lt.Tensor %2974, %2963 : !torch.vtensor<[2,320,32,32],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,320,32,32],i1> loc(#loc1)
%2976 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%2977 = torch.aten.to.dtype %2976, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%2978 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%2979 = torch.aten.broadcast_to %2977, %2978 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%2980 = torch.valsem.aten.copy %2979, %2963, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%2981 = torch.aten.where.self %2975, %2980, %2974 : !torch.vtensor<[2,320,32,32],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,320,32,32],f16> -> !torch.vtensor<[2,320,32,32],f16> loc(#loc1)
%2982 = torch.aten.round %2981 : !torch.vtensor<[2,320,32,32],f16> -> !torch.vtensor<[2,320,32,32],f16> loc(#loc1)
%2983 = torch.aten.sub.Tensor %2982, %2957, %int1 : !torch.vtensor<[2,320,32,32],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,320,32,32],f16> loc(#loc1)
%2984 = torch.aten.mul.Tensor %2983, %2955 : !torch.vtensor<[2,320,32,32],f16>, !torch.vtensor<[2,320,32,32],f16> -> !torch.vtensor<[2,320,32,32],f16> loc(#loc1)
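    // Dequantize the si8 1x1 projection weights (%123, scales %122) and
    // apply the [640,320,1,1] convolution -- presumably the shortcut that
    // lifts the residual branch from 320 to 640 channels.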
%2985 = torch.prim.ListConstruct %int640, %int20, %int16, %int1, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc2)
%2986 = torch.aten.broadcast_to %122, %2985 : !torch.vtensor<[640,20,1,1,1],f16>, !torch.list<int> -> !torch.vtensor<[640,20,16,1,1],f16> loc(#loc1)
%2987 = torch.aten.clone %2986, %int0 : !torch.vtensor<[640,20,16,1,1],f16>, !torch.int -> !torch.vtensor<[640,20,16,1,1],f16> loc(#loc1)
%2988 = torch.prim.ListConstruct %int640, %int320, %int1, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc4)
%2989 = torch.aten.view %2987, %2988 : !torch.vtensor<[640,20,16,1,1],f16>, !torch.list<int> -> !torch.vtensor<[640,320,1,1],f16> loc(#loc1)
%2990 = torch.aten.mul.Tensor %123, %2989 : !torch.vtensor<[640,320,1,1],si8>, !torch.vtensor<[640,320,1,1],f16> -> !torch.vtensor<[640,320,1,1],f16> loc(#loc1)
%2991 = torch.aten.convolution %2984, %2990, %124, %933, %934, %933, %false, %934, %int1 : !torch.vtensor<[2,320,32,32],f16>, !torch.vtensor<[640,320,1,1],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
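    // Residual add of the two conv outputs, followed by division by the
    // scalar literal %4, likely an output-scale factor for the block.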
%2992 = torch.aten.add.Tensor %2991, %2941, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%2993 = torch.aten.div.Tensor %2992, %4 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
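    // GroupNorm in mixed precision: the [2,640,32,32] tensor is viewed as
    // [2,32,20,1024] (32 groups of 20 channels), mean and variance are
    // accumulated in f64/f32 (sums divided by 20*1024 = 20480), and the
    // result is normalized with rsqrt(var + eps) using the small eps
    // tensor %3, then cast back to f16.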
%2994 = torch.aten.clone %2993, %int0 : !torch.vtensor<[2,640,32,32],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%2995 = torch.aten.view %2994, %2854 : !torch.vtensor<[2,640,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,32,20,1024],f16> loc(#loc1)
%2996 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%2997 = torch.aten.to.dtype %2996, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%2998 = torch.prim.ListConstruct %int2, %int32, %int20, %int1024 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%2999 = torch.aten.broadcast_to %2997, %2998 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,20,1024],f32> loc(#loc1)
%3000 = torch.valsem.aten.copy %2999, %2995, %false : !torch.vtensor<[2,32,20,1024],f32>, !torch.vtensor<[2,32,20,1024],f16>, !torch.bool -> !torch.vtensor<[2,32,20,1024],f32> loc(#loc1)
%3001 = torch.aten.to.dtype %3000, %int7, %false, %false, %none : !torch.vtensor<[2,32,20,1024],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,20,1024],f64> loc(#loc1)
%3002 = torch.aten.sum.dim_IntList %3001, %943, %true, %none : !torch.vtensor<[2,32,20,1024],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%3003 = torch.aten.div.Scalar %3002, %int20480 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%3004 = torch.aten.sub.Tensor %3001, %3003, %float1.000000e00 : !torch.vtensor<[2,32,20,1024],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,20,1024],f64> loc(#loc1)
%3005 = torch.aten.mul.Tensor %3004, %3004 : !torch.vtensor<[2,32,20,1024],f64>, !torch.vtensor<[2,32,20,1024],f64> -> !torch.vtensor<[2,32,20,1024],f64> loc(#loc1)
%3006 = torch.aten.sum.dim_IntList %3005, %943, %true, %none : !torch.vtensor<[2,32,20,1024],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%3007 = torch.aten.div.Scalar %3006, %int20480 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%3008 = torch.aten.to.dtype %3007, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%3009 = torch.aten.sum.dim_IntList %3000, %943, %true, %none : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%3010 = torch.aten.div.Scalar %3009, %int20480 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%3011 = torch.aten.add.Tensor %3008, %3, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%3012 = torch.aten.rsqrt %3011 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%3013 = torch.aten.sub.Tensor %2995, %3010, %int1 : !torch.vtensor<[2,32,20,1024],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,20,1024],f32> loc(#loc1)
%3014 = torch.aten.mul.Tensor %3013, %3012 : !torch.vtensor<[2,32,20,1024],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,20,1024],f32> loc(#loc1)
%3015 = torch.aten.view %3014, %2875 : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f32> loc(#loc1)
%3016 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%3017 = torch.aten.to.dtype %3016, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%3018 = torch.prim.ListConstruct %int2, %int640, %int32, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%3019 = torch.aten.broadcast_to %3017, %3018 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%3020 = torch.valsem.aten.copy %3019, %3015, %false : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f32>, !torch.bool -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
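    // The normalized activations go through the same per-group fake-quant
    // pattern once more, seemingly ahead of the 1x1 input projection of the
    // attention sub-block below.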
%3021 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3022 = torch.aten.detach %3021 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3023 = torch.aten.view %3020, %2892 : !torch.vtensor<[2,640,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,40,16,32,32],f16> loc(#loc1)
%3024 = torch.aten.abs %3023 : !torch.vtensor<[2,40,16,32,32],f16> -> !torch.vtensor<[2,40,16,32,32],f16> loc(#loc1)
%values_62, %indices_63 = torch.aten.max.dim %3024, %int2, %true : !torch.vtensor<[2,40,16,32,32],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,40,1,32,32],f16>, !torch.vtensor<[2,40,1,32,32],si64> loc(#loc1)
%3025 = torch.aten.view %values_62, %2895 : !torch.vtensor<[2,40,1,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,40,1,32,32],f16> loc(#loc1)
%3026 = torch.aten.broadcast_to %3025, %2892 : !torch.vtensor<[2,40,1,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,40,16,32,32],f16> loc(#loc1)
%3027 = torch.aten.clone %3026, %int0 : !torch.vtensor<[2,40,16,32,32],f16>, !torch.int -> !torch.vtensor<[2,40,16,32,32],f16> loc(#loc1)
%3028 = torch.aten.view %3027, %2875 : !torch.vtensor<[2,40,16,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%3029 = torch.aten.sub.Tensor %3022, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%3030 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%3031 = torch.aten.pow.Tensor_Tensor %3030, %3029 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3032 = torch.aten.neg %3031 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3033 = torch.aten.neg %3032 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3034 = torch.aten.div.Tensor %3028, %3033 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%3035 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3036 = torch.aten.detach %3035 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3037 = torch.aten.div.Tensor %3020, %3034 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%3038 = torch.aten.add.Tensor %3037, %3036, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%3039 = torch.aten.sub.Tensor %3022, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%3040 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%3041 = torch.aten.pow.Tensor_Tensor %3040, %3039 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3042 = torch.aten.neg %3041 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3043 = torch.aten.sub.Tensor %3022, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%3044 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%3045 = torch.aten.pow.Tensor_Tensor %3044, %3043 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3046 = torch.aten.sub.Tensor %3045, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%3047 = torch.aten.gt.Tensor %3038, %3046 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,640,32,32],i1> loc(#loc1)
%3048 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%3049 = torch.aten.to.dtype %3048, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%3050 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%3051 = torch.aten.broadcast_to %3049, %3050 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%3052 = torch.valsem.aten.copy %3051, %3046, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%3053 = torch.aten.where.self %3047, %3052, %3038 : !torch.vtensor<[2,640,32,32],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%3054 = torch.aten.lt.Tensor %3053, %3042 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,640,32,32],i1> loc(#loc1)
%3055 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%3056 = torch.aten.to.dtype %3055, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%3057 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%3058 = torch.aten.broadcast_to %3056, %3057 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%3059 = torch.valsem.aten.copy %3058, %3042, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%3060 = torch.aten.where.self %3054, %3059, %3053 : !torch.vtensor<[2,640,32,32],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%3061 = torch.aten.round %3060 : !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%3062 = torch.aten.sub.Tensor %3061, %3036, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%3063 = torch.aten.mul.Tensor %3062, %3034 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
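    // Dequantize the si8 1x1 weights (%126, scales %125) and convolve:
    // [2,640,32,32] -> [2,640,32,32].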
%3064 = torch.prim.ListConstruct %int640, %int40, %int16, %int1, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc2)
%3065 = torch.aten.broadcast_to %125, %3064 : !torch.vtensor<[640,40,1,1,1],f16>, !torch.list<int> -> !torch.vtensor<[640,40,16,1,1],f16> loc(#loc1)
%3066 = torch.aten.clone %3065, %int0 : !torch.vtensor<[640,40,16,1,1],f16>, !torch.int -> !torch.vtensor<[640,40,16,1,1],f16> loc(#loc1)
%3067 = torch.prim.ListConstruct %int640, %int640, %int1, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc4)
%3068 = torch.aten.view %3066, %3067 : !torch.vtensor<[640,40,16,1,1],f16>, !torch.list<int> -> !torch.vtensor<[640,640,1,1],f16> loc(#loc1)
%3069 = torch.aten.mul.Tensor %126, %3068 : !torch.vtensor<[640,640,1,1],si8>, !torch.vtensor<[640,640,1,1],f16> -> !torch.vtensor<[640,640,1,1],f16> loc(#loc1)
%3070 = torch.aten.convolution %3063, %3069, %127, %933, %934, %933, %false, %934, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[640,640,1,1],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
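    // Rearrange NCHW feature maps into a token sequence: permute to
    // [2,32,32,640] and flatten the spatial dims to [2,1024,640].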
%3071 = torch.aten.permute %3070, %1196 : !torch.vtensor<[2,640,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,32,32,640],f16> loc(#loc1)
%3072 = torch.prim.ListConstruct %int2, %int1024, %int640 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc5)
%3073 = torch.aten.view %3071, %3072 : !torch.vtensor<[2,32,32,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
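    // Inlined LayerNorm over the 640-dim channel axis: mean and variance
    // via sum/640 along dim 2, then (x - mean) * rsqrt(var + 1e-05); no
    // affine scale/shift appears in this span.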
%3074 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int> loc(#loc1)
%3075 = torch.aten.sum.dim_IntList %3073, %3074, %true, %none : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,1024,1],f16> loc(#loc1)
%3076 = torch.aten.div.Scalar %3075, %int640 : !torch.vtensor<[2,1024,1],f16>, !torch.int -> !torch.vtensor<[2,1024,1],f16> loc(#loc1)
%3077 = torch.prim.ListConstruct %int2, %int1024, %int640 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%3078 = torch.aten.broadcast_to %3076, %3077 : !torch.vtensor<[2,1024,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3079 = torch.aten.sub.Tensor %3073, %3078, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3080 = torch.aten.mul.Tensor %3079, %3079 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3081 = torch.aten.sum.dim_IntList %3080, %3074, %true, %none : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,1024,1],f16> loc(#loc1)
%3082 = torch.aten.div.Scalar %3081, %int640 : !torch.vtensor<[2,1024,1],f16>, !torch.int -> !torch.vtensor<[2,1024,1],f16> loc(#loc1)
%3083 = torch.aten.add.Scalar %3082, %float1.000000e-05, %int1 : !torch.vtensor<[2,1024,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,1024,1],f16> loc(#loc1)
%3084 = torch.aten.rsqrt %3083 : !torch.vtensor<[2,1024,1],f16> -> !torch.vtensor<[2,1024,1],f16> loc(#loc1)
%3085 = torch.prim.ListConstruct %int2, %int1024, %int640 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%3086 = torch.aten.broadcast_to %3084, %3085 : !torch.vtensor<[2,1024,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3087 = torch.aten.mul.Tensor %3079, %3086 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
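    // Fake-quantize the LayerNorm output per group of 16 features,
    // dequantize the si8 [640,640] weight (%129, scales %128), and run the
    // matmul with bias %130 (a decomposed addmm) -- by position this looks
    // like the attention query projection, producing %3145.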
%3088 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3089 = torch.aten.detach %3088 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3090 = torch.prim.ListConstruct %int2, %int1024, %int40, %int16 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc3)
%3091 = torch.aten.view %3087, %3090 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,40,16],f16> loc(#loc1)
%3092 = torch.aten.abs %3091 : !torch.vtensor<[2,1024,40,16],f16> -> !torch.vtensor<[2,1024,40,16],f16> loc(#loc1)
%values_64, %indices_65 = torch.aten.max.dim %3092, %int3, %true : !torch.vtensor<[2,1024,40,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,1024,40,1],f16>, !torch.vtensor<[2,1024,40,1],si64> loc(#loc1)
%3093 = torch.prim.ListConstruct %int2, %int1024, %int40, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc3)
%3094 = torch.aten.view %values_64, %3093 : !torch.vtensor<[2,1024,40,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,40,1],f16> loc(#loc1)
%3095 = torch.aten.broadcast_to %3094, %3090 : !torch.vtensor<[2,1024,40,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,40,16],f16> loc(#loc1)
%3096 = torch.aten.clone %3095, %int0 : !torch.vtensor<[2,1024,40,16],f16>, !torch.int -> !torch.vtensor<[2,1024,40,16],f16> loc(#loc1)
%3097 = torch.aten.view %3096, %3072 : !torch.vtensor<[2,1024,40,16],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3098 = torch.aten.sub.Tensor %3089, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%3099 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%3100 = torch.aten.pow.Tensor_Tensor %3099, %3098 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3101 = torch.aten.neg %3100 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3102 = torch.aten.neg %3101 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3103 = torch.aten.div.Tensor %3097, %3102 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3104 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3105 = torch.aten.detach %3104 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3106 = torch.aten.div.Tensor %3087, %3103 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3107 = torch.aten.add.Tensor %3106, %3105, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3108 = torch.aten.sub.Tensor %3089, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%3109 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%3110 = torch.aten.pow.Tensor_Tensor %3109, %3108 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3111 = torch.aten.neg %3110 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3112 = torch.aten.sub.Tensor %3089, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%3113 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%3114 = torch.aten.pow.Tensor_Tensor %3113, %3112 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3115 = torch.aten.sub.Tensor %3114, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%3116 = torch.aten.gt.Tensor %3107, %3115 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,1024,640],i1> loc(#loc1)
%3117 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%3118 = torch.aten.to.dtype %3117, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%3119 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%3120 = torch.aten.broadcast_to %3118, %3119 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%3121 = torch.valsem.aten.copy %3120, %3115, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%3122 = torch.aten.where.self %3116, %3121, %3107 : !torch.vtensor<[2,1024,640],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3123 = torch.aten.lt.Tensor %3122, %3111 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,1024,640],i1> loc(#loc1)
%3124 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%3125 = torch.aten.to.dtype %3124, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%3126 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%3127 = torch.aten.broadcast_to %3125, %3126 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%3128 = torch.valsem.aten.copy %3127, %3111, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%3129 = torch.aten.where.self %3123, %3128, %3122 : !torch.vtensor<[2,1024,640],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3130 = torch.aten.round %3129 : !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3131 = torch.aten.sub.Tensor %3130, %3105, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3132 = torch.aten.mul.Tensor %3131, %3103 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3133 = torch.prim.ListConstruct %int640, %int40, %int16 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc2)
%3134 = torch.aten.broadcast_to %128, %3133 : !torch.vtensor<[640,40,1],f16>, !torch.list<int> -> !torch.vtensor<[640,40,16],f16> loc(#loc1)
%3135 = torch.aten.clone %3134, %int0 : !torch.vtensor<[640,40,16],f16>, !torch.int -> !torch.vtensor<[640,40,16],f16> loc(#loc1)
%3136 = torch.prim.ListConstruct %int640, %int640 : (!torch.int, !torch.int) -> !torch.list<int> loc(#loc4)
%3137 = torch.aten.view %3135, %3136 : !torch.vtensor<[640,40,16],f16>, !torch.list<int> -> !torch.vtensor<[640,640],f16> loc(#loc1)
%3138 = torch.aten.mul.Tensor %129, %3137 : !torch.vtensor<[640,640],si8>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16> loc(#loc1)
%3139 = torch.aten.transpose.int %3138, %int0, %int1 : !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,640],f16> loc(#loc1)
%3140 = torch.prim.ListConstruct %int2048, %int640 : (!torch.int, !torch.int) -> !torch.list<int> loc(#loc3)
%3141 = torch.aten.view %3132, %3140 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16> loc(#loc1)
%3142 = torch.aten.mm %3141, %3139 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16> loc(#loc1)
%3143 = torch.aten.mul.Scalar %130, %int1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640],f16> loc(#loc1)
%3144 = torch.aten.add.Tensor %3143, %3142, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2048,640],f16>, !torch.int -> !torch.vtensor<[2048,640],f16> loc(#loc1)
%3145 = torch.aten.view %3144, %3072 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
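    // Same pattern for a second [640,640] projection (%132/%131, bias
    // %133), presumably the key projection -> %3198.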
%3146 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3147 = torch.aten.detach %3146 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3148 = torch.aten.view %3087, %3090 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,40,16],f16> loc(#loc1)
%3149 = torch.aten.abs %3148 : !torch.vtensor<[2,1024,40,16],f16> -> !torch.vtensor<[2,1024,40,16],f16> loc(#loc1)
%values_66, %indices_67 = torch.aten.max.dim %3149, %int3, %true : !torch.vtensor<[2,1024,40,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,1024,40,1],f16>, !torch.vtensor<[2,1024,40,1],si64> loc(#loc1)
%3150 = torch.aten.view %values_66, %3093 : !torch.vtensor<[2,1024,40,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,40,1],f16> loc(#loc1)
%3151 = torch.aten.broadcast_to %3150, %3090 : !torch.vtensor<[2,1024,40,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,40,16],f16> loc(#loc1)
%3152 = torch.aten.clone %3151, %int0 : !torch.vtensor<[2,1024,40,16],f16>, !torch.int -> !torch.vtensor<[2,1024,40,16],f16> loc(#loc1)
%3153 = torch.aten.view %3152, %3072 : !torch.vtensor<[2,1024,40,16],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3154 = torch.aten.sub.Tensor %3147, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%3155 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%3156 = torch.aten.pow.Tensor_Tensor %3155, %3154 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3157 = torch.aten.neg %3156 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3158 = torch.aten.neg %3157 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3159 = torch.aten.div.Tensor %3153, %3158 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3160 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3161 = torch.aten.detach %3160 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3162 = torch.aten.div.Tensor %3087, %3159 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3163 = torch.aten.add.Tensor %3162, %3161, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3164 = torch.aten.sub.Tensor %3147, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%3165 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%3166 = torch.aten.pow.Tensor_Tensor %3165, %3164 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3167 = torch.aten.neg %3166 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3168 = torch.aten.sub.Tensor %3147, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%3169 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%3170 = torch.aten.pow.Tensor_Tensor %3169, %3168 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3171 = torch.aten.sub.Tensor %3170, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%3172 = torch.aten.gt.Tensor %3163, %3171 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,1024,640],i1> loc(#loc1)
%3173 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%3174 = torch.aten.to.dtype %3173, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%3175 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%3176 = torch.aten.broadcast_to %3174, %3175 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%3177 = torch.valsem.aten.copy %3176, %3171, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%3178 = torch.aten.where.self %3172, %3177, %3163 : !torch.vtensor<[2,1024,640],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3179 = torch.aten.lt.Tensor %3178, %3167 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,1024,640],i1> loc(#loc1)
%3180 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%3181 = torch.aten.to.dtype %3180, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%3182 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%3183 = torch.aten.broadcast_to %3181, %3182 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%3184 = torch.valsem.aten.copy %3183, %3167, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%3185 = torch.aten.where.self %3179, %3184, %3178 : !torch.vtensor<[2,1024,640],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3186 = torch.aten.round %3185 : !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3187 = torch.aten.sub.Tensor %3186, %3161, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3188 = torch.aten.mul.Tensor %3187, %3159 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3189 = torch.aten.broadcast_to %131, %3133 : !torch.vtensor<[640,40,1],f16>, !torch.list<int> -> !torch.vtensor<[640,40,16],f16> loc(#loc1)
%3190 = torch.aten.clone %3189, %int0 : !torch.vtensor<[640,40,16],f16>, !torch.int -> !torch.vtensor<[640,40,16],f16> loc(#loc1)
%3191 = torch.aten.view %3190, %3136 : !torch.vtensor<[640,40,16],f16>, !torch.list<int> -> !torch.vtensor<[640,640],f16> loc(#loc1)
%3192 = torch.aten.mul.Tensor %132, %3191 : !torch.vtensor<[640,640],si8>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16> loc(#loc1)
%3193 = torch.aten.transpose.int %3192, %int0, %int1 : !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,640],f16> loc(#loc1)
%3194 = torch.aten.view %3188, %3140 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16> loc(#loc1)
%3195 = torch.aten.mm %3194, %3193 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16> loc(#loc1)
%3196 = torch.aten.mul.Scalar %133, %int1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640],f16> loc(#loc1)
%3197 = torch.aten.add.Tensor %3196, %3195, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2048,640],f16>, !torch.int -> !torch.vtensor<[2048,640],f16> loc(#loc1)
%3198 = torch.aten.view %3197, %3072 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
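    // And a third projection (%135/%134, bias %136), presumably the value
    // projection -> %3251.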
%3199 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3200 = torch.aten.detach %3199 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3201 = torch.aten.view %3087, %3090 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,40,16],f16> loc(#loc1)
%3202 = torch.aten.abs %3201 : !torch.vtensor<[2,1024,40,16],f16> -> !torch.vtensor<[2,1024,40,16],f16> loc(#loc1)
%values_68, %indices_69 = torch.aten.max.dim %3202, %int3, %true : !torch.vtensor<[2,1024,40,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,1024,40,1],f16>, !torch.vtensor<[2,1024,40,1],si64> loc(#loc1)
%3203 = torch.aten.view %values_68, %3093 : !torch.vtensor<[2,1024,40,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,40,1],f16> loc(#loc1)
%3204 = torch.aten.broadcast_to %3203, %3090 : !torch.vtensor<[2,1024,40,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,40,16],f16> loc(#loc1)
%3205 = torch.aten.clone %3204, %int0 : !torch.vtensor<[2,1024,40,16],f16>, !torch.int -> !torch.vtensor<[2,1024,40,16],f16> loc(#loc1)
%3206 = torch.aten.view %3205, %3072 : !torch.vtensor<[2,1024,40,16],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3207 = torch.aten.sub.Tensor %3200, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%3208 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%3209 = torch.aten.pow.Tensor_Tensor %3208, %3207 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3210 = torch.aten.neg %3209 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3211 = torch.aten.neg %3210 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3212 = torch.aten.div.Tensor %3206, %3211 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3213 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3214 = torch.aten.detach %3213 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3215 = torch.aten.div.Tensor %3087, %3212 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3216 = torch.aten.add.Tensor %3215, %3214, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3217 = torch.aten.sub.Tensor %3200, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%3218 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%3219 = torch.aten.pow.Tensor_Tensor %3218, %3217 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3220 = torch.aten.neg %3219 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3221 = torch.aten.sub.Tensor %3200, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%3222 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%3223 = torch.aten.pow.Tensor_Tensor %3222, %3221 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3224 = torch.aten.sub.Tensor %3223, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%3225 = torch.aten.gt.Tensor %3216, %3224 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,1024,640],i1> loc(#loc1)
%3226 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%3227 = torch.aten.to.dtype %3226, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%3228 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%3229 = torch.aten.broadcast_to %3227, %3228 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%3230 = torch.valsem.aten.copy %3229, %3224, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%3231 = torch.aten.where.self %3225, %3230, %3216 : !torch.vtensor<[2,1024,640],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3232 = torch.aten.lt.Tensor %3231, %3220 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,1024,640],i1> loc(#loc1)
%3233 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%3234 = torch.aten.to.dtype %3233, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%3235 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%3236 = torch.aten.broadcast_to %3234, %3235 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%3237 = torch.valsem.aten.copy %3236, %3220, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%3238 = torch.aten.where.self %3232, %3237, %3231 : !torch.vtensor<[2,1024,640],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3239 = torch.aten.round %3238 : !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3240 = torch.aten.sub.Tensor %3239, %3214, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3241 = torch.aten.mul.Tensor %3240, %3212 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3242 = torch.aten.broadcast_to %134, %3133 : !torch.vtensor<[640,40,1],f16>, !torch.list<int> -> !torch.vtensor<[640,40,16],f16> loc(#loc1)
%3243 = torch.aten.clone %3242, %int0 : !torch.vtensor<[640,40,16],f16>, !torch.int -> !torch.vtensor<[640,40,16],f16> loc(#loc1)
%3244 = torch.aten.view %3243, %3136 : !torch.vtensor<[640,40,16],f16>, !torch.list<int> -> !torch.vtensor<[640,640],f16> loc(#loc1)
%3245 = torch.aten.mul.Tensor %135, %3244 : !torch.vtensor<[640,640],si8>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16> loc(#loc1)
%3246 = torch.aten.transpose.int %3245, %int0, %int1 : !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,640],f16> loc(#loc1)
%3247 = torch.aten.view %3241, %3140 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16> loc(#loc1)
%3248 = torch.aten.mm %3247, %3246 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16> loc(#loc1)
%3249 = torch.aten.mul.Scalar %136, %int1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640],f16> loc(#loc1)
%3250 = torch.aten.add.Tensor %3249, %3248, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2048,640],f16>, !torch.int -> !torch.vtensor<[2048,640],f16> loc(#loc1)
%3251 = torch.aten.view %3250, %3072 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
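    // Multi-head attention with 8 heads of dim 80: q/k/v are reshaped to
    // [2,8,1024,80] and folded to [16,1024,80]; logits = q @ k^T scaled by
    // the scalar %1 (consistent with 1/sqrt(80)); softmax is computed in
    // the numerically stable form exp(x - max) / sum; the weights are
    // applied to v and the result reshaped back to [2,1024,640].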
%3252 = torch.prim.ListConstruct %int2, %int1024, %int8, %int80 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc5)
%3253 = torch.aten.view %3145, %3252 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16> loc(#loc1)
%3254 = torch.aten.permute %3253, %1380 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16> loc(#loc1)
%3255 = torch.aten.clone %3254, %int0 : !torch.vtensor<[2,8,1024,80],f16>, !torch.int -> !torch.vtensor<[2,8,1024,80],f16> loc(#loc1)
%3256 = torch.prim.ListConstruct %int16, %int1024, %int80 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc2)
%3257 = torch.aten.view %3255, %3256 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16> loc(#loc1)
%3258 = torch.aten.view %3198, %3252 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16> loc(#loc1)
%3259 = torch.aten.permute %3258, %1380 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16> loc(#loc1)
%3260 = torch.aten.clone %3259, %int0 : !torch.vtensor<[2,8,1024,80],f16>, !torch.int -> !torch.vtensor<[2,8,1024,80],f16> loc(#loc1)
%3261 = torch.aten.view %3260, %3256 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16> loc(#loc1)
%3262 = torch.aten.view %3251, %3252 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16> loc(#loc1)
%3263 = torch.aten.permute %3262, %1380 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16> loc(#loc1)
%3264 = torch.aten.clone %3263, %int0 : !torch.vtensor<[2,8,1024,80],f16>, !torch.int -> !torch.vtensor<[2,8,1024,80],f16> loc(#loc1)
%3265 = torch.aten.view %3264, %3256 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16> loc(#loc1)
%3266 = torch.aten.transpose.int %3261, %int-1, %int-2 : !torch.vtensor<[16,1024,80],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,80,1024],f16> loc(#loc1)
%3267 = torch.aten.broadcast_to %3257, %3256 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16> loc(#loc1)
%3268 = torch.aten.view %3267, %3256 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16> loc(#loc1)
%3269 = torch.prim.ListConstruct %int16, %int80, %int1024 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc2)
%3270 = torch.aten.broadcast_to %3266, %3269 : !torch.vtensor<[16,80,1024],f16>, !torch.list<int> -> !torch.vtensor<[16,80,1024],f16> loc(#loc1)
%3271 = torch.aten.view %3270, %3269 : !torch.vtensor<[16,80,1024],f16>, !torch.list<int> -> !torch.vtensor<[16,80,1024],f16> loc(#loc1)
%3272 = torch.aten.bmm %3268, %3271 : !torch.vtensor<[16,1024,80],f16>, !torch.vtensor<[16,80,1024],f16> -> !torch.vtensor<[16,1024,1024],f16> loc(#loc1)
%3273 = torch.prim.ListConstruct %int16, %int1024, %int1024 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc4)
%3274 = torch.aten.view %3272, %3273 : !torch.vtensor<[16,1024,1024],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,1024],f16> loc(#loc1)
%3275 = torch.aten.mul.Tensor %3274, %1 : !torch.vtensor<[16,1024,1024],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,1024,1024],f16> loc(#loc1)
%values_70, %indices_71 = torch.aten.max.dim %3275, %int-1, %true : !torch.vtensor<[16,1024,1024],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,1024,1],f16>, !torch.vtensor<[16,1024,1],si64> loc(#loc1)
%3276 = torch.aten.sub.Tensor %3275, %values_70, %float1.000000e00 : !torch.vtensor<[16,1024,1024],f16>, !torch.vtensor<[16,1024,1],f16>, !torch.float -> !torch.vtensor<[16,1024,1024],f16> loc(#loc1)
%3277 = torch.aten.exp %3276 : !torch.vtensor<[16,1024,1024],f16> -> !torch.vtensor<[16,1024,1024],f16> loc(#loc1)
%3278 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int> loc(#loc1)
%3279 = torch.aten.sum.dim_IntList %3277, %3278, %true, %none : !torch.vtensor<[16,1024,1024],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[16,1024,1],f16> loc(#loc1)
%3280 = torch.aten.div.Tensor %3277, %3279 : !torch.vtensor<[16,1024,1024],f16>, !torch.vtensor<[16,1024,1],f16> -> !torch.vtensor<[16,1024,1024],f16> loc(#loc1)
%3281 = torch.aten.broadcast_to %3280, %3273 : !torch.vtensor<[16,1024,1024],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,1024],f16> loc(#loc1)
%3282 = torch.aten.view %3281, %3273 : !torch.vtensor<[16,1024,1024],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,1024],f16> loc(#loc1)
%3283 = torch.aten.broadcast_to %3265, %3256 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16> loc(#loc1)
%3284 = torch.aten.view %3283, %3256 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16> loc(#loc1)
%3285 = torch.aten.bmm %3282, %3284 : !torch.vtensor<[16,1024,1024],f16>, !torch.vtensor<[16,1024,80],f16> -> !torch.vtensor<[16,1024,80],f16> loc(#loc1)
%3286 = torch.aten.view %3285, %3256 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16> loc(#loc1)
%3287 = torch.prim.ListConstruct %int2, %int8, %int1024, %int80 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc5)
%3288 = torch.aten.view %3286, %3287 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16> loc(#loc1)
%3289 = torch.aten.permute %3288, %1380 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16> loc(#loc1)
%3290 = torch.aten.clone %3289, %int0 : !torch.vtensor<[2,1024,8,80],f16>, !torch.int -> !torch.vtensor<[2,1024,8,80],f16> loc(#loc1)
%3291 = torch.aten.view %3290, %3072 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
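    // Fake-quantize the attention output with the same per-group pattern,
    // presumably ahead of the output projection.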
%3292 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3293 = torch.aten.detach %3292 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3294 = torch.aten.view %3291, %3090 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,40,16],f16> loc(#loc1)
%3295 = torch.aten.abs %3294 : !torch.vtensor<[2,1024,40,16],f16> -> !torch.vtensor<[2,1024,40,16],f16> loc(#loc1)
%values_72, %indices_73 = torch.aten.max.dim %3295, %int3, %true : !torch.vtensor<[2,1024,40,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,1024,40,1],f16>, !torch.vtensor<[2,1024,40,1],si64> loc(#loc1)
%3296 = torch.aten.view %values_72, %3093 : !torch.vtensor<[2,1024,40,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,40,1],f16> loc(#loc1)
%3297 = torch.aten.broadcast_to %3296, %3090 : !torch.vtensor<[2,1024,40,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,40,16],f16> loc(#loc1)
%3298 = torch.aten.clone %3297, %int0 : !torch.vtensor<[2,1024,40,16],f16>, !torch.int -> !torch.vtensor<[2,1024,40,16],f16> loc(#loc1)
%3299 = torch.aten.view %3298, %3072 : !torch.vtensor<[2,1024,40,16],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3300 = torch.aten.sub.Tensor %3293, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%3301 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%3302 = torch.aten.pow.Tensor_Tensor %3301, %3300 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3303 = torch.aten.neg %3302 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3304 = torch.aten.neg %3303 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3305 = torch.aten.div.Tensor %3299, %3304 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3306 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3307 = torch.aten.detach %3306 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3308 = torch.aten.div.Tensor %3291, %3305 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3309 = torch.aten.add.Tensor %3308, %3307, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3310 = torch.aten.sub.Tensor %3293, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%3311 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%3312 = torch.aten.pow.Tensor_Tensor %3311, %3310 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3313 = torch.aten.neg %3312 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3314 = torch.aten.sub.Tensor %3293, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%3315 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%3316 = torch.aten.pow.Tensor_Tensor %3315, %3314 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3317 = torch.aten.sub.Tensor %3316, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%3318 = torch.aten.gt.Tensor %3309, %3317 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,1024,640],i1> loc(#loc1)
%3319 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%3320 = torch.aten.to.dtype %3319, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%3321 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%3322 = torch.aten.broadcast_to %3320, %3321 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%3323 = torch.valsem.aten.copy %3322, %3317, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%3324 = torch.aten.where.self %3318, %3323, %3309 : !torch.vtensor<[2,1024,640],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3325 = torch.aten.lt.Tensor %3324, %3313 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,1024,640],i1> loc(#loc1)
%3326 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%3327 = torch.aten.to.dtype %3326, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%3328 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%3329 = torch.aten.broadcast_to %3327, %3328 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%3330 = torch.valsem.aten.copy %3329, %3313, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%3331 = torch.aten.where.self %3325, %3330, %3324 : !torch.vtensor<[2,1024,640],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3332 = torch.aten.round %3331 : !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3333 = torch.aten.sub.Tensor %3332, %3307, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3334 = torch.aten.mul.Tensor %3333, %3305 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
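// [editor's note] Quantized linear: the per-group f16 scales (%137) dequantize the si8 weight
// (%138), then transpose + mm + bias (%139), and %3345 adds the residual from %3073.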
%3335 = torch.aten.broadcast_to %137, %3133 : !torch.vtensor<[640,40,1],f16>, !torch.list<int> -> !torch.vtensor<[640,40,16],f16> loc(#loc1)
%3336 = torch.aten.clone %3335, %int0 : !torch.vtensor<[640,40,16],f16>, !torch.int -> !torch.vtensor<[640,40,16],f16> loc(#loc1)
%3337 = torch.aten.view %3336, %3136 : !torch.vtensor<[640,40,16],f16>, !torch.list<int> -> !torch.vtensor<[640,640],f16> loc(#loc1)
%3338 = torch.aten.mul.Tensor %138, %3337 : !torch.vtensor<[640,640],si8>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16> loc(#loc1)
%3339 = torch.aten.transpose.int %3338, %int0, %int1 : !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,640],f16> loc(#loc1)
%3340 = torch.aten.view %3334, %3140 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16> loc(#loc1)
%3341 = torch.aten.mm %3340, %3339 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16> loc(#loc1)
%3342 = torch.aten.mul.Scalar %139, %int1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640],f16> loc(#loc1)
%3343 = torch.aten.add.Tensor %3342, %3341, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2048,640],f16>, !torch.int -> !torch.vtensor<[2048,640],f16> loc(#loc1)
%3344 = torch.aten.view %3343, %3072 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3345 = torch.aten.add.Tensor %3344, %3073, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
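// [editor's note] LayerNorm over the 640-wide channel dim: mean, centered variance, eps 1e-05,
// rsqrt; no affine scale/shift appears in this excerpt.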
%3346 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int> loc(#loc1)
%3347 = torch.aten.sum.dim_IntList %3345, %3346, %true, %none : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,1024,1],f16> loc(#loc1)
%3348 = torch.aten.div.Scalar %3347, %int640 : !torch.vtensor<[2,1024,1],f16>, !torch.int -> !torch.vtensor<[2,1024,1],f16> loc(#loc1)
%3349 = torch.prim.ListConstruct %int2, %int1024, %int640 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%3350 = torch.aten.broadcast_to %3348, %3349 : !torch.vtensor<[2,1024,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3351 = torch.aten.sub.Tensor %3345, %3350, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3352 = torch.aten.mul.Tensor %3351, %3351 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3353 = torch.aten.sum.dim_IntList %3352, %3346, %true, %none : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,1024,1],f16> loc(#loc1)
%3354 = torch.aten.div.Scalar %3353, %int640 : !torch.vtensor<[2,1024,1],f16>, !torch.int -> !torch.vtensor<[2,1024,1],f16> loc(#loc1)
%3355 = torch.aten.add.Scalar %3354, %float1.000000e-05, %int1 : !torch.vtensor<[2,1024,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,1024,1],f16> loc(#loc1)
%3356 = torch.aten.rsqrt %3355 : !torch.vtensor<[2,1024,1],f16> -> !torch.vtensor<[2,1024,1],f16> loc(#loc1)
%3357 = torch.prim.ListConstruct %int2, %int1024, %int640 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%3358 = torch.aten.broadcast_to %3356, %3357 : !torch.vtensor<[2,1024,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3359 = torch.aten.mul.Tensor %3351, %3358 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
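// [editor's note] The same absmax/clamp/round/dequantize pattern as above, applied to the
// normalized activations.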
%3360 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3361 = torch.aten.detach %3360 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3362 = torch.aten.view %3359, %3090 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,40,16],f16> loc(#loc1)
%3363 = torch.aten.abs %3362 : !torch.vtensor<[2,1024,40,16],f16> -> !torch.vtensor<[2,1024,40,16],f16> loc(#loc1)
%values_74, %indices_75 = torch.aten.max.dim %3363, %int3, %true : !torch.vtensor<[2,1024,40,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,1024,40,1],f16>, !torch.vtensor<[2,1024,40,1],si64> loc(#loc1)
%3364 = torch.aten.view %values_74, %3093 : !torch.vtensor<[2,1024,40,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,40,1],f16> loc(#loc1)
%3365 = torch.aten.broadcast_to %3364, %3090 : !torch.vtensor<[2,1024,40,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,40,16],f16> loc(#loc1)
%3366 = torch.aten.clone %3365, %int0 : !torch.vtensor<[2,1024,40,16],f16>, !torch.int -> !torch.vtensor<[2,1024,40,16],f16> loc(#loc1)
%3367 = torch.aten.view %3366, %3072 : !torch.vtensor<[2,1024,40,16],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3368 = torch.aten.sub.Tensor %3361, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%3369 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%3370 = torch.aten.pow.Tensor_Tensor %3369, %3368 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3371 = torch.aten.neg %3370 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3372 = torch.aten.neg %3371 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3373 = torch.aten.div.Tensor %3367, %3372 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3374 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3375 = torch.aten.detach %3374 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3376 = torch.aten.div.Tensor %3359, %3373 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3377 = torch.aten.add.Tensor %3376, %3375, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3378 = torch.aten.sub.Tensor %3361, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%3379 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%3380 = torch.aten.pow.Tensor_Tensor %3379, %3378 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3381 = torch.aten.neg %3380 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3382 = torch.aten.sub.Tensor %3361, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%3383 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%3384 = torch.aten.pow.Tensor_Tensor %3383, %3382 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3385 = torch.aten.sub.Tensor %3384, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%3386 = torch.aten.gt.Tensor %3377, %3385 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,1024,640],i1> loc(#loc1)
%3387 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%3388 = torch.aten.to.dtype %3387, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%3389 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%3390 = torch.aten.broadcast_to %3388, %3389 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%3391 = torch.valsem.aten.copy %3390, %3385, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%3392 = torch.aten.where.self %3386, %3391, %3377 : !torch.vtensor<[2,1024,640],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3393 = torch.aten.lt.Tensor %3392, %3381 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,1024,640],i1> loc(#loc1)
%3394 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%3395 = torch.aten.to.dtype %3394, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%3396 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%3397 = torch.aten.broadcast_to %3395, %3396 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%3398 = torch.valsem.aten.copy %3397, %3381, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%3399 = torch.aten.where.self %3393, %3398, %3392 : !torch.vtensor<[2,1024,640],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3400 = torch.aten.round %3399 : !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3401 = torch.aten.sub.Tensor %3400, %3375, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3402 = torch.aten.mul.Tensor %3401, %3373 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
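// [editor's note] Quantized 640 -> 640 linear (scales %140, si8 weight %141, bias %142); its
// output %3412 is later reshaped into per-head form, so this reads as the query projection.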
%3403 = torch.aten.broadcast_to %140, %3133 : !torch.vtensor<[640,40,1],f16>, !torch.list<int> -> !torch.vtensor<[640,40,16],f16> loc(#loc1)
%3404 = torch.aten.clone %3403, %int0 : !torch.vtensor<[640,40,16],f16>, !torch.int -> !torch.vtensor<[640,40,16],f16> loc(#loc1)
%3405 = torch.aten.view %3404, %3136 : !torch.vtensor<[640,40,16],f16>, !torch.list<int> -> !torch.vtensor<[640,640],f16> loc(#loc1)
%3406 = torch.aten.mul.Tensor %141, %3405 : !torch.vtensor<[640,640],si8>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16> loc(#loc1)
%3407 = torch.aten.transpose.int %3406, %int0, %int1 : !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,640],f16> loc(#loc1)
%3408 = torch.aten.view %3402, %3140 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16> loc(#loc1)
%3409 = torch.aten.mm %3408, %3407 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16> loc(#loc1)
%3410 = torch.aten.mul.Scalar %142, %int1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640],f16> loc(#loc1)
%3411 = torch.aten.add.Tensor %3410, %3409, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2048,640],f16>, !torch.int -> !torch.vtensor<[2048,640],f16> loc(#loc1)
%3412 = torch.aten.view %3411, %3072 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
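// [editor's note] Cross-attention operands: the encoder hidden states %arg2 ([2,77,768]) are
// fake-quantized and projected 768 -> 640 twice below, producing what read as the key (%3466)
// and value (%3517) paths.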
%3413 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3414 = torch.aten.detach %3413 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3415 = torch.aten.view %arg2, %1542 : !torch.vtensor<[2,77,768],f16>, !torch.list<int> -> !torch.vtensor<[2,77,48,16],f16> loc(#loc1)
%3416 = torch.aten.abs %3415 : !torch.vtensor<[2,77,48,16],f16> -> !torch.vtensor<[2,77,48,16],f16> loc(#loc1)
%values_76, %indices_77 = torch.aten.max.dim %3416, %int3, %true : !torch.vtensor<[2,77,48,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,77,48,1],f16>, !torch.vtensor<[2,77,48,1],si64> loc(#loc1)
%3417 = torch.aten.view %values_76, %1545 : !torch.vtensor<[2,77,48,1],f16>, !torch.list<int> -> !torch.vtensor<[2,77,48,1],f16> loc(#loc1)
%3418 = torch.aten.broadcast_to %3417, %1542 : !torch.vtensor<[2,77,48,1],f16>, !torch.list<int> -> !torch.vtensor<[2,77,48,16],f16> loc(#loc1)
%3419 = torch.aten.clone %3418, %int0 : !torch.vtensor<[2,77,48,16],f16>, !torch.int -> !torch.vtensor<[2,77,48,16],f16> loc(#loc1)
%3420 = torch.aten.view %3419, %1549 : !torch.vtensor<[2,77,48,16],f16>, !torch.list<int> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%3421 = torch.aten.sub.Tensor %3414, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%3422 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%3423 = torch.aten.pow.Tensor_Tensor %3422, %3421 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3424 = torch.aten.neg %3423 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3425 = torch.aten.neg %3424 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3426 = torch.aten.div.Tensor %3420, %3425 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%3427 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3428 = torch.aten.detach %3427 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3429 = torch.aten.div.Tensor %arg2, %3426 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[2,77,768],f16> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%3430 = torch.aten.add.Tensor %3429, %3428, %int1 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%3431 = torch.aten.sub.Tensor %3414, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%3432 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%3433 = torch.aten.pow.Tensor_Tensor %3432, %3431 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3434 = torch.aten.neg %3433 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3435 = torch.aten.sub.Tensor %3414, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%3436 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%3437 = torch.aten.pow.Tensor_Tensor %3436, %3435 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3438 = torch.aten.sub.Tensor %3437, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%3439 = torch.aten.gt.Tensor %3430, %3438 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,77,768],i1> loc(#loc1)
%3440 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%3441 = torch.aten.to.dtype %3440, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%3442 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%3443 = torch.aten.broadcast_to %3441, %3442 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%3444 = torch.valsem.aten.copy %3443, %3438, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%3445 = torch.aten.where.self %3439, %3444, %3430 : !torch.vtensor<[2,77,768],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,77,768],f16> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%3446 = torch.aten.lt.Tensor %3445, %3434 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,77,768],i1> loc(#loc1)
%3447 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%3448 = torch.aten.to.dtype %3447, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%3449 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%3450 = torch.aten.broadcast_to %3448, %3449 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%3451 = torch.valsem.aten.copy %3450, %3434, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%3452 = torch.aten.where.self %3446, %3451, %3445 : !torch.vtensor<[2,77,768],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,77,768],f16> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%3453 = torch.aten.round %3452 : !torch.vtensor<[2,77,768],f16> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%3454 = torch.aten.sub.Tensor %3453, %3428, %int1 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%3455 = torch.aten.mul.Tensor %3454, %3426 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[2,77,768],f16> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%3456 = torch.prim.ListConstruct %int640, %int48, %int16 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc2)
%3457 = torch.aten.broadcast_to %143, %3456 : !torch.vtensor<[640,48,1],f16>, !torch.list<int> -> !torch.vtensor<[640,48,16],f16> loc(#loc1)
%3458 = torch.aten.clone %3457, %int0 : !torch.vtensor<[640,48,16],f16>, !torch.int -> !torch.vtensor<[640,48,16],f16> loc(#loc1)
%3459 = torch.prim.ListConstruct %int640, %int768 : (!torch.int, !torch.int) -> !torch.list<int> loc(#loc4)
%3460 = torch.aten.view %3458, %3459 : !torch.vtensor<[640,48,16],f16>, !torch.list<int> -> !torch.vtensor<[640,768],f16> loc(#loc1)
%3461 = torch.aten.mul.Tensor %144, %3460 : !torch.vtensor<[640,768],si8>, !torch.vtensor<[640,768],f16> -> !torch.vtensor<[640,768],f16> loc(#loc1)
%3462 = torch.aten.transpose.int %3461, %int0, %int1 : !torch.vtensor<[640,768],f16>, !torch.int, !torch.int -> !torch.vtensor<[768,640],f16> loc(#loc1)
%3463 = torch.aten.view %3455, %1593 : !torch.vtensor<[2,77,768],f16>, !torch.list<int> -> !torch.vtensor<[154,768],f16> loc(#loc1)
%3464 = torch.aten.mm %3463, %3462 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,640],f16> -> !torch.vtensor<[154,640],f16> loc(#loc1)
%3465 = torch.prim.ListConstruct %int2, %int77, %int640 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc0)
%3466 = torch.aten.view %3464, %3465 : !torch.vtensor<[154,640],f16>, !torch.list<int> -> !torch.vtensor<[2,77,640],f16> loc(#loc1)
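// [editor's note] The quantize-then-project pattern repeats for the second 768 -> 640 projection.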
%3467 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3468 = torch.aten.detach %3467 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3469 = torch.aten.view %arg2, %1542 : !torch.vtensor<[2,77,768],f16>, !torch.list<int> -> !torch.vtensor<[2,77,48,16],f16> loc(#loc1)
%3470 = torch.aten.abs %3469 : !torch.vtensor<[2,77,48,16],f16> -> !torch.vtensor<[2,77,48,16],f16> loc(#loc1)
%values_78, %indices_79 = torch.aten.max.dim %3470, %int3, %true : !torch.vtensor<[2,77,48,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,77,48,1],f16>, !torch.vtensor<[2,77,48,1],si64> loc(#loc1)
%3471 = torch.aten.view %values_78, %1545 : !torch.vtensor<[2,77,48,1],f16>, !torch.list<int> -> !torch.vtensor<[2,77,48,1],f16> loc(#loc1)
%3472 = torch.aten.broadcast_to %3471, %1542 : !torch.vtensor<[2,77,48,1],f16>, !torch.list<int> -> !torch.vtensor<[2,77,48,16],f16> loc(#loc1)
%3473 = torch.aten.clone %3472, %int0 : !torch.vtensor<[2,77,48,16],f16>, !torch.int -> !torch.vtensor<[2,77,48,16],f16> loc(#loc1)
%3474 = torch.aten.view %3473, %1549 : !torch.vtensor<[2,77,48,16],f16>, !torch.list<int> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%3475 = torch.aten.sub.Tensor %3468, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%3476 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%3477 = torch.aten.pow.Tensor_Tensor %3476, %3475 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3478 = torch.aten.neg %3477 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3479 = torch.aten.neg %3478 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3480 = torch.aten.div.Tensor %3474, %3479 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%3481 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3482 = torch.aten.detach %3481 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3483 = torch.aten.div.Tensor %arg2, %3480 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[2,77,768],f16> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%3484 = torch.aten.add.Tensor %3483, %3482, %int1 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%3485 = torch.aten.sub.Tensor %3468, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%3486 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%3487 = torch.aten.pow.Tensor_Tensor %3486, %3485 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3488 = torch.aten.neg %3487 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3489 = torch.aten.sub.Tensor %3468, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%3490 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%3491 = torch.aten.pow.Tensor_Tensor %3490, %3489 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3492 = torch.aten.sub.Tensor %3491, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%3493 = torch.aten.gt.Tensor %3484, %3492 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,77,768],i1> loc(#loc1)
%3494 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%3495 = torch.aten.to.dtype %3494, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%3496 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%3497 = torch.aten.broadcast_to %3495, %3496 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%3498 = torch.valsem.aten.copy %3497, %3492, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%3499 = torch.aten.where.self %3493, %3498, %3484 : !torch.vtensor<[2,77,768],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,77,768],f16> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%3500 = torch.aten.lt.Tensor %3499, %3488 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,77,768],i1> loc(#loc1)
%3501 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%3502 = torch.aten.to.dtype %3501, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%3503 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%3504 = torch.aten.broadcast_to %3502, %3503 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%3505 = torch.valsem.aten.copy %3504, %3488, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%3506 = torch.aten.where.self %3500, %3505, %3499 : !torch.vtensor<[2,77,768],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,77,768],f16> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%3507 = torch.aten.round %3506 : !torch.vtensor<[2,77,768],f16> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%3508 = torch.aten.sub.Tensor %3507, %3482, %int1 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%3509 = torch.aten.mul.Tensor %3508, %3480 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[2,77,768],f16> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%3510 = torch.aten.broadcast_to %145, %3456 : !torch.vtensor<[640,48,1],f16>, !torch.list<int> -> !torch.vtensor<[640,48,16],f16> loc(#loc1)
%3511 = torch.aten.clone %3510, %int0 : !torch.vtensor<[640,48,16],f16>, !torch.int -> !torch.vtensor<[640,48,16],f16> loc(#loc1)
%3512 = torch.aten.view %3511, %3459 : !torch.vtensor<[640,48,16],f16>, !torch.list<int> -> !torch.vtensor<[640,768],f16> loc(#loc1)
%3513 = torch.aten.mul.Tensor %146, %3512 : !torch.vtensor<[640,768],si8>, !torch.vtensor<[640,768],f16> -> !torch.vtensor<[640,768],f16> loc(#loc1)
%3514 = torch.aten.transpose.int %3513, %int0, %int1 : !torch.vtensor<[640,768],f16>, !torch.int, !torch.int -> !torch.vtensor<[768,640],f16> loc(#loc1)
%3515 = torch.aten.view %3509, %1593 : !torch.vtensor<[2,77,768],f16>, !torch.list<int> -> !torch.vtensor<[154,768],f16> loc(#loc1)
%3516 = torch.aten.mm %3515, %3514 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,640],f16> -> !torch.vtensor<[154,640],f16> loc(#loc1)
%3517 = torch.aten.view %3516, %3465 : !torch.vtensor<[154,640],f16>, !torch.list<int> -> !torch.vtensor<[2,77,640],f16> loc(#loc1)
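// [editor's note] Multi-head reshape: [2,1024,640] and [2,77,640] are split into 8 heads of
// width 80 and flattened to batch*heads = 16 for the batched matmuls.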
%3518 = torch.aten.view %3412, %3252 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16> loc(#loc1)
%3519 = torch.aten.permute %3518, %1380 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16> loc(#loc1)
%3520 = torch.aten.clone %3519, %int0 : !torch.vtensor<[2,8,1024,80],f16>, !torch.int -> !torch.vtensor<[2,8,1024,80],f16> loc(#loc1)
%3521 = torch.aten.view %3520, %3256 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16> loc(#loc1)
%3522 = torch.prim.ListConstruct %int2, %int77, %int8, %int80 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc5)
%3523 = torch.aten.view %3466, %3522 : !torch.vtensor<[2,77,640],f16>, !torch.list<int> -> !torch.vtensor<[2,77,8,80],f16> loc(#loc1)
%3524 = torch.aten.permute %3523, %1380 : !torch.vtensor<[2,77,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,80],f16> loc(#loc1)
%3525 = torch.aten.clone %3524, %int0 : !torch.vtensor<[2,8,77,80],f16>, !torch.int -> !torch.vtensor<[2,8,77,80],f16> loc(#loc1)
%3526 = torch.prim.ListConstruct %int16, %int77, %int80 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc2)
%3527 = torch.aten.view %3525, %3526 : !torch.vtensor<[2,8,77,80],f16>, !torch.list<int> -> !torch.vtensor<[16,77,80],f16> loc(#loc1)
%3528 = torch.aten.view %3517, %3522 : !torch.vtensor<[2,77,640],f16>, !torch.list<int> -> !torch.vtensor<[2,77,8,80],f16> loc(#loc1)
%3529 = torch.aten.permute %3528, %1380 : !torch.vtensor<[2,77,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,80],f16> loc(#loc1)
%3530 = torch.aten.clone %3529, %int0 : !torch.vtensor<[2,8,77,80],f16>, !torch.int -> !torch.vtensor<[2,8,77,80],f16> loc(#loc1)
%3531 = torch.aten.view %3530, %3526 : !torch.vtensor<[2,8,77,80],f16>, !torch.list<int> -> !torch.vtensor<[16,77,80],f16> loc(#loc1)
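// [editor's note] Scaled dot-product attention: scores = Q @ K^T ([16,1024,80] x [16,80,77]),
// scaled by %1 (presumably 1/sqrt(80) for head width 80), then a numerically stable softmax
// (max-subtract, exp, sum, divide) and probs @ V, merged back to [2,1024,640].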
%3532 = torch.aten.transpose.int %3527, %int-1, %int-2 : !torch.vtensor<[16,77,80],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,80,77],f16> loc(#loc1)
%3533 = torch.aten.broadcast_to %3521, %3256 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16> loc(#loc1)
%3534 = torch.aten.view %3533, %3256 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16> loc(#loc1)
%3535 = torch.prim.ListConstruct %int16, %int80, %int77 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc2)
%3536 = torch.aten.broadcast_to %3532, %3535 : !torch.vtensor<[16,80,77],f16>, !torch.list<int> -> !torch.vtensor<[16,80,77],f16> loc(#loc1)
%3537 = torch.aten.view %3536, %3535 : !torch.vtensor<[16,80,77],f16>, !torch.list<int> -> !torch.vtensor<[16,80,77],f16> loc(#loc1)
%3538 = torch.aten.bmm %3534, %3537 : !torch.vtensor<[16,1024,80],f16>, !torch.vtensor<[16,80,77],f16> -> !torch.vtensor<[16,1024,77],f16> loc(#loc1)
%3539 = torch.prim.ListConstruct %int16, %int1024, %int77 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc4)
%3540 = torch.aten.view %3538, %3539 : !torch.vtensor<[16,1024,77],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,77],f16> loc(#loc1)
%3541 = torch.aten.mul.Tensor %3540, %1 : !torch.vtensor<[16,1024,77],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,1024,77],f16> loc(#loc1)
%values_80, %indices_81 = torch.aten.max.dim %3541, %int-1, %true : !torch.vtensor<[16,1024,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,1024,1],f16>, !torch.vtensor<[16,1024,1],si64> loc(#loc1)
%3542 = torch.aten.sub.Tensor %3541, %values_80, %float1.000000e00 : !torch.vtensor<[16,1024,77],f16>, !torch.vtensor<[16,1024,1],f16>, !torch.float -> !torch.vtensor<[16,1024,77],f16> loc(#loc1)
%3543 = torch.aten.exp %3542 : !torch.vtensor<[16,1024,77],f16> -> !torch.vtensor<[16,1024,77],f16> loc(#loc1)
%3544 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int> loc(#loc1)
%3545 = torch.aten.sum.dim_IntList %3543, %3544, %true, %none : !torch.vtensor<[16,1024,77],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[16,1024,1],f16> loc(#loc1)
%3546 = torch.aten.div.Tensor %3543, %3545 : !torch.vtensor<[16,1024,77],f16>, !torch.vtensor<[16,1024,1],f16> -> !torch.vtensor<[16,1024,77],f16> loc(#loc1)
%3547 = torch.aten.broadcast_to %3546, %3539 : !torch.vtensor<[16,1024,77],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,77],f16> loc(#loc1)
%3548 = torch.aten.view %3547, %3539 : !torch.vtensor<[16,1024,77],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,77],f16> loc(#loc1)
%3549 = torch.aten.broadcast_to %3531, %3526 : !torch.vtensor<[16,77,80],f16>, !torch.list<int> -> !torch.vtensor<[16,77,80],f16> loc(#loc1)
%3550 = torch.aten.view %3549, %3526 : !torch.vtensor<[16,77,80],f16>, !torch.list<int> -> !torch.vtensor<[16,77,80],f16> loc(#loc1)
%3551 = torch.aten.bmm %3548, %3550 : !torch.vtensor<[16,1024,77],f16>, !torch.vtensor<[16,77,80],f16> -> !torch.vtensor<[16,1024,80],f16> loc(#loc1)
%3552 = torch.aten.view %3551, %3256 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16> loc(#loc1)
%3553 = torch.aten.view %3552, %3287 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16> loc(#loc1)
%3554 = torch.aten.permute %3553, %1380 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16> loc(#loc1)
%3555 = torch.aten.clone %3554, %int0 : !torch.vtensor<[2,1024,8,80],f16>, !torch.int -> !torch.vtensor<[2,1024,8,80],f16> loc(#loc1)
%3556 = torch.aten.view %3555, %3072 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
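// [editor's note] Fake-quantization of the cross-attention output, same pattern as above.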
%3557 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3558 = torch.aten.detach %3557 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3559 = torch.aten.view %3556, %3090 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,40,16],f16> loc(#loc1)
%3560 = torch.aten.abs %3559 : !torch.vtensor<[2,1024,40,16],f16> -> !torch.vtensor<[2,1024,40,16],f16> loc(#loc1)
%values_82, %indices_83 = torch.aten.max.dim %3560, %int3, %true : !torch.vtensor<[2,1024,40,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,1024,40,1],f16>, !torch.vtensor<[2,1024,40,1],si64> loc(#loc1)
%3561 = torch.aten.view %values_82, %3093 : !torch.vtensor<[2,1024,40,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,40,1],f16> loc(#loc1)
%3562 = torch.aten.broadcast_to %3561, %3090 : !torch.vtensor<[2,1024,40,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,40,16],f16> loc(#loc1)
%3563 = torch.aten.clone %3562, %int0 : !torch.vtensor<[2,1024,40,16],f16>, !torch.int -> !torch.vtensor<[2,1024,40,16],f16> loc(#loc1)
%3564 = torch.aten.view %3563, %3072 : !torch.vtensor<[2,1024,40,16],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3565 = torch.aten.sub.Tensor %3558, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%3566 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%3567 = torch.aten.pow.Tensor_Tensor %3566, %3565 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3568 = torch.aten.neg %3567 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3569 = torch.aten.neg %3568 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3570 = torch.aten.div.Tensor %3564, %3569 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3571 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3572 = torch.aten.detach %3571 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3573 = torch.aten.div.Tensor %3556, %3570 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3574 = torch.aten.add.Tensor %3573, %3572, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3575 = torch.aten.sub.Tensor %3558, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%3576 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%3577 = torch.aten.pow.Tensor_Tensor %3576, %3575 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3578 = torch.aten.neg %3577 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3579 = torch.aten.sub.Tensor %3558, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%3580 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%3581 = torch.aten.pow.Tensor_Tensor %3580, %3579 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3582 = torch.aten.sub.Tensor %3581, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%3583 = torch.aten.gt.Tensor %3574, %3582 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,1024,640],i1> loc(#loc1)
%3584 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%3585 = torch.aten.to.dtype %3584, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%3586 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%3587 = torch.aten.broadcast_to %3585, %3586 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%3588 = torch.valsem.aten.copy %3587, %3582, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%3589 = torch.aten.where.self %3583, %3588, %3574 : !torch.vtensor<[2,1024,640],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3590 = torch.aten.lt.Tensor %3589, %3578 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,1024,640],i1> loc(#loc1)
%3591 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%3592 = torch.aten.to.dtype %3591, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%3593 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%3594 = torch.aten.broadcast_to %3592, %3593 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%3595 = torch.valsem.aten.copy %3594, %3578, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%3596 = torch.aten.where.self %3590, %3595, %3589 : !torch.vtensor<[2,1024,640],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3597 = torch.aten.round %3596 : !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3598 = torch.aten.sub.Tensor %3597, %3572, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3599 = torch.aten.mul.Tensor %3598, %3570 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
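// [editor's note] Quantized output projection (scales %147, si8 weight %148, bias %149),
// followed by the residual add into %3610.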
%3600 = torch.aten.broadcast_to %147, %3133 : !torch.vtensor<[640,40,1],f16>, !torch.list<int> -> !torch.vtensor<[640,40,16],f16> loc(#loc1)
%3601 = torch.aten.clone %3600, %int0 : !torch.vtensor<[640,40,16],f16>, !torch.int -> !torch.vtensor<[640,40,16],f16> loc(#loc1)
%3602 = torch.aten.view %3601, %3136 : !torch.vtensor<[640,40,16],f16>, !torch.list<int> -> !torch.vtensor<[640,640],f16> loc(#loc1)
%3603 = torch.aten.mul.Tensor %148, %3602 : !torch.vtensor<[640,640],si8>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16> loc(#loc1)
%3604 = torch.aten.transpose.int %3603, %int0, %int1 : !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,640],f16> loc(#loc1)
%3605 = torch.aten.view %3599, %3140 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16> loc(#loc1)
%3606 = torch.aten.mm %3605, %3604 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16> loc(#loc1)
%3607 = torch.aten.mul.Scalar %149, %int1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640],f16> loc(#loc1)
%3608 = torch.aten.add.Tensor %3607, %3606, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2048,640],f16>, !torch.int -> !torch.vtensor<[2048,640],f16> loc(#loc1)
%3609 = torch.aten.view %3608, %3072 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3610 = torch.aten.add.Tensor %3609, %3345, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
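// [editor's note] Second LayerNorm of the block, again over the 640 channels with eps 1e-05.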
%3611 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int> loc(#loc1)
%3612 = torch.aten.sum.dim_IntList %3610, %3611, %true, %none : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,1024,1],f16> loc(#loc1)
%3613 = torch.aten.div.Scalar %3612, %int640 : !torch.vtensor<[2,1024,1],f16>, !torch.int -> !torch.vtensor<[2,1024,1],f16> loc(#loc1)
%3614 = torch.prim.ListConstruct %int2, %int1024, %int640 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%3615 = torch.aten.broadcast_to %3613, %3614 : !torch.vtensor<[2,1024,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3616 = torch.aten.sub.Tensor %3610, %3615, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3617 = torch.aten.mul.Tensor %3616, %3616 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3618 = torch.aten.sum.dim_IntList %3617, %3611, %true, %none : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,1024,1],f16> loc(#loc1)
%3619 = torch.aten.div.Scalar %3618, %int640 : !torch.vtensor<[2,1024,1],f16>, !torch.int -> !torch.vtensor<[2,1024,1],f16> loc(#loc1)
%3620 = torch.aten.add.Scalar %3619, %float1.000000e-05, %int1 : !torch.vtensor<[2,1024,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,1024,1],f16> loc(#loc1)
%3621 = torch.aten.rsqrt %3620 : !torch.vtensor<[2,1024,1],f16> -> !torch.vtensor<[2,1024,1],f16> loc(#loc1)
%3622 = torch.prim.ListConstruct %int2, %int1024, %int640 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%3623 = torch.aten.broadcast_to %3621, %3622 : !torch.vtensor<[2,1024,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3624 = torch.aten.mul.Tensor %3616, %3623 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
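// [editor's note] Gated feed-forward: a quantized 640 -> 5120 projection (scales %150, si8
// weight %151, bias %152) is split into two 2560-wide halves; gelu is applied to the second
// half and multiplied into the first, i.e. a GEGLU.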
%3625 = torch.prim.ListConstruct %int5120, %int40, %int16 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc2)
%3626 = torch.aten.broadcast_to %150, %3625 : !torch.vtensor<[5120,40,1],f16>, !torch.list<int> -> !torch.vtensor<[5120,40,16],f16> loc(#loc1)
%3627 = torch.aten.clone %3626, %int0 : !torch.vtensor<[5120,40,16],f16>, !torch.int -> !torch.vtensor<[5120,40,16],f16> loc(#loc1)
%3628 = torch.prim.ListConstruct %int5120, %int640 : (!torch.int, !torch.int) -> !torch.list<int> loc(#loc4)
%3629 = torch.aten.view %3627, %3628 : !torch.vtensor<[5120,40,16],f16>, !torch.list<int> -> !torch.vtensor<[5120,640],f16> loc(#loc1)
%3630 = torch.aten.mul.Tensor %151, %3629 : !torch.vtensor<[5120,640],si8>, !torch.vtensor<[5120,640],f16> -> !torch.vtensor<[5120,640],f16> loc(#loc1)
%3631 = torch.aten.transpose.int %3630, %int0, %int1 : !torch.vtensor<[5120,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,5120],f16> loc(#loc1)
%3632 = torch.aten.view %3624, %3140 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16> loc(#loc1)
%3633 = torch.aten.mm %3632, %3631 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,5120],f16> -> !torch.vtensor<[2048,5120],f16> loc(#loc1)
%3634 = torch.aten.mul.Scalar %152, %int1 : !torch.vtensor<[5120],f16>, !torch.int -> !torch.vtensor<[5120],f16> loc(#loc1)
%3635 = torch.aten.add.Tensor %3634, %3633, %int1 : !torch.vtensor<[5120],f16>, !torch.vtensor<[2048,5120],f16>, !torch.int -> !torch.vtensor<[2048,5120],f16> loc(#loc1)
%3636 = torch.prim.ListConstruct %int2, %int1024, %int5120 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc3)
%3637 = torch.aten.view %3635, %3636 : !torch.vtensor<[2048,5120],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,5120],f16> loc(#loc1)
%3638 = torch.aten.slice.Tensor %3637, %int-1, %int0, %int2560, %int1 : !torch.vtensor<[2,1024,5120],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1024,2560],f16> loc(#loc1)
%3639 = torch.aten.slice.Tensor %3637, %int-1, %int2560, %int5120, %int1 : !torch.vtensor<[2,1024,5120],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1024,2560],f16> loc(#loc1)
%3640 = torch.aten.gelu %3639, %str : !torch.vtensor<[2,1024,2560],f16>, !torch.str -> !torch.vtensor<[2,1024,2560],f16> loc(#loc1)
%3641 = torch.aten.mul.Tensor %3638, %3640 : !torch.vtensor<[2,1024,2560],f16>, !torch.vtensor<[2,1024,2560],f16> -> !torch.vtensor<[2,1024,2560],f16> loc(#loc1)
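// [editor's note] Quantized 2560 -> 640 output projection (scales %153, si8 weight %154,
// bias %155), residual add, then the 1024 tokens are reshaped back to the NCHW feature map
// [2,640,32,32].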
%3642 = torch.prim.ListConstruct %int640, %int160, %int16 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc2)
%3643 = torch.aten.broadcast_to %153, %3642 : !torch.vtensor<[640,160,1],f16>, !torch.list<int> -> !torch.vtensor<[640,160,16],f16> loc(#loc1)
%3644 = torch.aten.clone %3643, %int0 : !torch.vtensor<[640,160,16],f16>, !torch.int -> !torch.vtensor<[640,160,16],f16> loc(#loc1)
%3645 = torch.prim.ListConstruct %int640, %int2560 : (!torch.int, !torch.int) -> !torch.list<int> loc(#loc4)
%3646 = torch.aten.view %3644, %3645 : !torch.vtensor<[640,160,16],f16>, !torch.list<int> -> !torch.vtensor<[640,2560],f16> loc(#loc1)
%3647 = torch.aten.mul.Tensor %154, %3646 : !torch.vtensor<[640,2560],si8>, !torch.vtensor<[640,2560],f16> -> !torch.vtensor<[640,2560],f16> loc(#loc1)
%3648 = torch.aten.transpose.int %3647, %int0, %int1 : !torch.vtensor<[640,2560],f16>, !torch.int, !torch.int -> !torch.vtensor<[2560,640],f16> loc(#loc1)
%3649 = torch.prim.ListConstruct %int2048, %int2560 : (!torch.int, !torch.int) -> !torch.list<int> loc(#loc3)
%3650 = torch.aten.view %3641, %3649 : !torch.vtensor<[2,1024,2560],f16>, !torch.list<int> -> !torch.vtensor<[2048,2560],f16> loc(#loc1)
%3651 = torch.aten.mm %3650, %3648 : !torch.vtensor<[2048,2560],f16>, !torch.vtensor<[2560,640],f16> -> !torch.vtensor<[2048,640],f16> loc(#loc1)
%3652 = torch.aten.mul.Scalar %155, %int1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640],f16> loc(#loc1)
%3653 = torch.aten.add.Tensor %3652, %3651, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2048,640],f16>, !torch.int -> !torch.vtensor<[2048,640],f16> loc(#loc1)
%3654 = torch.aten.view %3653, %3072 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3655 = torch.aten.add.Tensor %3654, %3610, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3656 = torch.prim.ListConstruct %int2, %int32, %int32, %int640 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc5)
%3657 = torch.aten.view %3655, %3656 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2,32,32,640],f16> loc(#loc1)
%3658 = torch.aten.permute %3657, %1789 : !torch.vtensor<[2,32,32,640],f16>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
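// [editor's note] Fake-quantization of the feature map: the absmax is taken per 16-channel
// group at each spatial position (max over dim 2 of the [2,40,16,32,32] view).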
%3659 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3660 = torch.aten.detach %3659 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3661 = torch.aten.view %3658, %2892 : !torch.vtensor<[2,640,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,40,16,32,32],f16> loc(#loc1)
%3662 = torch.aten.abs %3661 : !torch.vtensor<[2,40,16,32,32],f16> -> !torch.vtensor<[2,40,16,32,32],f16> loc(#loc1)
%values_84, %indices_85 = torch.aten.max.dim %3662, %int2, %true : !torch.vtensor<[2,40,16,32,32],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,40,1,32,32],f16>, !torch.vtensor<[2,40,1,32,32],si64> loc(#loc1)
%3663 = torch.aten.view %values_84, %2895 : !torch.vtensor<[2,40,1,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,40,1,32,32],f16> loc(#loc1)
%3664 = torch.aten.broadcast_to %3663, %2892 : !torch.vtensor<[2,40,1,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,40,16,32,32],f16> loc(#loc1)
%3665 = torch.aten.clone %3664, %int0 : !torch.vtensor<[2,40,16,32,32],f16>, !torch.int -> !torch.vtensor<[2,40,16,32,32],f16> loc(#loc1)
%3666 = torch.aten.view %3665, %2875 : !torch.vtensor<[2,40,16,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%3667 = torch.aten.sub.Tensor %3660, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%3668 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%3669 = torch.aten.pow.Tensor_Tensor %3668, %3667 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3670 = torch.aten.neg %3669 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3671 = torch.aten.neg %3670 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3672 = torch.aten.div.Tensor %3666, %3671 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%3673 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3674 = torch.aten.detach %3673 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3675 = torch.aten.div.Tensor %3658, %3672 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%3676 = torch.aten.add.Tensor %3675, %3674, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%3677 = torch.aten.sub.Tensor %3660, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%3678 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%3679 = torch.aten.pow.Tensor_Tensor %3678, %3677 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3680 = torch.aten.neg %3679 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3681 = torch.aten.sub.Tensor %3660, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%3682 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%3683 = torch.aten.pow.Tensor_Tensor %3682, %3681 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3684 = torch.aten.sub.Tensor %3683, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%3685 = torch.aten.gt.Tensor %3676, %3684 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,640,32,32],i1> loc(#loc1)
%3686 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%3687 = torch.aten.to.dtype %3686, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%3688 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%3689 = torch.aten.broadcast_to %3687, %3688 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%3690 = torch.valsem.aten.copy %3689, %3684, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%3691 = torch.aten.where.self %3685, %3690, %3676 : !torch.vtensor<[2,640,32,32],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%3692 = torch.aten.lt.Tensor %3691, %3680 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,640,32,32],i1> loc(#loc1)
%3693 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%3694 = torch.aten.to.dtype %3693, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%3695 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%3696 = torch.aten.broadcast_to %3694, %3695 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%3697 = torch.valsem.aten.copy %3696, %3680, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%3698 = torch.aten.where.self %3692, %3697, %3691 : !torch.vtensor<[2,640,32,32],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%3699 = torch.aten.round %3698 : !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%3700 = torch.aten.sub.Tensor %3699, %3674, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%3701 = torch.aten.mul.Tensor %3700, %3672 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
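// [editor's note] Quantized 1x1 convolution (scales %156, si8 weight %157, bias %158) plus the
// residual add with %2993; this reads as the transformer block's projection back onto the
// convolutional feature map.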
%3702 = torch.aten.broadcast_to %156, %3064 : !torch.vtensor<[640,40,1,1,1],f16>, !torch.list<int> -> !torch.vtensor<[640,40,16,1,1],f16> loc(#loc1)
%3703 = torch.aten.clone %3702, %int0 : !torch.vtensor<[640,40,16,1,1],f16>, !torch.int -> !torch.vtensor<[640,40,16,1,1],f16> loc(#loc1)
%3704 = torch.aten.view %3703, %3067 : !torch.vtensor<[640,40,16,1,1],f16>, !torch.list<int> -> !torch.vtensor<[640,640,1,1],f16> loc(#loc1)
%3705 = torch.aten.mul.Tensor %157, %3704 : !torch.vtensor<[640,640,1,1],si8>, !torch.vtensor<[640,640,1,1],f16> -> !torch.vtensor<[640,640,1,1],f16> loc(#loc1)
%3706 = torch.aten.convolution %3701, %3705, %158, %933, %934, %933, %false, %934, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[640,640,1,1],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%3707 = torch.aten.add.Tensor %3706, %2993, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
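// [editor's note] GroupNorm with 32 groups of 20 channels: statistics are accumulated in f64
// over the [2,32,20,1024] view (20480 elements per group), eps %5 is added before rsqrt, and
// %159/%160 apply the per-channel affine in f32.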
%3708 = torch.aten.clone %3707, %int0 : !torch.vtensor<[2,640,32,32],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%3709 = torch.aten.view %3708, %2854 : !torch.vtensor<[2,640,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,32,20,1024],f16> loc(#loc1)
%3710 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%3711 = torch.aten.to.dtype %3710, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%3712 = torch.prim.ListConstruct %int2, %int32, %int20, %int1024 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%3713 = torch.aten.broadcast_to %3711, %3712 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,20,1024],f32> loc(#loc1)
%3714 = torch.valsem.aten.copy %3713, %3709, %false : !torch.vtensor<[2,32,20,1024],f32>, !torch.vtensor<[2,32,20,1024],f16>, !torch.bool -> !torch.vtensor<[2,32,20,1024],f32> loc(#loc1)
%3715 = torch.aten.to.dtype %3714, %int7, %false, %false, %none : !torch.vtensor<[2,32,20,1024],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,20,1024],f64> loc(#loc1)
%3716 = torch.aten.sum.dim_IntList %3715, %943, %true, %none : !torch.vtensor<[2,32,20,1024],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%3717 = torch.aten.div.Scalar %3716, %int20480 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%3718 = torch.aten.sub.Tensor %3715, %3717, %float1.000000e00 : !torch.vtensor<[2,32,20,1024],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,20,1024],f64> loc(#loc1)
%3719 = torch.aten.mul.Tensor %3718, %3718 : !torch.vtensor<[2,32,20,1024],f64>, !torch.vtensor<[2,32,20,1024],f64> -> !torch.vtensor<[2,32,20,1024],f64> loc(#loc1)
%3720 = torch.aten.sum.dim_IntList %3719, %943, %true, %none : !torch.vtensor<[2,32,20,1024],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%3721 = torch.aten.div.Scalar %3720, %int20480 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%3722 = torch.aten.to.dtype %3721, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%3723 = torch.aten.sum.dim_IntList %3714, %943, %true, %none : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%3724 = torch.aten.div.Scalar %3723, %int20480 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%3725 = torch.aten.add.Tensor %3722, %5, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%3726 = torch.aten.rsqrt %3725 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%3727 = torch.aten.sub.Tensor %3709, %3724, %int1 : !torch.vtensor<[2,32,20,1024],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,20,1024],f32> loc(#loc1)
%3728 = torch.aten.mul.Tensor %3727, %3726 : !torch.vtensor<[2,32,20,1024],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,20,1024],f32> loc(#loc1)
%3729 = torch.aten.view %3728, %2875 : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f32> loc(#loc1)
%3730 = torch.aten.unsqueeze %159, %int-1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640,1],f16> loc(#loc1)
%3731 = torch.aten.unsqueeze %3730, %int-1 : !torch.vtensor<[640,1],f16>, !torch.int -> !torch.vtensor<[640,1,1],f16> loc(#loc1)
%3732 = torch.aten.mul.Tensor %3729, %3731 : !torch.vtensor<[2,640,32,32],f32>, !torch.vtensor<[640,1,1],f16> -> !torch.vtensor<[2,640,32,32],f32> loc(#loc1)
%3733 = torch.aten.unsqueeze %160, %int-1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640,1],f16> loc(#loc1)
%3734 = torch.aten.unsqueeze %3733, %int-1 : !torch.vtensor<[640,1],f16>, !torch.int -> !torch.vtensor<[640,1,1],f16> loc(#loc1)
%3735 = torch.aten.add.Tensor %3732, %3734, %int1 : !torch.vtensor<[2,640,32,32],f32>, !torch.vtensor<[640,1,1],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f32> loc(#loc1)
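// Cast the normalized result back to f16 (same copy-based cast pattern), then SiLU:
// x * sigmoid(x).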
%3736 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%3737 = torch.aten.to.dtype %3736, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%3738 = torch.prim.ListConstruct %int2, %int640, %int32, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%3739 = torch.aten.broadcast_to %3737, %3738 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%3740 = torch.valsem.aten.copy %3739, %3735, %false : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f32>, !torch.bool -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%3741 = torch.aten.sigmoid %3740 : !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%3742 = torch.aten.mul.Tensor %3741, %3740 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
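// Activation fake-quantization (quantize-dequantize). The channels are regrouped as
// [2,40,16,32,32] and a per-group, per-position absmax is taken over the 16 channels
// (dim 2). With b = %880 (evidently the bit-width), z = %881 (zero point) and %882 as
// the base (presumably 2), this computes, in PyTorch-like pseudocode:
//   s  = amax / %882**(b-1)
//   q  = round(clamp(x / s + z, -(%882**(b-1)), %882**(b-1) - 1))
//   dq = (q - z) * s
// The clamp is spelled out as gt/lt comparisons feeding where.self, and the double
// aten.neg is a no-op left over from tracing.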
%3743 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3744 = torch.aten.detach %3743 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3745 = torch.aten.view %3742, %2892 : !torch.vtensor<[2,640,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,40,16,32,32],f16> loc(#loc1)
%3746 = torch.aten.abs %3745 : !torch.vtensor<[2,40,16,32,32],f16> -> !torch.vtensor<[2,40,16,32,32],f16> loc(#loc1)
%values_86, %indices_87 = torch.aten.max.dim %3746, %int2, %true : !torch.vtensor<[2,40,16,32,32],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,40,1,32,32],f16>, !torch.vtensor<[2,40,1,32,32],si64> loc(#loc1)
%3747 = torch.aten.view %values_86, %2895 : !torch.vtensor<[2,40,1,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,40,1,32,32],f16> loc(#loc1)
%3748 = torch.aten.broadcast_to %3747, %2892 : !torch.vtensor<[2,40,1,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,40,16,32,32],f16> loc(#loc1)
%3749 = torch.aten.clone %3748, %int0 : !torch.vtensor<[2,40,16,32,32],f16>, !torch.int -> !torch.vtensor<[2,40,16,32,32],f16> loc(#loc1)
%3750 = torch.aten.view %3749, %2875 : !torch.vtensor<[2,40,16,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%3751 = torch.aten.sub.Tensor %3744, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%3752 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%3753 = torch.aten.pow.Tensor_Tensor %3752, %3751 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3754 = torch.aten.neg %3753 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3755 = torch.aten.neg %3754 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3756 = torch.aten.div.Tensor %3750, %3755 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%3757 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3758 = torch.aten.detach %3757 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3759 = torch.aten.div.Tensor %3742, %3756 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%3760 = torch.aten.add.Tensor %3759, %3758, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%3761 = torch.aten.sub.Tensor %3744, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%3762 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%3763 = torch.aten.pow.Tensor_Tensor %3762, %3761 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3764 = torch.aten.neg %3763 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3765 = torch.aten.sub.Tensor %3744, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%3766 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%3767 = torch.aten.pow.Tensor_Tensor %3766, %3765 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3768 = torch.aten.sub.Tensor %3767, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%3769 = torch.aten.gt.Tensor %3760, %3768 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,640,32,32],i1> loc(#loc1)
%3770 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%3771 = torch.aten.to.dtype %3770, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%3772 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%3773 = torch.aten.broadcast_to %3771, %3772 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%3774 = torch.valsem.aten.copy %3773, %3768, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%3775 = torch.aten.where.self %3769, %3774, %3760 : !torch.vtensor<[2,640,32,32],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%3776 = torch.aten.lt.Tensor %3775, %3764 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,640,32,32],i1> loc(#loc1)
%3777 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%3778 = torch.aten.to.dtype %3777, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%3779 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%3780 = torch.aten.broadcast_to %3778, %3779 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%3781 = torch.valsem.aten.copy %3780, %3764, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%3782 = torch.aten.where.self %3776, %3781, %3775 : !torch.vtensor<[2,640,32,32],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%3783 = torch.aten.round %3782 : !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%3784 = torch.aten.sub.Tensor %3783, %3758, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%3785 = torch.aten.mul.Tensor %3784, %3756 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
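// Same weight-dequantization pattern for a 3x3 kernel: scales %161 ([640,40,1,3,3])
// expanded over input-channel groups of 16, multiplied into the si8 weight %162,
// followed by a 3x3 convolution with bias %163.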
%3786 = torch.aten.broadcast_to %161, %2935 : !torch.vtensor<[640,40,1,3,3],f16>, !torch.list<int> -> !torch.vtensor<[640,40,16,3,3],f16> loc(#loc1)
%3787 = torch.aten.clone %3786, %int0 : !torch.vtensor<[640,40,16,3,3],f16>, !torch.int -> !torch.vtensor<[640,40,16,3,3],f16> loc(#loc1)
%3788 = torch.aten.view %3787, %2938 : !torch.vtensor<[640,40,16,3,3],f16>, !torch.list<int> -> !torch.vtensor<[640,640,3,3],f16> loc(#loc1)
%3789 = torch.aten.mul.Tensor %162, %3788 : !torch.vtensor<[640,640,3,3],si8>, !torch.vtensor<[640,640,3,3],f16> -> !torch.vtensor<[640,640,3,3],f16> loc(#loc1)
%3790 = torch.aten.convolution %3785, %3789, %163, %933, %933, %933, %false, %934, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[640,640,3,3],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
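// Time-embedding injection: SiLU over the [2,1280] embedding %932, a 1280 -> 640
// linear (%164 weight, %165 bias; mul.Scalar by 1 plus add.Tensor is the traced form
// of the bias add), two full-range no-op slices, and unsqueezes to [2,640,1,1] so the
// projection broadcasts over the spatial dims of the conv output; this is the usual
// shape of a diffusion ResNet block's time-step conditioning.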
%3791 = torch.aten.sigmoid %932 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16> loc(#loc1)
%3792 = torch.aten.mul.Tensor %3791, %932 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16> loc(#loc1)
%3793 = torch.aten.transpose.int %164, %int0, %int1 : !torch.vtensor<[640,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,640],f16> loc(#loc1)
%3794 = torch.aten.mm %3792, %3793 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,640],f16> -> !torch.vtensor<[2,640],f16> loc(#loc1)
%3795 = torch.aten.mul.Scalar %165, %int1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640],f16> loc(#loc1)
%3796 = torch.aten.add.Tensor %3795, %3794, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2,640],f16>, !torch.int -> !torch.vtensor<[2,640],f16> loc(#loc1)
%3797 = torch.aten.slice.Tensor %3796, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,640],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,640],f16> loc(#loc1)
%3798 = torch.aten.slice.Tensor %3797, %int1, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,640],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,640],f16> loc(#loc1)
%3799 = torch.aten.unsqueeze %3798, %int2 : !torch.vtensor<[2,640],f16>, !torch.int -> !torch.vtensor<[2,640,1],f16> loc(#loc1)
%3800 = torch.aten.unsqueeze %3799, %int3 : !torch.vtensor<[2,640,1],f16>, !torch.int -> !torch.vtensor<[2,640,1,1],f16> loc(#loc1)
%3801 = torch.aten.add.Tensor %3790, %3800, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,1,1],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
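// Second GroupNorm of the block (same 32-group recipe, eps %5) with affine %166/%167,
// cast back to f16, then SiLU.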
%3802 = torch.aten.view %3801, %2854 : !torch.vtensor<[2,640,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,32,20,1024],f16> loc(#loc1)
%3803 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%3804 = torch.aten.to.dtype %3803, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%3805 = torch.prim.ListConstruct %int2, %int32, %int20, %int1024 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%3806 = torch.aten.broadcast_to %3804, %3805 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,20,1024],f32> loc(#loc1)
%3807 = torch.valsem.aten.copy %3806, %3802, %false : !torch.vtensor<[2,32,20,1024],f32>, !torch.vtensor<[2,32,20,1024],f16>, !torch.bool -> !torch.vtensor<[2,32,20,1024],f32> loc(#loc1)
%3808 = torch.aten.to.dtype %3807, %int7, %false, %false, %none : !torch.vtensor<[2,32,20,1024],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,20,1024],f64> loc(#loc1)
%3809 = torch.aten.sum.dim_IntList %3808, %943, %true, %none : !torch.vtensor<[2,32,20,1024],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%3810 = torch.aten.div.Scalar %3809, %int20480 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%3811 = torch.aten.sub.Tensor %3808, %3810, %float1.000000e00 : !torch.vtensor<[2,32,20,1024],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,20,1024],f64> loc(#loc1)
%3812 = torch.aten.mul.Tensor %3811, %3811 : !torch.vtensor<[2,32,20,1024],f64>, !torch.vtensor<[2,32,20,1024],f64> -> !torch.vtensor<[2,32,20,1024],f64> loc(#loc1)
%3813 = torch.aten.sum.dim_IntList %3812, %943, %true, %none : !torch.vtensor<[2,32,20,1024],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%3814 = torch.aten.div.Scalar %3813, %int20480 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%3815 = torch.aten.to.dtype %3814, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%3816 = torch.aten.sum.dim_IntList %3807, %943, %true, %none : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%3817 = torch.aten.div.Scalar %3816, %int20480 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%3818 = torch.aten.add.Tensor %3815, %5, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%3819 = torch.aten.rsqrt %3818 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%3820 = torch.aten.sub.Tensor %3802, %3817, %int1 : !torch.vtensor<[2,32,20,1024],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,20,1024],f32> loc(#loc1)
%3821 = torch.aten.mul.Tensor %3820, %3819 : !torch.vtensor<[2,32,20,1024],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,20,1024],f32> loc(#loc1)
%3822 = torch.aten.view %3821, %2875 : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f32> loc(#loc1)
%3823 = torch.aten.unsqueeze %166, %int-1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640,1],f16> loc(#loc1)
%3824 = torch.aten.unsqueeze %3823, %int-1 : !torch.vtensor<[640,1],f16>, !torch.int -> !torch.vtensor<[640,1,1],f16> loc(#loc1)
%3825 = torch.aten.mul.Tensor %3822, %3824 : !torch.vtensor<[2,640,32,32],f32>, !torch.vtensor<[640,1,1],f16> -> !torch.vtensor<[2,640,32,32],f32> loc(#loc1)
%3826 = torch.aten.unsqueeze %167, %int-1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640,1],f16> loc(#loc1)
%3827 = torch.aten.unsqueeze %3826, %int-1 : !torch.vtensor<[640,1],f16>, !torch.int -> !torch.vtensor<[640,1,1],f16> loc(#loc1)
%3828 = torch.aten.add.Tensor %3825, %3827, %int1 : !torch.vtensor<[2,640,32,32],f32>, !torch.vtensor<[640,1,1],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f32> loc(#loc1)
%3829 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%3830 = torch.aten.to.dtype %3829, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%3831 = torch.prim.ListConstruct %int2, %int640, %int32, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%3832 = torch.aten.broadcast_to %3830, %3831 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%3833 = torch.valsem.aten.copy %3832, %3828, %false : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f32>, !torch.bool -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%3834 = torch.aten.sigmoid %3833 : !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%3835 = torch.aten.mul.Tensor %3834, %3833 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
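// Fake-quantize %3835 (same per-group absmax pattern as above), dequantize the si8
// 3x3 weight %169 with scales %168, convolve with bias %170, then close the ResNet
// block: add the block input %3707 and divide by %4 (1.0, an output scale factor
// that is a no-op here).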
%3836 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3837 = torch.aten.detach %3836 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3838 = torch.aten.view %3835, %2892 : !torch.vtensor<[2,640,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,40,16,32,32],f16> loc(#loc1)
%3839 = torch.aten.abs %3838 : !torch.vtensor<[2,40,16,32,32],f16> -> !torch.vtensor<[2,40,16,32,32],f16> loc(#loc1)
%values_88, %indices_89 = torch.aten.max.dim %3839, %int2, %true : !torch.vtensor<[2,40,16,32,32],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,40,1,32,32],f16>, !torch.vtensor<[2,40,1,32,32],si64> loc(#loc1)
%3840 = torch.aten.view %values_88, %2895 : !torch.vtensor<[2,40,1,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,40,1,32,32],f16> loc(#loc1)
%3841 = torch.aten.broadcast_to %3840, %2892 : !torch.vtensor<[2,40,1,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,40,16,32,32],f16> loc(#loc1)
%3842 = torch.aten.clone %3841, %int0 : !torch.vtensor<[2,40,16,32,32],f16>, !torch.int -> !torch.vtensor<[2,40,16,32,32],f16> loc(#loc1)
%3843 = torch.aten.view %3842, %2875 : !torch.vtensor<[2,40,16,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%3844 = torch.aten.sub.Tensor %3837, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%3845 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%3846 = torch.aten.pow.Tensor_Tensor %3845, %3844 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3847 = torch.aten.neg %3846 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3848 = torch.aten.neg %3847 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3849 = torch.aten.div.Tensor %3843, %3848 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%3850 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3851 = torch.aten.detach %3850 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3852 = torch.aten.div.Tensor %3835, %3849 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%3853 = torch.aten.add.Tensor %3852, %3851, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%3854 = torch.aten.sub.Tensor %3837, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%3855 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%3856 = torch.aten.pow.Tensor_Tensor %3855, %3854 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3857 = torch.aten.neg %3856 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3858 = torch.aten.sub.Tensor %3837, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%3859 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%3860 = torch.aten.pow.Tensor_Tensor %3859, %3858 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3861 = torch.aten.sub.Tensor %3860, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%3862 = torch.aten.gt.Tensor %3853, %3861 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,640,32,32],i1> loc(#loc1)
%3863 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%3864 = torch.aten.to.dtype %3863, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%3865 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%3866 = torch.aten.broadcast_to %3864, %3865 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%3867 = torch.valsem.aten.copy %3866, %3861, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%3868 = torch.aten.where.self %3862, %3867, %3853 : !torch.vtensor<[2,640,32,32],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%3869 = torch.aten.lt.Tensor %3868, %3857 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,640,32,32],i1> loc(#loc1)
%3870 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%3871 = torch.aten.to.dtype %3870, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%3872 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%3873 = torch.aten.broadcast_to %3871, %3872 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%3874 = torch.valsem.aten.copy %3873, %3857, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%3875 = torch.aten.where.self %3869, %3874, %3868 : !torch.vtensor<[2,640,32,32],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%3876 = torch.aten.round %3875 : !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%3877 = torch.aten.sub.Tensor %3876, %3851, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%3878 = torch.aten.mul.Tensor %3877, %3849 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%3879 = torch.aten.broadcast_to %168, %2935 : !torch.vtensor<[640,40,1,3,3],f16>, !torch.list<int> -> !torch.vtensor<[640,40,16,3,3],f16> loc(#loc1)
%3880 = torch.aten.clone %3879, %int0 : !torch.vtensor<[640,40,16,3,3],f16>, !torch.int -> !torch.vtensor<[640,40,16,3,3],f16> loc(#loc1)
%3881 = torch.aten.view %3880, %2938 : !torch.vtensor<[640,40,16,3,3],f16>, !torch.list<int> -> !torch.vtensor<[640,640,3,3],f16> loc(#loc1)
%3882 = torch.aten.mul.Tensor %169, %3881 : !torch.vtensor<[640,640,3,3],si8>, !torch.vtensor<[640,640,3,3],f16> -> !torch.vtensor<[640,640,3,3],f16> loc(#loc1)
%3883 = torch.aten.convolution %3878, %3882, %170, %933, %933, %933, %false, %934, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[640,640,3,3],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%3884 = torch.aten.add.Tensor %3707, %3883, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%3885 = torch.aten.div.Tensor %3884, %4 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
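// GroupNorm again, but with eps %3 (~1e-6) and, in this listing, no per-channel
// affine before the cast to f16, consistent with the normalization that precedes a
// spatial-transformer projection.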
%3886 = torch.aten.clone %3885, %int0 : !torch.vtensor<[2,640,32,32],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%3887 = torch.aten.view %3886, %2854 : !torch.vtensor<[2,640,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,32,20,1024],f16> loc(#loc1)
%3888 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%3889 = torch.aten.to.dtype %3888, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%3890 = torch.prim.ListConstruct %int2, %int32, %int20, %int1024 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%3891 = torch.aten.broadcast_to %3889, %3890 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,20,1024],f32> loc(#loc1)
%3892 = torch.valsem.aten.copy %3891, %3887, %false : !torch.vtensor<[2,32,20,1024],f32>, !torch.vtensor<[2,32,20,1024],f16>, !torch.bool -> !torch.vtensor<[2,32,20,1024],f32> loc(#loc1)
%3893 = torch.aten.to.dtype %3892, %int7, %false, %false, %none : !torch.vtensor<[2,32,20,1024],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,20,1024],f64> loc(#loc1)
%3894 = torch.aten.sum.dim_IntList %3893, %943, %true, %none : !torch.vtensor<[2,32,20,1024],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%3895 = torch.aten.div.Scalar %3894, %int20480 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%3896 = torch.aten.sub.Tensor %3893, %3895, %float1.000000e00 : !torch.vtensor<[2,32,20,1024],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,20,1024],f64> loc(#loc1)
%3897 = torch.aten.mul.Tensor %3896, %3896 : !torch.vtensor<[2,32,20,1024],f64>, !torch.vtensor<[2,32,20,1024],f64> -> !torch.vtensor<[2,32,20,1024],f64> loc(#loc1)
%3898 = torch.aten.sum.dim_IntList %3897, %943, %true, %none : !torch.vtensor<[2,32,20,1024],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%3899 = torch.aten.div.Scalar %3898, %int20480 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%3900 = torch.aten.to.dtype %3899, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%3901 = torch.aten.sum.dim_IntList %3892, %943, %true, %none : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%3902 = torch.aten.div.Scalar %3901, %int20480 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%3903 = torch.aten.add.Tensor %3900, %3, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%3904 = torch.aten.rsqrt %3903 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%3905 = torch.aten.sub.Tensor %3887, %3902, %int1 : !torch.vtensor<[2,32,20,1024],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,20,1024],f32> loc(#loc1)
%3906 = torch.aten.mul.Tensor %3905, %3904 : !torch.vtensor<[2,32,20,1024],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,20,1024],f32> loc(#loc1)
%3907 = torch.aten.view %3906, %2875 : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f32> loc(#loc1)
%3908 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%3909 = torch.aten.to.dtype %3908, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%3910 = torch.prim.ListConstruct %int2, %int640, %int32, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%3911 = torch.aten.broadcast_to %3909, %3910 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%3912 = torch.valsem.aten.copy %3911, %3907, %false : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f32>, !torch.bool -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
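// Fake-quantize %3912 with the same per-group absmax pattern (scalars %880/%881/%882).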
%3913 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3914 = torch.aten.detach %3913 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3915 = torch.aten.view %3912, %2892 : !torch.vtensor<[2,640,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,40,16,32,32],f16> loc(#loc1)
%3916 = torch.aten.abs %3915 : !torch.vtensor<[2,40,16,32,32],f16> -> !torch.vtensor<[2,40,16,32,32],f16> loc(#loc1)
%values_90, %indices_91 = torch.aten.max.dim %3916, %int2, %true : !torch.vtensor<[2,40,16,32,32],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,40,1,32,32],f16>, !torch.vtensor<[2,40,1,32,32],si64> loc(#loc1)
%3917 = torch.aten.view %values_90, %2895 : !torch.vtensor<[2,40,1,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,40,1,32,32],f16> loc(#loc1)
%3918 = torch.aten.broadcast_to %3917, %2892 : !torch.vtensor<[2,40,1,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,40,16,32,32],f16> loc(#loc1)
%3919 = torch.aten.clone %3918, %int0 : !torch.vtensor<[2,40,16,32,32],f16>, !torch.int -> !torch.vtensor<[2,40,16,32,32],f16> loc(#loc1)
%3920 = torch.aten.view %3919, %2875 : !torch.vtensor<[2,40,16,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%3921 = torch.aten.sub.Tensor %3914, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%3922 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%3923 = torch.aten.pow.Tensor_Tensor %3922, %3921 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3924 = torch.aten.neg %3923 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3925 = torch.aten.neg %3924 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3926 = torch.aten.div.Tensor %3920, %3925 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%3927 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3928 = torch.aten.detach %3927 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3929 = torch.aten.div.Tensor %3912, %3926 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%3930 = torch.aten.add.Tensor %3929, %3928, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%3931 = torch.aten.sub.Tensor %3914, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%3932 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%3933 = torch.aten.pow.Tensor_Tensor %3932, %3931 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3934 = torch.aten.neg %3933 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3935 = torch.aten.sub.Tensor %3914, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%3936 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%3937 = torch.aten.pow.Tensor_Tensor %3936, %3935 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3938 = torch.aten.sub.Tensor %3937, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%3939 = torch.aten.gt.Tensor %3930, %3938 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,640,32,32],i1> loc(#loc1)
%3940 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%3941 = torch.aten.to.dtype %3940, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%3942 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%3943 = torch.aten.broadcast_to %3941, %3942 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%3944 = torch.valsem.aten.copy %3943, %3938, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%3945 = torch.aten.where.self %3939, %3944, %3930 : !torch.vtensor<[2,640,32,32],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%3946 = torch.aten.lt.Tensor %3945, %3934 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,640,32,32],i1> loc(#loc1)
%3947 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%3948 = torch.aten.to.dtype %3947, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%3949 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%3950 = torch.aten.broadcast_to %3948, %3949 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%3951 = torch.valsem.aten.copy %3950, %3934, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%3952 = torch.aten.where.self %3946, %3951, %3945 : !torch.vtensor<[2,640,32,32],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%3953 = torch.aten.round %3952 : !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%3954 = torch.aten.sub.Tensor %3953, %3928, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%3955 = torch.aten.mul.Tensor %3954, %3926 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
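// Dequantized 1x1 convolution (scales %171, si8 weight %172, bias %173), then a
// NCHW -> NHWC permute and a flatten to a [2,1024,640] token sequence, which matches
// a spatial transformer's input projection.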
%3956 = torch.aten.broadcast_to %171, %3064 : !torch.vtensor<[640,40,1,1,1],f16>, !torch.list<int> -> !torch.vtensor<[640,40,16,1,1],f16> loc(#loc1)
%3957 = torch.aten.clone %3956, %int0 : !torch.vtensor<[640,40,16,1,1],f16>, !torch.int -> !torch.vtensor<[640,40,16,1,1],f16> loc(#loc1)
%3958 = torch.aten.view %3957, %3067 : !torch.vtensor<[640,40,16,1,1],f16>, !torch.list<int> -> !torch.vtensor<[640,640,1,1],f16> loc(#loc1)
%3959 = torch.aten.mul.Tensor %172, %3958 : !torch.vtensor<[640,640,1,1],si8>, !torch.vtensor<[640,640,1,1],f16> -> !torch.vtensor<[640,640,1,1],f16> loc(#loc1)
%3960 = torch.aten.convolution %3955, %3959, %173, %933, %934, %933, %false, %934, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[640,640,1,1],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%3961 = torch.aten.permute %3960, %1196 : !torch.vtensor<[2,640,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,32,32,640],f16> loc(#loc1)
%3962 = torch.aten.view %3961, %3072 : !torch.vtensor<[2,32,32,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
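// LayerNorm over the 640-dim features, done in f16: mean and biased variance over
// dim 2, rsqrt(var + 1e-5); no affine weight/bias is applied here.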
%3963 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int> loc(#loc1)
%3964 = torch.aten.sum.dim_IntList %3962, %3963, %true, %none : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,1024,1],f16> loc(#loc1)
%3965 = torch.aten.div.Scalar %3964, %int640 : !torch.vtensor<[2,1024,1],f16>, !torch.int -> !torch.vtensor<[2,1024,1],f16> loc(#loc1)
%3966 = torch.prim.ListConstruct %int2, %int1024, %int640 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%3967 = torch.aten.broadcast_to %3965, %3966 : !torch.vtensor<[2,1024,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3968 = torch.aten.sub.Tensor %3962, %3967, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3969 = torch.aten.mul.Tensor %3968, %3968 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3970 = torch.aten.sum.dim_IntList %3969, %3963, %true, %none : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,1024,1],f16> loc(#loc1)
%3971 = torch.aten.div.Scalar %3970, %int640 : !torch.vtensor<[2,1024,1],f16>, !torch.int -> !torch.vtensor<[2,1024,1],f16> loc(#loc1)
%3972 = torch.aten.add.Scalar %3971, %float1.000000e-05, %int1 : !torch.vtensor<[2,1024,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,1024,1],f16> loc(#loc1)
%3973 = torch.aten.rsqrt %3972 : !torch.vtensor<[2,1024,1],f16> -> !torch.vtensor<[2,1024,1],f16> loc(#loc1)
%3974 = torch.prim.ListConstruct %int2, %int1024, %int640 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%3975 = torch.aten.broadcast_to %3973, %3974 : !torch.vtensor<[2,1024,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3976 = torch.aten.mul.Tensor %3968, %3975 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
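// Fake-quantize the normalized tokens %3976, now grouped along the feature dim as
// [2,1024,40,16] (absmax over each group of 16), then the first of three quantized
// 640 -> 640 linears: scales %174 ([640,40,1]) dequantize the si8 weight %175, the
// flattened [2048,640] activations are matmul'd against the transposed weight, and
// bias %176 is added, giving %4029.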
%3977 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3978 = torch.aten.detach %3977 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3979 = torch.aten.view %3976, %3090 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,40,16],f16> loc(#loc1)
%3980 = torch.aten.abs %3979 : !torch.vtensor<[2,1024,40,16],f16> -> !torch.vtensor<[2,1024,40,16],f16> loc(#loc1)
%values_92, %indices_93 = torch.aten.max.dim %3980, %int3, %true : !torch.vtensor<[2,1024,40,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,1024,40,1],f16>, !torch.vtensor<[2,1024,40,1],si64> loc(#loc1)
%3981 = torch.aten.view %values_92, %3093 : !torch.vtensor<[2,1024,40,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,40,1],f16> loc(#loc1)
%3982 = torch.aten.broadcast_to %3981, %3090 : !torch.vtensor<[2,1024,40,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,40,16],f16> loc(#loc1)
%3983 = torch.aten.clone %3982, %int0 : !torch.vtensor<[2,1024,40,16],f16>, !torch.int -> !torch.vtensor<[2,1024,40,16],f16> loc(#loc1)
%3984 = torch.aten.view %3983, %3072 : !torch.vtensor<[2,1024,40,16],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3985 = torch.aten.sub.Tensor %3978, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%3986 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%3987 = torch.aten.pow.Tensor_Tensor %3986, %3985 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3988 = torch.aten.neg %3987 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3989 = torch.aten.neg %3988 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3990 = torch.aten.div.Tensor %3984, %3989 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3991 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3992 = torch.aten.detach %3991 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3993 = torch.aten.div.Tensor %3976, %3990 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3994 = torch.aten.add.Tensor %3993, %3992, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3995 = torch.aten.sub.Tensor %3978, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%3996 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%3997 = torch.aten.pow.Tensor_Tensor %3996, %3995 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3998 = torch.aten.neg %3997 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3999 = torch.aten.sub.Tensor %3978, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%4000 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%4001 = torch.aten.pow.Tensor_Tensor %4000, %3999 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4002 = torch.aten.sub.Tensor %4001, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%4003 = torch.aten.gt.Tensor %3994, %4002 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,1024,640],i1> loc(#loc1)
%4004 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%4005 = torch.aten.to.dtype %4004, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%4006 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%4007 = torch.aten.broadcast_to %4005, %4006 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%4008 = torch.valsem.aten.copy %4007, %4002, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%4009 = torch.aten.where.self %4003, %4008, %3994 : !torch.vtensor<[2,1024,640],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%4010 = torch.aten.lt.Tensor %4009, %3998 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,1024,640],i1> loc(#loc1)
%4011 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%4012 = torch.aten.to.dtype %4011, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%4013 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%4014 = torch.aten.broadcast_to %4012, %4013 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%4015 = torch.valsem.aten.copy %4014, %3998, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%4016 = torch.aten.where.self %4010, %4015, %4009 : !torch.vtensor<[2,1024,640],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%4017 = torch.aten.round %4016 : !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%4018 = torch.aten.sub.Tensor %4017, %3992, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%4019 = torch.aten.mul.Tensor %4018, %3990 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%4020 = torch.aten.broadcast_to %174, %3133 : !torch.vtensor<[640,40,1],f16>, !torch.list<int> -> !torch.vtensor<[640,40,16],f16> loc(#loc1)
%4021 = torch.aten.clone %4020, %int0 : !torch.vtensor<[640,40,16],f16>, !torch.int -> !torch.vtensor<[640,40,16],f16> loc(#loc1)
%4022 = torch.aten.view %4021, %3136 : !torch.vtensor<[640,40,16],f16>, !torch.list<int> -> !torch.vtensor<[640,640],f16> loc(#loc1)
%4023 = torch.aten.mul.Tensor %175, %4022 : !torch.vtensor<[640,640],si8>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16> loc(#loc1)
%4024 = torch.aten.transpose.int %4023, %int0, %int1 : !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,640],f16> loc(#loc1)
%4025 = torch.aten.view %4019, %3140 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16> loc(#loc1)
%4026 = torch.aten.mm %4025, %4024 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16> loc(#loc1)
%4027 = torch.aten.mul.Scalar %176, %int1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640],f16> loc(#loc1)
%4028 = torch.aten.add.Tensor %4027, %4026, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2048,640],f16>, !torch.int -> !torch.vtensor<[2048,640],f16> loc(#loc1)
%4029 = torch.aten.view %4028, %3072 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
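// %3976 is re-quantized (same pattern) and run through a second quantized 640 -> 640
// linear (%177/%178/%179), giving %4082.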
%4030 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4031 = torch.aten.detach %4030 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4032 = torch.aten.view %3976, %3090 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,40,16],f16> loc(#loc1)
%4033 = torch.aten.abs %4032 : !torch.vtensor<[2,1024,40,16],f16> -> !torch.vtensor<[2,1024,40,16],f16> loc(#loc1)
%values_94, %indices_95 = torch.aten.max.dim %4033, %int3, %true : !torch.vtensor<[2,1024,40,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,1024,40,1],f16>, !torch.vtensor<[2,1024,40,1],si64> loc(#loc1)
%4034 = torch.aten.view %values_94, %3093 : !torch.vtensor<[2,1024,40,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,40,1],f16> loc(#loc1)
%4035 = torch.aten.broadcast_to %4034, %3090 : !torch.vtensor<[2,1024,40,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,40,16],f16> loc(#loc1)
%4036 = torch.aten.clone %4035, %int0 : !torch.vtensor<[2,1024,40,16],f16>, !torch.int -> !torch.vtensor<[2,1024,40,16],f16> loc(#loc1)
%4037 = torch.aten.view %4036, %3072 : !torch.vtensor<[2,1024,40,16],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%4038 = torch.aten.sub.Tensor %4031, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%4039 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%4040 = torch.aten.pow.Tensor_Tensor %4039, %4038 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4041 = torch.aten.neg %4040 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4042 = torch.aten.neg %4041 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4043 = torch.aten.div.Tensor %4037, %4042 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%4044 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4045 = torch.aten.detach %4044 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4046 = torch.aten.div.Tensor %3976, %4043 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%4047 = torch.aten.add.Tensor %4046, %4045, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%4048 = torch.aten.sub.Tensor %4031, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%4049 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%4050 = torch.aten.pow.Tensor_Tensor %4049, %4048 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4051 = torch.aten.neg %4050 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4052 = torch.aten.sub.Tensor %4031, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%4053 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%4054 = torch.aten.pow.Tensor_Tensor %4053, %4052 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4055 = torch.aten.sub.Tensor %4054, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%4056 = torch.aten.gt.Tensor %4047, %4055 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,1024,640],i1> loc(#loc1)
%4057 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%4058 = torch.aten.to.dtype %4057, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%4059 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%4060 = torch.aten.broadcast_to %4058, %4059 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%4061 = torch.valsem.aten.copy %4060, %4055, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%4062 = torch.aten.where.self %4056, %4061, %4047 : !torch.vtensor<[2,1024,640],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%4063 = torch.aten.lt.Tensor %4062, %4051 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,1024,640],i1> loc(#loc1)
%4064 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%4065 = torch.aten.to.dtype %4064, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%4066 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%4067 = torch.aten.broadcast_to %4065, %4066 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%4068 = torch.valsem.aten.copy %4067, %4051, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%4069 = torch.aten.where.self %4063, %4068, %4062 : !torch.vtensor<[2,1024,640],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%4070 = torch.aten.round %4069 : !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%4071 = torch.aten.sub.Tensor %4070, %4045, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%4072 = torch.aten.mul.Tensor %4071, %4043 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%4073 = torch.aten.broadcast_to %177, %3133 : !torch.vtensor<[640,40,1],f16>, !torch.list<int> -> !torch.vtensor<[640,40,16],f16> loc(#loc1)
%4074 = torch.aten.clone %4073, %int0 : !torch.vtensor<[640,40,16],f16>, !torch.int -> !torch.vtensor<[640,40,16],f16> loc(#loc1)
%4075 = torch.aten.view %4074, %3136 : !torch.vtensor<[640,40,16],f16>, !torch.list<int> -> !torch.vtensor<[640,640],f16> loc(#loc1)
%4076 = torch.aten.mul.Tensor %178, %4075 : !torch.vtensor<[640,640],si8>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16> loc(#loc1)
%4077 = torch.aten.transpose.int %4076, %int0, %int1 : !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,640],f16> loc(#loc1)
%4078 = torch.aten.view %4072, %3140 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16> loc(#loc1)
%4079 = torch.aten.mm %4078, %4077 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16> loc(#loc1)
%4080 = torch.aten.mul.Scalar %179, %int1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640],f16> loc(#loc1)
%4081 = torch.aten.add.Tensor %4080, %4079, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2048,640],f16>, !torch.int -> !torch.vtensor<[2048,640],f16> loc(#loc1)
%4082 = torch.aten.view %4081, %3072 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
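// Third pass over the same tokens: re-quantize %3976 and apply the last 640 -> 640
// linear (%180/%181/%182), giving %4135. Three parallel projections of one normalized
// sequence look like the Q/K/V of a self-attention layer.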
%4083 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4084 = torch.aten.detach %4083 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4085 = torch.aten.view %3976, %3090 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,40,16],f16> loc(#loc1)
%4086 = torch.aten.abs %4085 : !torch.vtensor<[2,1024,40,16],f16> -> !torch.vtensor<[2,1024,40,16],f16> loc(#loc1)
%values_96, %indices_97 = torch.aten.max.dim %4086, %int3, %true : !torch.vtensor<[2,1024,40,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,1024,40,1],f16>, !torch.vtensor<[2,1024,40,1],si64> loc(#loc1)
%4087 = torch.aten.view %values_96, %3093 : !torch.vtensor<[2,1024,40,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,40,1],f16> loc(#loc1)
%4088 = torch.aten.broadcast_to %4087, %3090 : !torch.vtensor<[2,1024,40,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,40,16],f16> loc(#loc1)
%4089 = torch.aten.clone %4088, %int0 : !torch.vtensor<[2,1024,40,16],f16>, !torch.int -> !torch.vtensor<[2,1024,40,16],f16> loc(#loc1)
%4090 = torch.aten.view %4089, %3072 : !torch.vtensor<[2,1024,40,16],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%4091 = torch.aten.sub.Tensor %4084, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%4092 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%4093 = torch.aten.pow.Tensor_Tensor %4092, %4091 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4094 = torch.aten.neg %4093 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4095 = torch.aten.neg %4094 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4096 = torch.aten.div.Tensor %4090, %4095 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%4097 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4098 = torch.aten.detach %4097 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4099 = torch.aten.div.Tensor %3976, %4096 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%4100 = torch.aten.add.Tensor %4099, %4098, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%4101 = torch.aten.sub.Tensor %4084, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%4102 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%4103 = torch.aten.pow.Tensor_Tensor %4102, %4101 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4104 = torch.aten.neg %4103 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4105 = torch.aten.sub.Tensor %4084, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%4106 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%4107 = torch.aten.pow.Tensor_Tensor %4106, %4105 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4108 = torch.aten.sub.Tensor %4107, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%4109 = torch.aten.gt.Tensor %4100, %4108 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,1024,640],i1> loc(#loc1)
%4110 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%4111 = torch.aten.to.dtype %4110, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%4112 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%4113 = torch.aten.broadcast_to %4111, %4112 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%4114 = torch.valsem.aten.copy %4113, %4108, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%4115 = torch.aten.where.self %4109, %4114, %4100 : !torch.vtensor<[2,1024,640],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%4116 = torch.aten.lt.Tensor %4115, %4104 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,1024,640],i1> loc(#loc1)
%4117 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%4118 = torch.aten.to.dtype %4117, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%4119 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%4120 = torch.aten.broadcast_to %4118, %4119 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%4121 = torch.valsem.aten.copy %4120, %4104, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%4122 = torch.aten.where.self %4116, %4121, %4115 : !torch.vtensor<[2,1024,640],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%4123 = torch.aten.round %4122 : !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%4124 = torch.aten.sub.Tensor %4123, %4098, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%4125 = torch.aten.mul.Tensor %4124, %4096 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
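// End of the fake-quantize round trip (round, subtract zero point, rescale). Next:
// dequantize the si8 weight %181 with per-group f16 scales %180 (group size 16) and apply
// the linear layer; judging by its use in the bmm below, %4135 is the value (V) projection
// of the self-attention.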
%4126 = torch.aten.broadcast_to %180, %3133 : !torch.vtensor<[640,40,1],f16>, !torch.list<int> -> !torch.vtensor<[640,40,16],f16> loc(#loc1)
%4127 = torch.aten.clone %4126, %int0 : !torch.vtensor<[640,40,16],f16>, !torch.int -> !torch.vtensor<[640,40,16],f16> loc(#loc1)
%4128 = torch.aten.view %4127, %3136 : !torch.vtensor<[640,40,16],f16>, !torch.list<int> -> !torch.vtensor<[640,640],f16> loc(#loc1)
%4129 = torch.aten.mul.Tensor %181, %4128 : !torch.vtensor<[640,640],si8>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16> loc(#loc1)
%4130 = torch.aten.transpose.int %4129, %int0, %int1 : !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,640],f16> loc(#loc1)
%4131 = torch.aten.view %4125, %3140 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16> loc(#loc1)
%4132 = torch.aten.mm %4131, %4130 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16> loc(#loc1)
%4133 = torch.aten.mul.Scalar %182, %int1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640],f16> loc(#loc1)
%4134 = torch.aten.add.Tensor %4133, %4132, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2048,640],f16>, !torch.int -> !torch.vtensor<[2048,640],f16> loc(#loc1)
%4135 = torch.aten.view %4134, %3072 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
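// Multi-head self-attention setup: reshape Q (%4029), K (%4082) and V (%4135) from
// [2,1024,640] into 8 heads of dim 80 -> [16,1024,80].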
%4136 = torch.aten.view %4029, %3252 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16> loc(#loc1)
%4137 = torch.aten.permute %4136, %1380 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16> loc(#loc1)
%4138 = torch.aten.clone %4137, %int0 : !torch.vtensor<[2,8,1024,80],f16>, !torch.int -> !torch.vtensor<[2,8,1024,80],f16> loc(#loc1)
%4139 = torch.aten.view %4138, %3256 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16> loc(#loc1)
%4140 = torch.aten.view %4082, %3252 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16> loc(#loc1)
%4141 = torch.aten.permute %4140, %1380 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16> loc(#loc1)
%4142 = torch.aten.clone %4141, %int0 : !torch.vtensor<[2,8,1024,80],f16>, !torch.int -> !torch.vtensor<[2,8,1024,80],f16> loc(#loc1)
%4143 = torch.aten.view %4142, %3256 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16> loc(#loc1)
%4144 = torch.aten.view %4135, %3252 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16> loc(#loc1)
%4145 = torch.aten.permute %4144, %1380 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16> loc(#loc1)
%4146 = torch.aten.clone %4145, %int0 : !torch.vtensor<[2,8,1024,80],f16>, !torch.int -> !torch.vtensor<[2,8,1024,80],f16> loc(#loc1)
%4147 = torch.aten.view %4146, %3256 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16> loc(#loc1)
%4148 = torch.aten.transpose.int %4143, %int-1, %int-2 : !torch.vtensor<[16,1024,80],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,80,1024],f16> loc(#loc1)
%4149 = torch.aten.broadcast_to %4139, %3256 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16> loc(#loc1)
%4150 = torch.aten.view %4149, %3256 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16> loc(#loc1)
%4151 = torch.aten.broadcast_to %4148, %3269 : !torch.vtensor<[16,80,1024],f16>, !torch.list<int> -> !torch.vtensor<[16,80,1024],f16> loc(#loc1)
%4152 = torch.aten.view %4151, %3269 : !torch.vtensor<[16,80,1024],f16>, !torch.list<int> -> !torch.vtensor<[16,80,1024],f16> loc(#loc1)
%4153 = torch.aten.bmm %4150, %4152 : !torch.vtensor<[16,1024,80],f16>, !torch.vtensor<[16,80,1024],f16> -> !torch.vtensor<[16,1024,1024],f16> loc(#loc1)
%4154 = torch.aten.view %4153, %3273 : !torch.vtensor<[16,1024,1024],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,1024],f16> loc(#loc1)
%4155 = torch.aten.mul.Tensor %4154, %1 : !torch.vtensor<[16,1024,1024],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,1024,1024],f16> loc(#loc1)
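// Numerically stable softmax over the attention scores: subtract the row max before exp,
// then normalize by the row sum.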
%values_98, %indices_99 = torch.aten.max.dim %4155, %int-1, %true : !torch.vtensor<[16,1024,1024],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,1024,1],f16>, !torch.vtensor<[16,1024,1],si64> loc(#loc1)
%4156 = torch.aten.sub.Tensor %4155, %values_98, %float1.000000e00 : !torch.vtensor<[16,1024,1024],f16>, !torch.vtensor<[16,1024,1],f16>, !torch.float -> !torch.vtensor<[16,1024,1024],f16> loc(#loc1)
%4157 = torch.aten.exp %4156 : !torch.vtensor<[16,1024,1024],f16> -> !torch.vtensor<[16,1024,1024],f16> loc(#loc1)
%4158 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int> loc(#loc1)
%4159 = torch.aten.sum.dim_IntList %4157, %4158, %true, %none : !torch.vtensor<[16,1024,1024],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[16,1024,1],f16> loc(#loc1)
%4160 = torch.aten.div.Tensor %4157, %4159 : !torch.vtensor<[16,1024,1024],f16>, !torch.vtensor<[16,1024,1],f16> -> !torch.vtensor<[16,1024,1024],f16> loc(#loc1)
%4161 = torch.aten.broadcast_to %4160, %3273 : !torch.vtensor<[16,1024,1024],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,1024],f16> loc(#loc1)
%4162 = torch.aten.view %4161, %3273 : !torch.vtensor<[16,1024,1024],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,1024],f16> loc(#loc1)
%4163 = torch.aten.broadcast_to %4147, %3256 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16> loc(#loc1)
%4164 = torch.aten.view %4163, %3256 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16> loc(#loc1)
%4165 = torch.aten.bmm %4162, %4164 : !torch.vtensor<[16,1024,1024],f16>, !torch.vtensor<[16,1024,80],f16> -> !torch.vtensor<[16,1024,80],f16> loc(#loc1)
%4166 = torch.aten.view %4165, %3256 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16> loc(#loc1)
%4167 = torch.aten.view %4166, %3287 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16> loc(#loc1)
%4168 = torch.aten.permute %4167, %1380 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16> loc(#loc1)
%4169 = torch.aten.clone %4168, %int0 : !torch.vtensor<[2,1024,8,80],f16>, !torch.int -> !torch.vtensor<[2,1024,8,80],f16> loc(#loc1)
%4170 = torch.aten.view %4169, %3072 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
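// Heads merged back to [2,1024,640]; fake-quantize the attention output %4170 with the
// same per-group absmax scheme before the output projection.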
%4171 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4172 = torch.aten.detach %4171 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4173 = torch.aten.view %4170, %3090 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,40,16],f16> loc(#loc1)
%4174 = torch.aten.abs %4173 : !torch.vtensor<[2,1024,40,16],f16> -> !torch.vtensor<[2,1024,40,16],f16> loc(#loc1)
%values_100, %indices_101 = torch.aten.max.dim %4174, %int3, %true : !torch.vtensor<[2,1024,40,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,1024,40,1],f16>, !torch.vtensor<[2,1024,40,1],si64> loc(#loc1)
%4175 = torch.aten.view %values_100, %3093 : !torch.vtensor<[2,1024,40,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,40,1],f16> loc(#loc1)
%4176 = torch.aten.broadcast_to %4175, %3090 : !torch.vtensor<[2,1024,40,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,40,16],f16> loc(#loc1)
%4177 = torch.aten.clone %4176, %int0 : !torch.vtensor<[2,1024,40,16],f16>, !torch.int -> !torch.vtensor<[2,1024,40,16],f16> loc(#loc1)
%4178 = torch.aten.view %4177, %3072 : !torch.vtensor<[2,1024,40,16],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%4179 = torch.aten.sub.Tensor %4172, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%4180 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%4181 = torch.aten.pow.Tensor_Tensor %4180, %4179 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4182 = torch.aten.neg %4181 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4183 = torch.aten.neg %4182 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4184 = torch.aten.div.Tensor %4178, %4183 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%4185 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4186 = torch.aten.detach %4185 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4187 = torch.aten.div.Tensor %4170, %4184 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%4188 = torch.aten.add.Tensor %4187, %4186, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%4189 = torch.aten.sub.Tensor %4172, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%4190 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%4191 = torch.aten.pow.Tensor_Tensor %4190, %4189 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4192 = torch.aten.neg %4191 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4193 = torch.aten.sub.Tensor %4172, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%4194 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%4195 = torch.aten.pow.Tensor_Tensor %4194, %4193 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4196 = torch.aten.sub.Tensor %4195, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%4197 = torch.aten.gt.Tensor %4188, %4196 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,1024,640],i1> loc(#loc1)
%4198 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%4199 = torch.aten.to.dtype %4198, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%4200 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%4201 = torch.aten.broadcast_to %4199, %4200 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%4202 = torch.valsem.aten.copy %4201, %4196, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%4203 = torch.aten.where.self %4197, %4202, %4188 : !torch.vtensor<[2,1024,640],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%4204 = torch.aten.lt.Tensor %4203, %4192 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,1024,640],i1> loc(#loc1)
%4205 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%4206 = torch.aten.to.dtype %4205, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%4207 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%4208 = torch.aten.broadcast_to %4206, %4207 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%4209 = torch.valsem.aten.copy %4208, %4192, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%4210 = torch.aten.where.self %4204, %4209, %4203 : !torch.vtensor<[2,1024,640],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%4211 = torch.aten.round %4210 : !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%4212 = torch.aten.sub.Tensor %4211, %4186, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%4213 = torch.aten.mul.Tensor %4212, %4184 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
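// Dequantize the si8 output-projection weight %184 (scales %183, bias %185) and apply it;
// the residual add with %3962 follows.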
%4214 = torch.aten.broadcast_to %183, %3133 : !torch.vtensor<[640,40,1],f16>, !torch.list<int> -> !torch.vtensor<[640,40,16],f16> loc(#loc1)
%4215 = torch.aten.clone %4214, %int0 : !torch.vtensor<[640,40,16],f16>, !torch.int -> !torch.vtensor<[640,40,16],f16> loc(#loc1)
%4216 = torch.aten.view %4215, %3136 : !torch.vtensor<[640,40,16],f16>, !torch.list<int> -> !torch.vtensor<[640,640],f16> loc(#loc1)
%4217 = torch.aten.mul.Tensor %184, %4216 : !torch.vtensor<[640,640],si8>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16> loc(#loc1)
%4218 = torch.aten.transpose.int %4217, %int0, %int1 : !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,640],f16> loc(#loc1)
%4219 = torch.aten.view %4213, %3140 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16> loc(#loc1)
%4220 = torch.aten.mm %4219, %4218 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16> loc(#loc1)
%4221 = torch.aten.mul.Scalar %185, %int1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640],f16> loc(#loc1)
%4222 = torch.aten.add.Tensor %4221, %4220, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2048,640],f16>, !torch.int -> !torch.vtensor<[2048,640],f16> loc(#loc1)
%4223 = torch.aten.view %4222, %3072 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%4224 = torch.aten.add.Tensor %4223, %3962, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
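// LayerNorm over the 640-channel dim, spelled out from primitives: mean, centered
// variance, rsqrt(var + 1e-5).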
%4225 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int> loc(#loc1)
%4226 = torch.aten.sum.dim_IntList %4224, %4225, %true, %none : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,1024,1],f16> loc(#loc1)
%4227 = torch.aten.div.Scalar %4226, %int640 : !torch.vtensor<[2,1024,1],f16>, !torch.int -> !torch.vtensor<[2,1024,1],f16> loc(#loc1)
%4228 = torch.prim.ListConstruct %int2, %int1024, %int640 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%4229 = torch.aten.broadcast_to %4227, %4228 : !torch.vtensor<[2,1024,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%4230 = torch.aten.sub.Tensor %4224, %4229, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%4231 = torch.aten.mul.Tensor %4230, %4230 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%4232 = torch.aten.sum.dim_IntList %4231, %4225, %true, %none : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,1024,1],f16> loc(#loc1)
%4233 = torch.aten.div.Scalar %4232, %int640 : !torch.vtensor<[2,1024,1],f16>, !torch.int -> !torch.vtensor<[2,1024,1],f16> loc(#loc1)
%4234 = torch.aten.add.Scalar %4233, %float1.000000e-05, %int1 : !torch.vtensor<[2,1024,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,1024,1],f16> loc(#loc1)
%4235 = torch.aten.rsqrt %4234 : !torch.vtensor<[2,1024,1],f16> -> !torch.vtensor<[2,1024,1],f16> loc(#loc1)
%4236 = torch.prim.ListConstruct %int2, %int1024, %int640 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%4237 = torch.aten.broadcast_to %4235, %4236 : !torch.vtensor<[2,1024,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%4238 = torch.aten.mul.Tensor %4230, %4237 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
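// Fake-quantize the normalized activations, then a quantized 640x640 linear
// (%186/%187, bias %188); %4291 feeds the cross-attention below as the queries.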
%4239 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4240 = torch.aten.detach %4239 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4241 = torch.aten.view %4238, %3090 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,40,16],f16> loc(#loc1)
%4242 = torch.aten.abs %4241 : !torch.vtensor<[2,1024,40,16],f16> -> !torch.vtensor<[2,1024,40,16],f16> loc(#loc1)
%values_102, %indices_103 = torch.aten.max.dim %4242, %int3, %true : !torch.vtensor<[2,1024,40,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,1024,40,1],f16>, !torch.vtensor<[2,1024,40,1],si64> loc(#loc1)
%4243 = torch.aten.view %values_102, %3093 : !torch.vtensor<[2,1024,40,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,40,1],f16> loc(#loc1)
%4244 = torch.aten.broadcast_to %4243, %3090 : !torch.vtensor<[2,1024,40,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,40,16],f16> loc(#loc1)
%4245 = torch.aten.clone %4244, %int0 : !torch.vtensor<[2,1024,40,16],f16>, !torch.int -> !torch.vtensor<[2,1024,40,16],f16> loc(#loc1)
%4246 = torch.aten.view %4245, %3072 : !torch.vtensor<[2,1024,40,16],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%4247 = torch.aten.sub.Tensor %4240, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%4248 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%4249 = torch.aten.pow.Tensor_Tensor %4248, %4247 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4250 = torch.aten.neg %4249 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4251 = torch.aten.neg %4250 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4252 = torch.aten.div.Tensor %4246, %4251 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%4253 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4254 = torch.aten.detach %4253 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4255 = torch.aten.div.Tensor %4238, %4252 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%4256 = torch.aten.add.Tensor %4255, %4254, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%4257 = torch.aten.sub.Tensor %4240, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%4258 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%4259 = torch.aten.pow.Tensor_Tensor %4258, %4257 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4260 = torch.aten.neg %4259 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4261 = torch.aten.sub.Tensor %4240, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%4262 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%4263 = torch.aten.pow.Tensor_Tensor %4262, %4261 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4264 = torch.aten.sub.Tensor %4263, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%4265 = torch.aten.gt.Tensor %4256, %4264 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,1024,640],i1> loc(#loc1)
%4266 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%4267 = torch.aten.to.dtype %4266, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%4268 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%4269 = torch.aten.broadcast_to %4267, %4268 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%4270 = torch.valsem.aten.copy %4269, %4264, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%4271 = torch.aten.where.self %4265, %4270, %4256 : !torch.vtensor<[2,1024,640],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%4272 = torch.aten.lt.Tensor %4271, %4260 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,1024,640],i1> loc(#loc1)
%4273 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%4274 = torch.aten.to.dtype %4273, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%4275 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%4276 = torch.aten.broadcast_to %4274, %4275 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%4277 = torch.valsem.aten.copy %4276, %4260, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%4278 = torch.aten.where.self %4272, %4277, %4271 : !torch.vtensor<[2,1024,640],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%4279 = torch.aten.round %4278 : !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%4280 = torch.aten.sub.Tensor %4279, %4254, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%4281 = torch.aten.mul.Tensor %4280, %4252 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%4282 = torch.aten.broadcast_to %186, %3133 : !torch.vtensor<[640,40,1],f16>, !torch.list<int> -> !torch.vtensor<[640,40,16],f16> loc(#loc1)
%4283 = torch.aten.clone %4282, %int0 : !torch.vtensor<[640,40,16],f16>, !torch.int -> !torch.vtensor<[640,40,16],f16> loc(#loc1)
%4284 = torch.aten.view %4283, %3136 : !torch.vtensor<[640,40,16],f16>, !torch.list<int> -> !torch.vtensor<[640,640],f16> loc(#loc1)
%4285 = torch.aten.mul.Tensor %187, %4284 : !torch.vtensor<[640,640],si8>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16> loc(#loc1)
%4286 = torch.aten.transpose.int %4285, %int0, %int1 : !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,640],f16> loc(#loc1)
%4287 = torch.aten.view %4281, %3140 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16> loc(#loc1)
%4288 = torch.aten.mm %4287, %4286 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16> loc(#loc1)
%4289 = torch.aten.mul.Scalar %188, %int1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640],f16> loc(#loc1)
%4290 = torch.aten.add.Tensor %4289, %4288, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2048,640],f16>, !torch.int -> !torch.vtensor<[2048,640],f16> loc(#loc1)
%4291 = torch.aten.view %4290, %3072 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
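// Quantize the encoder hidden states %arg2 ([2,77,768], presumably the text embeddings;
// grouped 48x16) and project 768 -> 640 (%189/%190). Note there is no bias add here,
// consistent with an attention key projection.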
%4292 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4293 = torch.aten.detach %4292 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4294 = torch.aten.view %arg2, %1542 : !torch.vtensor<[2,77,768],f16>, !torch.list<int> -> !torch.vtensor<[2,77,48,16],f16> loc(#loc1)
%4295 = torch.aten.abs %4294 : !torch.vtensor<[2,77,48,16],f16> -> !torch.vtensor<[2,77,48,16],f16> loc(#loc1)
%values_104, %indices_105 = torch.aten.max.dim %4295, %int3, %true : !torch.vtensor<[2,77,48,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,77,48,1],f16>, !torch.vtensor<[2,77,48,1],si64> loc(#loc1)
%4296 = torch.aten.view %values_104, %1545 : !torch.vtensor<[2,77,48,1],f16>, !torch.list<int> -> !torch.vtensor<[2,77,48,1],f16> loc(#loc1)
%4297 = torch.aten.broadcast_to %4296, %1542 : !torch.vtensor<[2,77,48,1],f16>, !torch.list<int> -> !torch.vtensor<[2,77,48,16],f16> loc(#loc1)
%4298 = torch.aten.clone %4297, %int0 : !torch.vtensor<[2,77,48,16],f16>, !torch.int -> !torch.vtensor<[2,77,48,16],f16> loc(#loc1)
%4299 = torch.aten.view %4298, %1549 : !torch.vtensor<[2,77,48,16],f16>, !torch.list<int> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%4300 = torch.aten.sub.Tensor %4293, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%4301 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%4302 = torch.aten.pow.Tensor_Tensor %4301, %4300 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4303 = torch.aten.neg %4302 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4304 = torch.aten.neg %4303 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4305 = torch.aten.div.Tensor %4299, %4304 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%4306 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4307 = torch.aten.detach %4306 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4308 = torch.aten.div.Tensor %arg2, %4305 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[2,77,768],f16> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%4309 = torch.aten.add.Tensor %4308, %4307, %int1 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%4310 = torch.aten.sub.Tensor %4293, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%4311 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%4312 = torch.aten.pow.Tensor_Tensor %4311, %4310 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4313 = torch.aten.neg %4312 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4314 = torch.aten.sub.Tensor %4293, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%4315 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%4316 = torch.aten.pow.Tensor_Tensor %4315, %4314 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4317 = torch.aten.sub.Tensor %4316, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%4318 = torch.aten.gt.Tensor %4309, %4317 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,77,768],i1> loc(#loc1)
%4319 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%4320 = torch.aten.to.dtype %4319, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%4321 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%4322 = torch.aten.broadcast_to %4320, %4321 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%4323 = torch.valsem.aten.copy %4322, %4317, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%4324 = torch.aten.where.self %4318, %4323, %4309 : !torch.vtensor<[2,77,768],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,77,768],f16> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%4325 = torch.aten.lt.Tensor %4324, %4313 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,77,768],i1> loc(#loc1)
%4326 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%4327 = torch.aten.to.dtype %4326, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%4328 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%4329 = torch.aten.broadcast_to %4327, %4328 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%4330 = torch.valsem.aten.copy %4329, %4313, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%4331 = torch.aten.where.self %4325, %4330, %4324 : !torch.vtensor<[2,77,768],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,77,768],f16> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%4332 = torch.aten.round %4331 : !torch.vtensor<[2,77,768],f16> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%4333 = torch.aten.sub.Tensor %4332, %4307, %int1 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%4334 = torch.aten.mul.Tensor %4333, %4305 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[2,77,768],f16> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%4335 = torch.aten.broadcast_to %189, %3456 : !torch.vtensor<[640,48,1],f16>, !torch.list<int> -> !torch.vtensor<[640,48,16],f16> loc(#loc1)
%4336 = torch.aten.clone %4335, %int0 : !torch.vtensor<[640,48,16],f16>, !torch.int -> !torch.vtensor<[640,48,16],f16> loc(#loc1)
%4337 = torch.aten.view %4336, %3459 : !torch.vtensor<[640,48,16],f16>, !torch.list<int> -> !torch.vtensor<[640,768],f16> loc(#loc1)
%4338 = torch.aten.mul.Tensor %190, %4337 : !torch.vtensor<[640,768],si8>, !torch.vtensor<[640,768],f16> -> !torch.vtensor<[640,768],f16> loc(#loc1)
%4339 = torch.aten.transpose.int %4338, %int0, %int1 : !torch.vtensor<[640,768],f16>, !torch.int, !torch.int -> !torch.vtensor<[768,640],f16> loc(#loc1)
%4340 = torch.aten.view %4334, %1593 : !torch.vtensor<[2,77,768],f16>, !torch.list<int> -> !torch.vtensor<[154,768],f16> loc(#loc1)
%4341 = torch.aten.mm %4340, %4339 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,640],f16> -> !torch.vtensor<[154,640],f16> loc(#loc1)
%4342 = torch.aten.view %4341, %3465 : !torch.vtensor<[154,640],f16>, !torch.list<int> -> !torch.vtensor<[2,77,640],f16> loc(#loc1)
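// The same quantization of %arg2 is recomputed for the value projection (%191/%192),
// again without bias.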
%4343 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4344 = torch.aten.detach %4343 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4345 = torch.aten.view %arg2, %1542 : !torch.vtensor<[2,77,768],f16>, !torch.list<int> -> !torch.vtensor<[2,77,48,16],f16> loc(#loc1)
%4346 = torch.aten.abs %4345 : !torch.vtensor<[2,77,48,16],f16> -> !torch.vtensor<[2,77,48,16],f16> loc(#loc1)
%values_106, %indices_107 = torch.aten.max.dim %4346, %int3, %true : !torch.vtensor<[2,77,48,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,77,48,1],f16>, !torch.vtensor<[2,77,48,1],si64> loc(#loc1)
%4347 = torch.aten.view %values_106, %1545 : !torch.vtensor<[2,77,48,1],f16>, !torch.list<int> -> !torch.vtensor<[2,77,48,1],f16> loc(#loc1)
%4348 = torch.aten.broadcast_to %4347, %1542 : !torch.vtensor<[2,77,48,1],f16>, !torch.list<int> -> !torch.vtensor<[2,77,48,16],f16> loc(#loc1)
%4349 = torch.aten.clone %4348, %int0 : !torch.vtensor<[2,77,48,16],f16>, !torch.int -> !torch.vtensor<[2,77,48,16],f16> loc(#loc1)
%4350 = torch.aten.view %4349, %1549 : !torch.vtensor<[2,77,48,16],f16>, !torch.list<int> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%4351 = torch.aten.sub.Tensor %4344, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%4352 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%4353 = torch.aten.pow.Tensor_Tensor %4352, %4351 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4354 = torch.aten.neg %4353 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4355 = torch.aten.neg %4354 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4356 = torch.aten.div.Tensor %4350, %4355 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%4357 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4358 = torch.aten.detach %4357 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4359 = torch.aten.div.Tensor %arg2, %4356 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[2,77,768],f16> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%4360 = torch.aten.add.Tensor %4359, %4358, %int1 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%4361 = torch.aten.sub.Tensor %4344, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%4362 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%4363 = torch.aten.pow.Tensor_Tensor %4362, %4361 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4364 = torch.aten.neg %4363 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4365 = torch.aten.sub.Tensor %4344, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%4366 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%4367 = torch.aten.pow.Tensor_Tensor %4366, %4365 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4368 = torch.aten.sub.Tensor %4367, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%4369 = torch.aten.gt.Tensor %4360, %4368 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,77,768],i1> loc(#loc1)
%4370 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%4371 = torch.aten.to.dtype %4370, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%4372 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%4373 = torch.aten.broadcast_to %4371, %4372 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%4374 = torch.valsem.aten.copy %4373, %4368, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%4375 = torch.aten.where.self %4369, %4374, %4360 : !torch.vtensor<[2,77,768],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,77,768],f16> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%4376 = torch.aten.lt.Tensor %4375, %4364 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,77,768],i1> loc(#loc1)
%4377 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%4378 = torch.aten.to.dtype %4377, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%4379 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%4380 = torch.aten.broadcast_to %4378, %4379 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%4381 = torch.valsem.aten.copy %4380, %4364, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%4382 = torch.aten.where.self %4376, %4381, %4375 : !torch.vtensor<[2,77,768],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,77,768],f16> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%4383 = torch.aten.round %4382 : !torch.vtensor<[2,77,768],f16> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%4384 = torch.aten.sub.Tensor %4383, %4358, %int1 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%4385 = torch.aten.mul.Tensor %4384, %4356 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[2,77,768],f16> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%4386 = torch.aten.broadcast_to %191, %3456 : !torch.vtensor<[640,48,1],f16>, !torch.list<int> -> !torch.vtensor<[640,48,16],f16> loc(#loc1)
%4387 = torch.aten.clone %4386, %int0 : !torch.vtensor<[640,48,16],f16>, !torch.int -> !torch.vtensor<[640,48,16],f16> loc(#loc1)
%4388 = torch.aten.view %4387, %3459 : !torch.vtensor<[640,48,16],f16>, !torch.list<int> -> !torch.vtensor<[640,768],f16> loc(#loc1)
%4389 = torch.aten.mul.Tensor %192, %4388 : !torch.vtensor<[640,768],si8>, !torch.vtensor<[640,768],f16> -> !torch.vtensor<[640,768],f16> loc(#loc1)
%4390 = torch.aten.transpose.int %4389, %int0, %int1 : !torch.vtensor<[640,768],f16>, !torch.int, !torch.int -> !torch.vtensor<[768,640],f16> loc(#loc1)
%4391 = torch.aten.view %4385, %1593 : !torch.vtensor<[2,77,768],f16>, !torch.list<int> -> !torch.vtensor<[154,768],f16> loc(#loc1)
%4392 = torch.aten.mm %4391, %4390 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,640],f16> -> !torch.vtensor<[154,640],f16> loc(#loc1)
%4393 = torch.aten.view %4392, %3465 : !torch.vtensor<[154,640],f16>, !torch.list<int> -> !torch.vtensor<[2,77,640],f16> loc(#loc1)
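// Cross-attention: the 1024 latent tokens attend over the 77 text tokens. Scores are
// [16,1024,77], scaled by %1 = 0.1118... = 1/sqrt(80), followed by the same
// max-subtracted softmax as above.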
%4394 = torch.aten.view %4291, %3252 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16> loc(#loc1)
%4395 = torch.aten.permute %4394, %1380 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16> loc(#loc1)
%4396 = torch.aten.clone %4395, %int0 : !torch.vtensor<[2,8,1024,80],f16>, !torch.int -> !torch.vtensor<[2,8,1024,80],f16> loc(#loc1)
%4397 = torch.aten.view %4396, %3256 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16> loc(#loc1)
%4398 = torch.aten.view %4342, %3522 : !torch.vtensor<[2,77,640],f16>, !torch.list<int> -> !torch.vtensor<[2,77,8,80],f16> loc(#loc1)
%4399 = torch.aten.permute %4398, %1380 : !torch.vtensor<[2,77,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,80],f16> loc(#loc1)
%4400 = torch.aten.clone %4399, %int0 : !torch.vtensor<[2,8,77,80],f16>, !torch.int -> !torch.vtensor<[2,8,77,80],f16> loc(#loc1)
%4401 = torch.aten.view %4400, %3526 : !torch.vtensor<[2,8,77,80],f16>, !torch.list<int> -> !torch.vtensor<[16,77,80],f16> loc(#loc1)
%4402 = torch.aten.view %4393, %3522 : !torch.vtensor<[2,77,640],f16>, !torch.list<int> -> !torch.vtensor<[2,77,8,80],f16> loc(#loc1)
%4403 = torch.aten.permute %4402, %1380 : !torch.vtensor<[2,77,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,80],f16> loc(#loc1)
%4404 = torch.aten.clone %4403, %int0 : !torch.vtensor<[2,8,77,80],f16>, !torch.int -> !torch.vtensor<[2,8,77,80],f16> loc(#loc1)
%4405 = torch.aten.view %4404, %3526 : !torch.vtensor<[2,8,77,80],f16>, !torch.list<int> -> !torch.vtensor<[16,77,80],f16> loc(#loc1)
%4406 = torch.aten.transpose.int %4401, %int-1, %int-2 : !torch.vtensor<[16,77,80],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,80,77],f16> loc(#loc1)
%4407 = torch.aten.broadcast_to %4397, %3256 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16> loc(#loc1)
%4408 = torch.aten.view %4407, %3256 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16> loc(#loc1)
%4409 = torch.aten.broadcast_to %4406, %3535 : !torch.vtensor<[16,80,77],f16>, !torch.list<int> -> !torch.vtensor<[16,80,77],f16> loc(#loc1)
%4410 = torch.aten.view %4409, %3535 : !torch.vtensor<[16,80,77],f16>, !torch.list<int> -> !torch.vtensor<[16,80,77],f16> loc(#loc1)
%4411 = torch.aten.bmm %4408, %4410 : !torch.vtensor<[16,1024,80],f16>, !torch.vtensor<[16,80,77],f16> -> !torch.vtensor<[16,1024,77],f16> loc(#loc1)
%4412 = torch.aten.view %4411, %3539 : !torch.vtensor<[16,1024,77],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,77],f16> loc(#loc1)
%4413 = torch.aten.mul.Tensor %4412, %1 : !torch.vtensor<[16,1024,77],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,1024,77],f16> loc(#loc1)
%values_108, %indices_109 = torch.aten.max.dim %4413, %int-1, %true : !torch.vtensor<[16,1024,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,1024,1],f16>, !torch.vtensor<[16,1024,1],si64> loc(#loc1)
%4414 = torch.aten.sub.Tensor %4413, %values_108, %float1.000000e00 : !torch.vtensor<[16,1024,77],f16>, !torch.vtensor<[16,1024,1],f16>, !torch.float -> !torch.vtensor<[16,1024,77],f16> loc(#loc1)
%4415 = torch.aten.exp %4414 : !torch.vtensor<[16,1024,77],f16> -> !torch.vtensor<[16,1024,77],f16> loc(#loc1)
%4416 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int> loc(#loc1)
%4417 = torch.aten.sum.dim_IntList %4415, %4416, %true, %none : !torch.vtensor<[16,1024,77],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[16,1024,1],f16> loc(#loc1)
%4418 = torch.aten.div.Tensor %4415, %4417 : !torch.vtensor<[16,1024,77],f16>, !torch.vtensor<[16,1024,1],f16> -> !torch.vtensor<[16,1024,77],f16> loc(#loc1)
%4419 = torch.aten.broadcast_to %4418, %3539 : !torch.vtensor<[16,1024,77],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,77],f16> loc(#loc1)
%4420 = torch.aten.view %4419, %3539 : !torch.vtensor<[16,1024,77],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,77],f16> loc(#loc1)
%4421 = torch.aten.broadcast_to %4405, %3526 : !torch.vtensor<[16,77,80],f16>, !torch.list<int> -> !torch.vtensor<[16,77,80],f16> loc(#loc1)
%4422 = torch.aten.view %4421, %3526 : !torch.vtensor<[16,77,80],f16>, !torch.list<int> -> !torch.vtensor<[16,77,80],f16> loc(#loc1)
%4423 = torch.aten.bmm %4420, %4422 : !torch.vtensor<[16,1024,77],f16>, !torch.vtensor<[16,77,80],f16> -> !torch.vtensor<[16,1024,80],f16> loc(#loc1)
%4424 = torch.aten.view %4423, %3256 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16> loc(#loc1)
%4425 = torch.aten.view %4424, %3287 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16> loc(#loc1)
%4426 = torch.aten.permute %4425, %1380 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16> loc(#loc1)
%4427 = torch.aten.clone %4426, %int0 : !torch.vtensor<[2,1024,8,80],f16>, !torch.int -> !torch.vtensor<[2,1024,8,80],f16> loc(#loc1)
%4428 = torch.aten.view %4427, %3072 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
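// Fake-quantize the cross-attention output and apply the quantized output projection
// (%193/%194, bias %195); the residual add with %4224 produces %4482.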
%4429 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4430 = torch.aten.detach %4429 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4431 = torch.aten.view %4428, %3090 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,40,16],f16> loc(#loc1)
%4432 = torch.aten.abs %4431 : !torch.vtensor<[2,1024,40,16],f16> -> !torch.vtensor<[2,1024,40,16],f16> loc(#loc1)
%values_110, %indices_111 = torch.aten.max.dim %4432, %int3, %true : !torch.vtensor<[2,1024,40,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,1024,40,1],f16>, !torch.vtensor<[2,1024,40,1],si64> loc(#loc1)
%4433 = torch.aten.view %values_110, %3093 : !torch.vtensor<[2,1024,40,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,40,1],f16> loc(#loc1)
%4434 = torch.aten.broadcast_to %4433, %3090 : !torch.vtensor<[2,1024,40,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,40,16],f16> loc(#loc1)
%4435 = torch.aten.clone %4434, %int0 : !torch.vtensor<[2,1024,40,16],f16>, !torch.int -> !torch.vtensor<[2,1024,40,16],f16> loc(#loc1)
%4436 = torch.aten.view %4435, %3072 : !torch.vtensor<[2,1024,40,16],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%4437 = torch.aten.sub.Tensor %4430, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%4438 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%4439 = torch.aten.pow.Tensor_Tensor %4438, %4437 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4440 = torch.aten.neg %4439 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4441 = torch.aten.neg %4440 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4442 = torch.aten.div.Tensor %4436, %4441 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%4443 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4444 = torch.aten.detach %4443 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4445 = torch.aten.div.Tensor %4428, %4442 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%4446 = torch.aten.add.Tensor %4445, %4444, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%4447 = torch.aten.sub.Tensor %4430, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%4448 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%4449 = torch.aten.pow.Tensor_Tensor %4448, %4447 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4450 = torch.aten.neg %4449 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4451 = torch.aten.sub.Tensor %4430, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%4452 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%4453 = torch.aten.pow.Tensor_Tensor %4452, %4451 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4454 = torch.aten.sub.Tensor %4453, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%4455 = torch.aten.gt.Tensor %4446, %4454 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,1024,640],i1> loc(#loc1)
%4456 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%4457 = torch.aten.to.dtype %4456, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%4458 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%4459 = torch.aten.broadcast_to %4457, %4458 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%4460 = torch.valsem.aten.copy %4459, %4454, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%4461 = torch.aten.where.self %4455, %4460, %4446 : !torch.vtensor<[2,1024,640],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%4462 = torch.aten.lt.Tensor %4461, %4450 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,1024,640],i1> loc(#loc1)
%4463 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%4464 = torch.aten.to.dtype %4463, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%4465 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%4466 = torch.aten.broadcast_to %4464, %4465 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%4467 = torch.valsem.aten.copy %4466, %4450, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%4468 = torch.aten.where.self %4462, %4467, %4461 : !torch.vtensor<[2,1024,640],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%4469 = torch.aten.round %4468 : !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%4470 = torch.aten.sub.Tensor %4469, %4444, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%4471 = torch.aten.mul.Tensor %4470, %4442 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%4472 = torch.aten.broadcast_to %193, %3133 : !torch.vtensor<[640,40,1],f16>, !torch.list<int> -> !torch.vtensor<[640,40,16],f16> loc(#loc1)
%4473 = torch.aten.clone %4472, %int0 : !torch.vtensor<[640,40,16],f16>, !torch.int -> !torch.vtensor<[640,40,16],f16> loc(#loc1)
%4474 = torch.aten.view %4473, %3136 : !torch.vtensor<[640,40,16],f16>, !torch.list<int> -> !torch.vtensor<[640,640],f16> loc(#loc1)
%4475 = torch.aten.mul.Tensor %194, %4474 : !torch.vtensor<[640,640],si8>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16> loc(#loc1)
%4476 = torch.aten.transpose.int %4475, %int0, %int1 : !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,640],f16> loc(#loc1)
%4477 = torch.aten.view %4471, %3140 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16> loc(#loc1)
%4478 = torch.aten.mm %4477, %4476 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16> loc(#loc1)
%4479 = torch.aten.mul.Scalar %195, %int1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640],f16> loc(#loc1)
%4480 = torch.aten.add.Tensor %4479, %4478, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2048,640],f16>, !torch.int -> !torch.vtensor<[2048,640],f16> loc(#loc1)
%4481 = torch.aten.view %4480, %3072 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%4482 = torch.aten.add.Tensor %4481, %4224, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
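// Another LayerNorm (same mean/var/rsqrt pattern) preparing the feed-forward input.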
%4483 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int> loc(#loc1)
%4484 = torch.aten.sum.dim_IntList %4482, %4483, %true, %none : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,1024,1],f16> loc(#loc1)
%4485 = torch.aten.div.Scalar %4484, %int640 : !torch.vtensor<[2,1024,1],f16>, !torch.int -> !torch.vtensor<[2,1024,1],f16> loc(#loc1)
%4486 = torch.prim.ListConstruct %int2, %int1024, %int640 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%4487 = torch.aten.broadcast_to %4485, %4486 : !torch.vtensor<[2,1024,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%4488 = torch.aten.sub.Tensor %4482, %4487, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%4489 = torch.aten.mul.Tensor %4488, %4488 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%4490 = torch.aten.sum.dim_IntList %4489, %4483, %true, %none : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,1024,1],f16> loc(#loc1)
%4491 = torch.aten.div.Scalar %4490, %int640 : !torch.vtensor<[2,1024,1],f16>, !torch.int -> !torch.vtensor<[2,1024,1],f16> loc(#loc1)
%4492 = torch.aten.add.Scalar %4491, %float1.000000e-05, %int1 : !torch.vtensor<[2,1024,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,1024,1],f16> loc(#loc1)
%4493 = torch.aten.rsqrt %4492 : !torch.vtensor<[2,1024,1],f16> -> !torch.vtensor<[2,1024,1],f16> loc(#loc1)
%4494 = torch.prim.ListConstruct %int2, %int1024, %int640 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%4495 = torch.aten.broadcast_to %4493, %4494 : !torch.vtensor<[2,1024,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%4496 = torch.aten.mul.Tensor %4488, %4495 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
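// GEGLU feed-forward: quantized 640 -> 5120 linear (%196/%197, bias %198), split into
// two 2560-wide halves; gelu of the second half gates the first.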
%4497 = torch.aten.broadcast_to %196, %3625 : !torch.vtensor<[5120,40,1],f16>, !torch.list<int> -> !torch.vtensor<[5120,40,16],f16> loc(#loc1)
%4498 = torch.aten.clone %4497, %int0 : !torch.vtensor<[5120,40,16],f16>, !torch.int -> !torch.vtensor<[5120,40,16],f16> loc(#loc1)
%4499 = torch.aten.view %4498, %3628 : !torch.vtensor<[5120,40,16],f16>, !torch.list<int> -> !torch.vtensor<[5120,640],f16> loc(#loc1)
%4500 = torch.aten.mul.Tensor %197, %4499 : !torch.vtensor<[5120,640],si8>, !torch.vtensor<[5120,640],f16> -> !torch.vtensor<[5120,640],f16> loc(#loc1)
%4501 = torch.aten.transpose.int %4500, %int0, %int1 : !torch.vtensor<[5120,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,5120],f16> loc(#loc1)
%4502 = torch.aten.view %4496, %3140 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16> loc(#loc1)
%4503 = torch.aten.mm %4502, %4501 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,5120],f16> -> !torch.vtensor<[2048,5120],f16> loc(#loc1)
%4504 = torch.aten.mul.Scalar %198, %int1 : !torch.vtensor<[5120],f16>, !torch.int -> !torch.vtensor<[5120],f16> loc(#loc1)
%4505 = torch.aten.add.Tensor %4504, %4503, %int1 : !torch.vtensor<[5120],f16>, !torch.vtensor<[2048,5120],f16>, !torch.int -> !torch.vtensor<[2048,5120],f16> loc(#loc1)
%4506 = torch.aten.view %4505, %3636 : !torch.vtensor<[2048,5120],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,5120],f16> loc(#loc1)
%4507 = torch.aten.slice.Tensor %4506, %int-1, %int0, %int2560, %int1 : !torch.vtensor<[2,1024,5120],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1024,2560],f16> loc(#loc1)
%4508 = torch.aten.slice.Tensor %4506, %int-1, %int2560, %int5120, %int1 : !torch.vtensor<[2,1024,5120],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1024,2560],f16> loc(#loc1)
%4509 = torch.aten.gelu %4508, %str : !torch.vtensor<[2,1024,2560],f16>, !torch.str -> !torch.vtensor<[2,1024,2560],f16> loc(#loc1)
%4510 = torch.aten.mul.Tensor %4507, %4509 : !torch.vtensor<[2,1024,2560],f16>, !torch.vtensor<[2,1024,2560],f16> -> !torch.vtensor<[2,1024,2560],f16> loc(#loc1)
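// Project the gated activations back to 640 with another quantized linear
// (%199/%200, bias %201), then add the residual %4482.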
%4511 = torch.aten.broadcast_to %199, %3642 : !torch.vtensor<[640,160,1],f16>, !torch.list<int> -> !torch.vtensor<[640,160,16],f16> loc(#loc1)
%4512 = torch.aten.clone %4511, %int0 : !torch.vtensor<[640,160,16],f16>, !torch.int -> !torch.vtensor<[640,160,16],f16> loc(#loc1)
%4513 = torch.aten.view %4512, %3645 : !torch.vtensor<[640,160,16],f16>, !torch.list<int> -> !torch.vtensor<[640,2560],f16> loc(#loc1)
%4514 = torch.aten.mul.Tensor %200, %4513 : !torch.vtensor<[640,2560],si8>, !torch.vtensor<[640,2560],f16> -> !torch.vtensor<[640,2560],f16> loc(#loc1)
%4515 = torch.aten.transpose.int %4514, %int0, %int1 : !torch.vtensor<[640,2560],f16>, !torch.int, !torch.int -> !torch.vtensor<[2560,640],f16> loc(#loc1)
%4516 = torch.aten.view %4510, %3649 : !torch.vtensor<[2,1024,2560],f16>, !torch.list<int> -> !torch.vtensor<[2048,2560],f16> loc(#loc1)
%4517 = torch.aten.mm %4516, %4515 : !torch.vtensor<[2048,2560],f16>, !torch.vtensor<[2560,640],f16> -> !torch.vtensor<[2048,640],f16> loc(#loc1)
%4518 = torch.aten.mul.Scalar %201, %int1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640],f16> loc(#loc1)
%4519 = torch.aten.add.Tensor %4518, %4517, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2048,640],f16>, !torch.int -> !torch.vtensor<[2048,640],f16> loc(#loc1)
%4520 = torch.aten.view %4519, %3072 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%4521 = torch.aten.add.Tensor %4520, %4482, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
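    // The residual add closes the feed-forward; the token sequence [2,1024,640]
    // is then reshaped and permuted back to the spatial layout [2,640,32,32].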
%4522 = torch.aten.view %4521, %3656 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2,32,32,640],f16> loc(#loc1)
%4523 = torch.aten.permute %4522, %1789 : !torch.vtensor<[2,32,32,640],f16>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
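    // Activation fake-quantization (quantize/dequantize), a pattern repeated
    // throughout this section: per-group absmax over blocks of 16 channels, then,
    // reading %880 as the bit-width n, %881 as the zero-point zp, and %882 as the
    // base 2 (an assumption; those constants are defined upstream of this excerpt):
    //   scale = absmax / 2^(n-1)
    //   q     = round(clamp(x / scale + zp, -2^(n-1), 2^(n-1) - 1))
    //   x_fq  = (q - zp) * scale
    // The clamp is spelled out below as gt/lt masks feeding torch.aten.where.self.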
%4524 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4525 = torch.aten.detach %4524 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4526 = torch.aten.view %4523, %2892 : !torch.vtensor<[2,640,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,40,16,32,32],f16> loc(#loc1)
%4527 = torch.aten.abs %4526 : !torch.vtensor<[2,40,16,32,32],f16> -> !torch.vtensor<[2,40,16,32,32],f16> loc(#loc1)
%values_112, %indices_113 = torch.aten.max.dim %4527, %int2, %true : !torch.vtensor<[2,40,16,32,32],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,40,1,32,32],f16>, !torch.vtensor<[2,40,1,32,32],si64> loc(#loc1)
%4528 = torch.aten.view %values_112, %2895 : !torch.vtensor<[2,40,1,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,40,1,32,32],f16> loc(#loc1)
%4529 = torch.aten.broadcast_to %4528, %2892 : !torch.vtensor<[2,40,1,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,40,16,32,32],f16> loc(#loc1)
%4530 = torch.aten.clone %4529, %int0 : !torch.vtensor<[2,40,16,32,32],f16>, !torch.int -> !torch.vtensor<[2,40,16,32,32],f16> loc(#loc1)
%4531 = torch.aten.view %4530, %2875 : !torch.vtensor<[2,40,16,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%4532 = torch.aten.sub.Tensor %4525, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%4533 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%4534 = torch.aten.pow.Tensor_Tensor %4533, %4532 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4535 = torch.aten.neg %4534 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4536 = torch.aten.neg %4535 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4537 = torch.aten.div.Tensor %4531, %4536 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%4538 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4539 = torch.aten.detach %4538 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4540 = torch.aten.div.Tensor %4523, %4537 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%4541 = torch.aten.add.Tensor %4540, %4539, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%4542 = torch.aten.sub.Tensor %4525, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%4543 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%4544 = torch.aten.pow.Tensor_Tensor %4543, %4542 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4545 = torch.aten.neg %4544 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4546 = torch.aten.sub.Tensor %4525, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%4547 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%4548 = torch.aten.pow.Tensor_Tensor %4547, %4546 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4549 = torch.aten.sub.Tensor %4548, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%4550 = torch.aten.gt.Tensor %4541, %4549 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,640,32,32],i1> loc(#loc1)
%4551 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%4552 = torch.aten.to.dtype %4551, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%4553 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%4554 = torch.aten.broadcast_to %4552, %4553 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%4555 = torch.valsem.aten.copy %4554, %4549, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%4556 = torch.aten.where.self %4550, %4555, %4541 : !torch.vtensor<[2,640,32,32],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%4557 = torch.aten.lt.Tensor %4556, %4545 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,640,32,32],i1> loc(#loc1)
%4558 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%4559 = torch.aten.to.dtype %4558, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%4560 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%4561 = torch.aten.broadcast_to %4559, %4560 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%4562 = torch.valsem.aten.copy %4561, %4545, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%4563 = torch.aten.where.self %4557, %4562, %4556 : !torch.vtensor<[2,640,32,32],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%4564 = torch.aten.round %4563 : !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%4565 = torch.aten.sub.Tensor %4564, %4539, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%4566 = torch.aten.mul.Tensor %4565, %4537 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
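    // Output projection of the transformer block: dequantize the grouped int8
    // 1x1 conv weight (%202/%203), convolve, and add the result back onto what
    // appears to be the pre-transformer residual %3885.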
%4567 = torch.aten.broadcast_to %202, %3064 : !torch.vtensor<[640,40,1,1,1],f16>, !torch.list<int> -> !torch.vtensor<[640,40,16,1,1],f16> loc(#loc1)
%4568 = torch.aten.clone %4567, %int0 : !torch.vtensor<[640,40,16,1,1],f16>, !torch.int -> !torch.vtensor<[640,40,16,1,1],f16> loc(#loc1)
%4569 = torch.aten.view %4568, %3067 : !torch.vtensor<[640,40,16,1,1],f16>, !torch.list<int> -> !torch.vtensor<[640,640,1,1],f16> loc(#loc1)
%4570 = torch.aten.mul.Tensor %203, %4569 : !torch.vtensor<[640,640,1,1],si8>, !torch.vtensor<[640,640,1,1],f16> -> !torch.vtensor<[640,640,1,1],f16> loc(#loc1)
%4571 = torch.aten.convolution %4566, %4570, %204, %933, %934, %933, %false, %934, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[640,640,1,1],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%4572 = torch.aten.add.Tensor %4571, %3885, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
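    // Second fake-quant round (same pattern as above) on the block output,
    // preparing the input of the strided downsample convolution.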
%4573 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4574 = torch.aten.detach %4573 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4575 = torch.aten.view %4572, %2892 : !torch.vtensor<[2,640,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,40,16,32,32],f16> loc(#loc1)
%4576 = torch.aten.abs %4575 : !torch.vtensor<[2,40,16,32,32],f16> -> !torch.vtensor<[2,40,16,32,32],f16> loc(#loc1)
%values_114, %indices_115 = torch.aten.max.dim %4576, %int2, %true : !torch.vtensor<[2,40,16,32,32],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,40,1,32,32],f16>, !torch.vtensor<[2,40,1,32,32],si64> loc(#loc1)
%4577 = torch.aten.view %values_114, %2895 : !torch.vtensor<[2,40,1,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,40,1,32,32],f16> loc(#loc1)
%4578 = torch.aten.broadcast_to %4577, %2892 : !torch.vtensor<[2,40,1,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,40,16,32,32],f16> loc(#loc1)
%4579 = torch.aten.clone %4578, %int0 : !torch.vtensor<[2,40,16,32,32],f16>, !torch.int -> !torch.vtensor<[2,40,16,32,32],f16> loc(#loc1)
%4580 = torch.aten.view %4579, %2875 : !torch.vtensor<[2,40,16,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%4581 = torch.aten.sub.Tensor %4574, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%4582 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%4583 = torch.aten.pow.Tensor_Tensor %4582, %4581 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4584 = torch.aten.neg %4583 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4585 = torch.aten.neg %4584 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4586 = torch.aten.div.Tensor %4580, %4585 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%4587 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4588 = torch.aten.detach %4587 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4589 = torch.aten.div.Tensor %4572, %4586 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%4590 = torch.aten.add.Tensor %4589, %4588, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%4591 = torch.aten.sub.Tensor %4574, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%4592 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%4593 = torch.aten.pow.Tensor_Tensor %4592, %4591 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4594 = torch.aten.neg %4593 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4595 = torch.aten.sub.Tensor %4574, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%4596 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%4597 = torch.aten.pow.Tensor_Tensor %4596, %4595 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4598 = torch.aten.sub.Tensor %4597, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%4599 = torch.aten.gt.Tensor %4590, %4598 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,640,32,32],i1> loc(#loc1)
%4600 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%4601 = torch.aten.to.dtype %4600, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%4602 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%4603 = torch.aten.broadcast_to %4601, %4602 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%4604 = torch.valsem.aten.copy %4603, %4598, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%4605 = torch.aten.where.self %4599, %4604, %4590 : !torch.vtensor<[2,640,32,32],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%4606 = torch.aten.lt.Tensor %4605, %4594 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,640,32,32],i1> loc(#loc1)
%4607 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%4608 = torch.aten.to.dtype %4607, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%4609 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%4610 = torch.aten.broadcast_to %4608, %4609 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%4611 = torch.valsem.aten.copy %4610, %4594, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%4612 = torch.aten.where.self %4606, %4611, %4605 : !torch.vtensor<[2,640,32,32],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%4613 = torch.aten.round %4612 : !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%4614 = torch.aten.sub.Tensor %4613, %4588, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%4615 = torch.aten.mul.Tensor %4614, %4586 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
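    // Downsample: dequantized 3x3 convolution (%205/%206, bias %207) with stride
    // %2752 (presumably [2,2], given the 32x32 -> 16x16 output) and padding %933.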
%4616 = torch.aten.broadcast_to %205, %2935 : !torch.vtensor<[640,40,1,3,3],f16>, !torch.list<int> -> !torch.vtensor<[640,40,16,3,3],f16> loc(#loc1)
%4617 = torch.aten.clone %4616, %int0 : !torch.vtensor<[640,40,16,3,3],f16>, !torch.int -> !torch.vtensor<[640,40,16,3,3],f16> loc(#loc1)
%4618 = torch.aten.view %4617, %2938 : !torch.vtensor<[640,40,16,3,3],f16>, !torch.list<int> -> !torch.vtensor<[640,640,3,3],f16> loc(#loc1)
%4619 = torch.aten.mul.Tensor %206, %4618 : !torch.vtensor<[640,640,3,3],si8>, !torch.vtensor<[640,640,3,3],f16> -> !torch.vtensor<[640,640,3,3],f16> loc(#loc1)
%4620 = torch.aten.convolution %4615, %4619, %207, %2752, %933, %933, %false, %934, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[640,640,3,3],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,640,16,16],f16> loc(#loc1)
%4621 = torch.aten.clone %4620, %int0 : !torch.vtensor<[2,640,16,16],f16>, !torch.int -> !torch.vtensor<[2,640,16,16],f16> loc(#loc1)
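    // GroupNorm over 32 groups: [2,640,16,16] is viewed as [2,32,20,256] and the
    // statistics are accumulated in f64 (sums divided by 20*256 = 5120 elements
    // per group), eps %5 (1e-5). A per-channel affine (%208/%209) and a SiLU
    // (sigmoid(x) * x) follow.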
%4622 = torch.prim.ListConstruct %int2, %int32, %int20, %int256 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc3)
%4623 = torch.aten.view %4621, %4622 : !torch.vtensor<[2,640,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,32,20,256],f16> loc(#loc1)
%4624 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%4625 = torch.aten.to.dtype %4624, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%4626 = torch.prim.ListConstruct %int2, %int32, %int20, %int256 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%4627 = torch.aten.broadcast_to %4625, %4626 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,20,256],f32> loc(#loc1)
%4628 = torch.valsem.aten.copy %4627, %4623, %false : !torch.vtensor<[2,32,20,256],f32>, !torch.vtensor<[2,32,20,256],f16>, !torch.bool -> !torch.vtensor<[2,32,20,256],f32> loc(#loc1)
%4629 = torch.aten.to.dtype %4628, %int7, %false, %false, %none : !torch.vtensor<[2,32,20,256],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,20,256],f64> loc(#loc1)
%4630 = torch.aten.sum.dim_IntList %4629, %943, %true, %none : !torch.vtensor<[2,32,20,256],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%4631 = torch.aten.div.Scalar %4630, %int5120 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%4632 = torch.aten.sub.Tensor %4629, %4631, %float1.000000e00 : !torch.vtensor<[2,32,20,256],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,20,256],f64> loc(#loc1)
%4633 = torch.aten.mul.Tensor %4632, %4632 : !torch.vtensor<[2,32,20,256],f64>, !torch.vtensor<[2,32,20,256],f64> -> !torch.vtensor<[2,32,20,256],f64> loc(#loc1)
%4634 = torch.aten.sum.dim_IntList %4633, %943, %true, %none : !torch.vtensor<[2,32,20,256],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%4635 = torch.aten.div.Scalar %4634, %int5120 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%4636 = torch.aten.to.dtype %4635, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%4637 = torch.aten.sum.dim_IntList %4628, %943, %true, %none : !torch.vtensor<[2,32,20,256],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%4638 = torch.aten.div.Scalar %4637, %int5120 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%4639 = torch.aten.add.Tensor %4636, %5, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%4640 = torch.aten.rsqrt %4639 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%4641 = torch.aten.sub.Tensor %4623, %4638, %int1 : !torch.vtensor<[2,32,20,256],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,20,256],f32> loc(#loc1)
%4642 = torch.aten.mul.Tensor %4641, %4640 : !torch.vtensor<[2,32,20,256],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,20,256],f32> loc(#loc1)
%4643 = torch.prim.ListConstruct %int2, %int640, %int16, %int16 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc3)
%4644 = torch.aten.view %4642, %4643 : !torch.vtensor<[2,32,20,256],f32>, !torch.list<int> -> !torch.vtensor<[2,640,16,16],f32> loc(#loc1)
%4645 = torch.aten.unsqueeze %208, %int-1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640,1],f16> loc(#loc1)
%4646 = torch.aten.unsqueeze %4645, %int-1 : !torch.vtensor<[640,1],f16>, !torch.int -> !torch.vtensor<[640,1,1],f16> loc(#loc1)
%4647 = torch.aten.mul.Tensor %4644, %4646 : !torch.vtensor<[2,640,16,16],f32>, !torch.vtensor<[640,1,1],f16> -> !torch.vtensor<[2,640,16,16],f32> loc(#loc1)
%4648 = torch.aten.unsqueeze %209, %int-1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640,1],f16> loc(#loc1)
%4649 = torch.aten.unsqueeze %4648, %int-1 : !torch.vtensor<[640,1],f16>, !torch.int -> !torch.vtensor<[640,1,1],f16> loc(#loc1)
%4650 = torch.aten.add.Tensor %4647, %4649, %int1 : !torch.vtensor<[2,640,16,16],f32>, !torch.vtensor<[640,1,1],f16>, !torch.int -> !torch.vtensor<[2,640,16,16],f32> loc(#loc1)
%4651 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%4652 = torch.aten.to.dtype %4651, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%4653 = torch.prim.ListConstruct %int2, %int640, %int16, %int16 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%4654 = torch.aten.broadcast_to %4652, %4653 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,640,16,16],f16> loc(#loc1)
%4655 = torch.valsem.aten.copy %4654, %4650, %false : !torch.vtensor<[2,640,16,16],f16>, !torch.vtensor<[2,640,16,16],f32>, !torch.bool -> !torch.vtensor<[2,640,16,16],f16> loc(#loc1)
%4656 = torch.aten.sigmoid %4655 : !torch.vtensor<[2,640,16,16],f16> -> !torch.vtensor<[2,640,16,16],f16> loc(#loc1)
%4657 = torch.aten.mul.Tensor %4656, %4655 : !torch.vtensor<[2,640,16,16],f16>, !torch.vtensor<[2,640,16,16],f16> -> !torch.vtensor<[2,640,16,16],f16> loc(#loc1)
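    // Fake-quant of the normalized activations ahead of the first 3x3
    // convolution of this ResNet block.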
%4658 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4659 = torch.aten.detach %4658 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4660 = torch.prim.ListConstruct %int2, %int40, %int16, %int16, %int16 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc3)
%4661 = torch.aten.view %4657, %4660 : !torch.vtensor<[2,640,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,40,16,16,16],f16> loc(#loc1)
%4662 = torch.aten.abs %4661 : !torch.vtensor<[2,40,16,16,16],f16> -> !torch.vtensor<[2,40,16,16,16],f16> loc(#loc1)
%values_116, %indices_117 = torch.aten.max.dim %4662, %int2, %true : !torch.vtensor<[2,40,16,16,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,40,1,16,16],f16>, !torch.vtensor<[2,40,1,16,16],si64> loc(#loc1)
%4663 = torch.prim.ListConstruct %int2, %int40, %int1, %int16, %int16 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc3)
%4664 = torch.aten.view %values_116, %4663 : !torch.vtensor<[2,40,1,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,40,1,16,16],f16> loc(#loc1)
%4665 = torch.aten.broadcast_to %4664, %4660 : !torch.vtensor<[2,40,1,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,40,16,16,16],f16> loc(#loc1)
%4666 = torch.aten.clone %4665, %int0 : !torch.vtensor<[2,40,16,16,16],f16>, !torch.int -> !torch.vtensor<[2,40,16,16,16],f16> loc(#loc1)
%4667 = torch.aten.view %4666, %4643 : !torch.vtensor<[2,40,16,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,640,16,16],f16> loc(#loc1)
%4668 = torch.aten.sub.Tensor %4659, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%4669 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%4670 = torch.aten.pow.Tensor_Tensor %4669, %4668 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4671 = torch.aten.neg %4670 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4672 = torch.aten.neg %4671 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4673 = torch.aten.div.Tensor %4667, %4672 : !torch.vtensor<[2,640,16,16],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,640,16,16],f16> loc(#loc1)
%4674 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4675 = torch.aten.detach %4674 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4676 = torch.aten.div.Tensor %4657, %4673 : !torch.vtensor<[2,640,16,16],f16>, !torch.vtensor<[2,640,16,16],f16> -> !torch.vtensor<[2,640,16,16],f16> loc(#loc1)
%4677 = torch.aten.add.Tensor %4676, %4675, %int1 : !torch.vtensor<[2,640,16,16],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,640,16,16],f16> loc(#loc1)
%4678 = torch.aten.sub.Tensor %4659, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%4679 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%4680 = torch.aten.pow.Tensor_Tensor %4679, %4678 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4681 = torch.aten.neg %4680 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4682 = torch.aten.sub.Tensor %4659, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%4683 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%4684 = torch.aten.pow.Tensor_Tensor %4683, %4682 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4685 = torch.aten.sub.Tensor %4684, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%4686 = torch.aten.gt.Tensor %4677, %4685 : !torch.vtensor<[2,640,16,16],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,640,16,16],i1> loc(#loc1)
%4687 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%4688 = torch.aten.to.dtype %4687, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%4689 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%4690 = torch.aten.broadcast_to %4688, %4689 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%4691 = torch.valsem.aten.copy %4690, %4685, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%4692 = torch.aten.where.self %4686, %4691, %4677 : !torch.vtensor<[2,640,16,16],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,640,16,16],f16> -> !torch.vtensor<[2,640,16,16],f16> loc(#loc1)
%4693 = torch.aten.lt.Tensor %4692, %4681 : !torch.vtensor<[2,640,16,16],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,640,16,16],i1> loc(#loc1)
%4694 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%4695 = torch.aten.to.dtype %4694, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%4696 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%4697 = torch.aten.broadcast_to %4695, %4696 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%4698 = torch.valsem.aten.copy %4697, %4681, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%4699 = torch.aten.where.self %4693, %4698, %4692 : !torch.vtensor<[2,640,16,16],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,640,16,16],f16> -> !torch.vtensor<[2,640,16,16],f16> loc(#loc1)
%4700 = torch.aten.round %4699 : !torch.vtensor<[2,640,16,16],f16> -> !torch.vtensor<[2,640,16,16],f16> loc(#loc1)
%4701 = torch.aten.sub.Tensor %4700, %4675, %int1 : !torch.vtensor<[2,640,16,16],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,640,16,16],f16> loc(#loc1)
%4702 = torch.aten.mul.Tensor %4701, %4673 : !torch.vtensor<[2,640,16,16],f16>, !torch.vtensor<[2,640,16,16],f16> -> !torch.vtensor<[2,640,16,16],f16> loc(#loc1)
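    // First ResNet convolution: grouped int8 weight dequantization (%210/%211)
    // followed by a 3x3 convolution that widens the channels from 640 to 1280.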
%4703 = torch.prim.ListConstruct %int1280, %int40, %int16, %int3, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc2)
%4704 = torch.aten.broadcast_to %210, %4703 : !torch.vtensor<[1280,40,1,3,3],f16>, !torch.list<int> -> !torch.vtensor<[1280,40,16,3,3],f16> loc(#loc1)
%4705 = torch.aten.clone %4704, %int0 : !torch.vtensor<[1280,40,16,3,3],f16>, !torch.int -> !torch.vtensor<[1280,40,16,3,3],f16> loc(#loc1)
%4706 = torch.prim.ListConstruct %int1280, %int640, %int3, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc4)
%4707 = torch.aten.view %4705, %4706 : !torch.vtensor<[1280,40,16,3,3],f16>, !torch.list<int> -> !torch.vtensor<[1280,640,3,3],f16> loc(#loc1)
%4708 = torch.aten.mul.Tensor %211, %4707 : !torch.vtensor<[1280,640,3,3],si8>, !torch.vtensor<[1280,640,3,3],f16> -> !torch.vtensor<[1280,640,3,3],f16> loc(#loc1)
%4709 = torch.aten.convolution %4702, %4708, %212, %933, %933, %933, %false, %934, %int1 : !torch.vtensor<[2,640,16,16],f16>, !torch.vtensor<[1280,640,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
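    // What appears to be the timestep-embedding injection: SiLU on the [2,1280]
    // embedding %932, a linear projection (%213/%214), and a broadcast add over
    // the spatial dims via two unsqueezes. The full-range slice.Tensor ops are
    // tracing artifacts and do not change the tensor.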
%4710 = torch.aten.sigmoid %932 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16> loc(#loc1)
%4711 = torch.aten.mul.Tensor %4710, %932 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16> loc(#loc1)
%4712 = torch.aten.transpose.int %213, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16> loc(#loc1)
%4713 = torch.aten.mm %4711, %4712 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[2,1280],f16> loc(#loc1)
%4714 = torch.aten.mul.Scalar %214, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16> loc(#loc1)
%4715 = torch.aten.add.Tensor %4714, %4713, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[2,1280],f16>, !torch.int -> !torch.vtensor<[2,1280],f16> loc(#loc1)
%4716 = torch.aten.slice.Tensor %4715, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,1280],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16> loc(#loc1)
%4717 = torch.aten.slice.Tensor %4716, %int1, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,1280],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16> loc(#loc1)
%4718 = torch.aten.unsqueeze %4717, %int2 : !torch.vtensor<[2,1280],f16>, !torch.int -> !torch.vtensor<[2,1280,1],f16> loc(#loc1)
%4719 = torch.aten.unsqueeze %4718, %int3 : !torch.vtensor<[2,1280,1],f16>, !torch.int -> !torch.vtensor<[2,1280,1,1],f16> loc(#loc1)
%4720 = torch.aten.add.Tensor %4709, %4719, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
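    // Second GroupNorm (viewed as [2,32,40,256], divisor 40*256 = 10240, eps %5),
    // per-channel affine %215/%216, then SiLU.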
%4721 = torch.prim.ListConstruct %int2, %int32, %int40, %int256 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc3)
%4722 = torch.aten.view %4720, %4721 : !torch.vtensor<[2,1280,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,32,40,256],f16> loc(#loc1)
%4723 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%4724 = torch.aten.to.dtype %4723, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%4725 = torch.prim.ListConstruct %int2, %int32, %int40, %int256 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%4726 = torch.aten.broadcast_to %4724, %4725 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,40,256],f32> loc(#loc1)
%4727 = torch.valsem.aten.copy %4726, %4722, %false : !torch.vtensor<[2,32,40,256],f32>, !torch.vtensor<[2,32,40,256],f16>, !torch.bool -> !torch.vtensor<[2,32,40,256],f32> loc(#loc1)
%4728 = torch.aten.to.dtype %4727, %int7, %false, %false, %none : !torch.vtensor<[2,32,40,256],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,40,256],f64> loc(#loc1)
%4729 = torch.aten.sum.dim_IntList %4728, %943, %true, %none : !torch.vtensor<[2,32,40,256],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%4730 = torch.aten.div.Scalar %4729, %int10240 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%4731 = torch.aten.sub.Tensor %4728, %4730, %float1.000000e00 : !torch.vtensor<[2,32,40,256],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,40,256],f64> loc(#loc1)
%4732 = torch.aten.mul.Tensor %4731, %4731 : !torch.vtensor<[2,32,40,256],f64>, !torch.vtensor<[2,32,40,256],f64> -> !torch.vtensor<[2,32,40,256],f64> loc(#loc1)
%4733 = torch.aten.sum.dim_IntList %4732, %943, %true, %none : !torch.vtensor<[2,32,40,256],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%4734 = torch.aten.div.Scalar %4733, %int10240 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%4735 = torch.aten.to.dtype %4734, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%4736 = torch.aten.sum.dim_IntList %4727, %943, %true, %none : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%4737 = torch.aten.div.Scalar %4736, %int10240 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%4738 = torch.aten.add.Tensor %4735, %5, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%4739 = torch.aten.rsqrt %4738 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%4740 = torch.aten.sub.Tensor %4722, %4737, %int1 : !torch.vtensor<[2,32,40,256],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,256],f32> loc(#loc1)
%4741 = torch.aten.mul.Tensor %4740, %4739 : !torch.vtensor<[2,32,40,256],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,256],f32> loc(#loc1)
%4742 = torch.prim.ListConstruct %int2, %int1280, %int16, %int16 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc3)
%4743 = torch.aten.view %4741, %4742 : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f32> loc(#loc1)
%4744 = torch.aten.unsqueeze %215, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16> loc(#loc1)
%4745 = torch.aten.unsqueeze %4744, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16> loc(#loc1)
%4746 = torch.aten.mul.Tensor %4743, %4745 : !torch.vtensor<[2,1280,16,16],f32>, !torch.vtensor<[1280,1,1],f16> -> !torch.vtensor<[2,1280,16,16],f32> loc(#loc1)
%4747 = torch.aten.unsqueeze %216, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16> loc(#loc1)
%4748 = torch.aten.unsqueeze %4747, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16> loc(#loc1)
%4749 = torch.aten.add.Tensor %4746, %4748, %int1 : !torch.vtensor<[2,1280,16,16],f32>, !torch.vtensor<[1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f32> loc(#loc1)
%4750 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%4751 = torch.aten.to.dtype %4750, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%4752 = torch.prim.ListConstruct %int2, %int1280, %int16, %int16 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%4753 = torch.aten.broadcast_to %4751, %4752 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%4754 = torch.valsem.aten.copy %4753, %4749, %false : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f32>, !torch.bool -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%4755 = torch.aten.sigmoid %4754 : !torch.vtensor<[2,1280,16,16],f16> -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%4756 = torch.aten.mul.Tensor %4755, %4754 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f16> -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
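    // Fake-quant ahead of the second ResNet convolution.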
%4757 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4758 = torch.aten.detach %4757 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4759 = torch.prim.ListConstruct %int2, %int80, %int16, %int16, %int16 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc3)
%4760 = torch.aten.view %4756, %4759 : !torch.vtensor<[2,1280,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,80,16,16,16],f16> loc(#loc1)
%4761 = torch.aten.abs %4760 : !torch.vtensor<[2,80,16,16,16],f16> -> !torch.vtensor<[2,80,16,16,16],f16> loc(#loc1)
%values_118, %indices_119 = torch.aten.max.dim %4761, %int2, %true : !torch.vtensor<[2,80,16,16,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,80,1,16,16],f16>, !torch.vtensor<[2,80,1,16,16],si64> loc(#loc1)
%4762 = torch.prim.ListConstruct %int2, %int80, %int1, %int16, %int16 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc3)
%4763 = torch.aten.view %values_118, %4762 : !torch.vtensor<[2,80,1,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,80,1,16,16],f16> loc(#loc1)
%4764 = torch.aten.broadcast_to %4763, %4759 : !torch.vtensor<[2,80,1,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,80,16,16,16],f16> loc(#loc1)
%4765 = torch.aten.clone %4764, %int0 : !torch.vtensor<[2,80,16,16,16],f16>, !torch.int -> !torch.vtensor<[2,80,16,16,16],f16> loc(#loc1)
%4766 = torch.aten.view %4765, %4742 : !torch.vtensor<[2,80,16,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%4767 = torch.aten.sub.Tensor %4758, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%4768 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%4769 = torch.aten.pow.Tensor_Tensor %4768, %4767 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4770 = torch.aten.neg %4769 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4771 = torch.aten.neg %4770 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4772 = torch.aten.div.Tensor %4766, %4771 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%4773 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4774 = torch.aten.detach %4773 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4775 = torch.aten.div.Tensor %4756, %4772 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f16> -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%4776 = torch.aten.add.Tensor %4775, %4774, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%4777 = torch.aten.sub.Tensor %4758, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%4778 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%4779 = torch.aten.pow.Tensor_Tensor %4778, %4777 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4780 = torch.aten.neg %4779 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4781 = torch.aten.sub.Tensor %4758, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%4782 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%4783 = torch.aten.pow.Tensor_Tensor %4782, %4781 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4784 = torch.aten.sub.Tensor %4783, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%4785 = torch.aten.gt.Tensor %4776, %4784 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,1280,16,16],i1> loc(#loc1)
%4786 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%4787 = torch.aten.to.dtype %4786, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%4788 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%4789 = torch.aten.broadcast_to %4787, %4788 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%4790 = torch.valsem.aten.copy %4789, %4784, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%4791 = torch.aten.where.self %4785, %4790, %4776 : !torch.vtensor<[2,1280,16,16],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,1280,16,16],f16> -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%4792 = torch.aten.lt.Tensor %4791, %4780 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,1280,16,16],i1> loc(#loc1)
%4793 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%4794 = torch.aten.to.dtype %4793, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%4795 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%4796 = torch.aten.broadcast_to %4794, %4795 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%4797 = torch.valsem.aten.copy %4796, %4780, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%4798 = torch.aten.where.self %4792, %4797, %4791 : !torch.vtensor<[2,1280,16,16],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,1280,16,16],f16> -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%4799 = torch.aten.round %4798 : !torch.vtensor<[2,1280,16,16],f16> -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%4800 = torch.aten.sub.Tensor %4799, %4774, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%4801 = torch.aten.mul.Tensor %4800, %4772 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f16> -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
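    // Second ResNet convolution (1280 -> 1280, 3x3, stride %933, padding %933),
    // again with grouped int8 weight dequantization (%217/%218).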
%4802 = torch.prim.ListConstruct %int1280, %int80, %int16, %int3, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc2)
%4803 = torch.aten.broadcast_to %217, %4802 : !torch.vtensor<[1280,80,1,3,3],f16>, !torch.list<int> -> !torch.vtensor<[1280,80,16,3,3],f16> loc(#loc1)
%4804 = torch.aten.clone %4803, %int0 : !torch.vtensor<[1280,80,16,3,3],f16>, !torch.int -> !torch.vtensor<[1280,80,16,3,3],f16> loc(#loc1)
%4805 = torch.prim.ListConstruct %int1280, %int1280, %int3, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc4)
%4806 = torch.aten.view %4804, %4805 : !torch.vtensor<[1280,80,16,3,3],f16>, !torch.list<int> -> !torch.vtensor<[1280,1280,3,3],f16> loc(#loc1)
%4807 = torch.aten.mul.Tensor %218, %4806 : !torch.vtensor<[1280,1280,3,3],si8>, !torch.vtensor<[1280,1280,3,3],f16> -> !torch.vtensor<[1280,1280,3,3],f16> loc(#loc1)
%4808 = torch.aten.convolution %4801, %4807, %219, %933, %933, %933, %false, %934, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
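    // Shortcut path: the downsampled input %4620 is fake-quantized separately
    // before the 1x1 shortcut convolution that raises its channels to 1280.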
%4809 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4810 = torch.aten.detach %4809 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4811 = torch.aten.view %4620, %4660 : !torch.vtensor<[2,640,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,40,16,16,16],f16> loc(#loc1)
%4812 = torch.aten.abs %4811 : !torch.vtensor<[2,40,16,16,16],f16> -> !torch.vtensor<[2,40,16,16,16],f16> loc(#loc1)
%values_120, %indices_121 = torch.aten.max.dim %4812, %int2, %true : !torch.vtensor<[2,40,16,16,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,40,1,16,16],f16>, !torch.vtensor<[2,40,1,16,16],si64> loc(#loc1)
%4813 = torch.aten.view %values_120, %4663 : !torch.vtensor<[2,40,1,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,40,1,16,16],f16> loc(#loc1)
%4814 = torch.aten.broadcast_to %4813, %4660 : !torch.vtensor<[2,40,1,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,40,16,16,16],f16> loc(#loc1)
%4815 = torch.aten.clone %4814, %int0 : !torch.vtensor<[2,40,16,16,16],f16>, !torch.int -> !torch.vtensor<[2,40,16,16,16],f16> loc(#loc1)
%4816 = torch.aten.view %4815, %4643 : !torch.vtensor<[2,40,16,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,640,16,16],f16> loc(#loc1)
%4817 = torch.aten.sub.Tensor %4810, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%4818 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%4819 = torch.aten.pow.Tensor_Tensor %4818, %4817 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4820 = torch.aten.neg %4819 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4821 = torch.aten.neg %4820 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4822 = torch.aten.div.Tensor %4816, %4821 : !torch.vtensor<[2,640,16,16],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,640,16,16],f16> loc(#loc1)
%4823 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4824 = torch.aten.detach %4823 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4825 = torch.aten.div.Tensor %4620, %4822 : !torch.vtensor<[2,640,16,16],f16>, !torch.vtensor<[2,640,16,16],f16> -> !torch.vtensor<[2,640,16,16],f16> loc(#loc1)
%4826 = torch.aten.add.Tensor %4825, %4824, %int1 : !torch.vtensor<[2,640,16,16],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,640,16,16],f16> loc(#loc1)
%4827 = torch.aten.sub.Tensor %4810, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%4828 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%4829 = torch.aten.pow.Tensor_Tensor %4828, %4827 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4830 = torch.aten.neg %4829 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4831 = torch.aten.sub.Tensor %4810, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%4832 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%4833 = torch.aten.pow.Tensor_Tensor %4832, %4831 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4834 = torch.aten.sub.Tensor %4833, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%4835 = torch.aten.gt.Tensor %4826, %4834 : !torch.vtensor<[2,640,16,16],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,640,16,16],i1> loc(#loc1)
%4836 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%4837 = torch.aten.to.dtype %4836, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%4838 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%4839 = torch.aten.broadcast_to %4837, %4838 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%4840 = torch.valsem.aten.copy %4839, %4834, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%4841 = torch.aten.where.self %4835, %4840, %4826 : !torch.vtensor<[2,640,16,16],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,640,16,16],f16> -> !torch.vtensor<[2,640,16,16],f16> loc(#loc1)
%4842 = torch.aten.lt.Tensor %4841, %4830 : !torch.vtensor<[2,640,16,16],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,640,16,16],i1> loc(#loc1)
%4843 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%4844 = torch.aten.to.dtype %4843, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%4845 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%4846 = torch.aten.broadcast_to %4844, %4845 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%4847 = torch.valsem.aten.copy %4846, %4830, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%4848 = torch.aten.where.self %4842, %4847, %4841 : !torch.vtensor<[2,640,16,16],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,640,16,16],f16> -> !torch.vtensor<[2,640,16,16],f16> loc(#loc1)
%4849 = torch.aten.round %4848 : !torch.vtensor<[2,640,16,16],f16> -> !torch.vtensor<[2,640,16,16],f16> loc(#loc1)
%4850 = torch.aten.sub.Tensor %4849, %4824, %int1 : !torch.vtensor<[2,640,16,16],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,640,16,16],f16> loc(#loc1)
%4851 = torch.aten.mul.Tensor %4850, %4822 : !torch.vtensor<[2,640,16,16],f16>, !torch.vtensor<[2,640,16,16],f16> -> !torch.vtensor<[2,640,16,16],f16> loc(#loc1)
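    // 1x1 shortcut convolution (640 -> 1280) on the dequantized weight
    // (%220/%221), then the residual add with the main path; the divide by %4
    // (a 1.0 constant, the usual ResNet output scale factor) is numerically a
    // no-op here.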
%4852 = torch.prim.ListConstruct %int1280, %int40, %int16, %int1, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc2)
%4853 = torch.aten.broadcast_to %220, %4852 : !torch.vtensor<[1280,40,1,1,1],f16>, !torch.list<int> -> !torch.vtensor<[1280,40,16,1,1],f16> loc(#loc1)
%4854 = torch.aten.clone %4853, %int0 : !torch.vtensor<[1280,40,16,1,1],f16>, !torch.int -> !torch.vtensor<[1280,40,16,1,1],f16> loc(#loc1)
%4855 = torch.prim.ListConstruct %int1280, %int640, %int1, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc4)
%4856 = torch.aten.view %4854, %4855 : !torch.vtensor<[1280,40,16,1,1],f16>, !torch.list<int> -> !torch.vtensor<[1280,640,1,1],f16> loc(#loc1)
%4857 = torch.aten.mul.Tensor %221, %4856 : !torch.vtensor<[1280,640,1,1],si8>, !torch.vtensor<[1280,640,1,1],f16> -> !torch.vtensor<[1280,640,1,1],f16> loc(#loc1)
%4858 = torch.aten.convolution %4851, %4857, %222, %933, %934, %933, %false, %934, %int1 : !torch.vtensor<[2,640,16,16],f16>, !torch.vtensor<[1280,640,1,1],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%4859 = torch.aten.add.Tensor %4858, %4808, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%4860 = torch.aten.div.Tensor %4859, %4 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
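    // GroupNorm with eps %3 (~1e-6), consistent with the normalization that
    // precedes a transformer block's input projection; its output is
    // fake-quantized below.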
%4861 = torch.aten.clone %4860, %int0 : !torch.vtensor<[2,1280,16,16],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%4862 = torch.aten.view %4861, %4721 : !torch.vtensor<[2,1280,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,32,40,256],f16> loc(#loc1)
%4863 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%4864 = torch.aten.to.dtype %4863, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%4865 = torch.prim.ListConstruct %int2, %int32, %int40, %int256 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%4866 = torch.aten.broadcast_to %4864, %4865 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,40,256],f32> loc(#loc1)
%4867 = torch.valsem.aten.copy %4866, %4862, %false : !torch.vtensor<[2,32,40,256],f32>, !torch.vtensor<[2,32,40,256],f16>, !torch.bool -> !torch.vtensor<[2,32,40,256],f32> loc(#loc1)
%4868 = torch.aten.to.dtype %4867, %int7, %false, %false, %none : !torch.vtensor<[2,32,40,256],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,40,256],f64> loc(#loc1)
%4869 = torch.aten.sum.dim_IntList %4868, %943, %true, %none : !torch.vtensor<[2,32,40,256],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%4870 = torch.aten.div.Scalar %4869, %int10240 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%4871 = torch.aten.sub.Tensor %4868, %4870, %float1.000000e00 : !torch.vtensor<[2,32,40,256],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,40,256],f64> loc(#loc1)
%4872 = torch.aten.mul.Tensor %4871, %4871 : !torch.vtensor<[2,32,40,256],f64>, !torch.vtensor<[2,32,40,256],f64> -> !torch.vtensor<[2,32,40,256],f64> loc(#loc1)
%4873 = torch.aten.sum.dim_IntList %4872, %943, %true, %none : !torch.vtensor<[2,32,40,256],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%4874 = torch.aten.div.Scalar %4873, %int10240 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%4875 = torch.aten.to.dtype %4874, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%4876 = torch.aten.sum.dim_IntList %4867, %943, %true, %none : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%4877 = torch.aten.div.Scalar %4876, %int10240 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%4878 = torch.aten.add.Tensor %4875, %3, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%4879 = torch.aten.rsqrt %4878 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%4880 = torch.aten.sub.Tensor %4862, %4877, %int1 : !torch.vtensor<[2,32,40,256],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,256],f32> loc(#loc1)
%4881 = torch.aten.mul.Tensor %4880, %4879 : !torch.vtensor<[2,32,40,256],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,256],f32> loc(#loc1)
%4882 = torch.aten.view %4881, %4742 : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f32> loc(#loc1)
%4883 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%4884 = torch.aten.to.dtype %4883, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%4885 = torch.prim.ListConstruct %int2, %int1280, %int16, %int16 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%4886 = torch.aten.broadcast_to %4884, %4885 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%4887 = torch.valsem.aten.copy %4886, %4882, %false : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f32>, !torch.bool -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%4888 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4889 = torch.aten.detach %4888 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4890 = torch.aten.view %4887, %4759 : !torch.vtensor<[2,1280,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,80,16,16,16],f16> loc(#loc1)
%4891 = torch.aten.abs %4890 : !torch.vtensor<[2,80,16,16,16],f16> -> !torch.vtensor<[2,80,16,16,16],f16> loc(#loc1)
%values_122, %indices_123 = torch.aten.max.dim %4891, %int2, %true : !torch.vtensor<[2,80,16,16,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,80,1,16,16],f16>, !torch.vtensor<[2,80,1,16,16],si64> loc(#loc1)
%4892 = torch.aten.view %values_122, %4762 : !torch.vtensor<[2,80,1,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,80,1,16,16],f16> loc(#loc1)
%4893 = torch.aten.broadcast_to %4892, %4759 : !torch.vtensor<[2,80,1,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,80,16,16,16],f16> loc(#loc1)
%4894 = torch.aten.clone %4893, %int0 : !torch.vtensor<[2,80,16,16,16],f16>, !torch.int -> !torch.vtensor<[2,80,16,16,16],f16> loc(#loc1)
%4895 = torch.aten.view %4894, %4742 : !torch.vtensor<[2,80,16,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%4896 = torch.aten.sub.Tensor %4889, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%4897 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%4898 = torch.aten.pow.Tensor_Tensor %4897, %4896 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4899 = torch.aten.neg %4898 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4900 = torch.aten.neg %4899 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4901 = torch.aten.div.Tensor %4895, %4900 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%4902 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4903 = torch.aten.detach %4902 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4904 = torch.aten.div.Tensor %4887, %4901 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f16> -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%4905 = torch.aten.add.Tensor %4904, %4903, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%4906 = torch.aten.sub.Tensor %4889, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%4907 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%4908 = torch.aten.pow.Tensor_Tensor %4907, %4906 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4909 = torch.aten.neg %4908 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4910 = torch.aten.sub.Tensor %4889, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%4911 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%4912 = torch.aten.pow.Tensor_Tensor %4911, %4910 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4913 = torch.aten.sub.Tensor %4912, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%4914 = torch.aten.gt.Tensor %4905, %4913 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,1280,16,16],i1> loc(#loc1)
%4915 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%4916 = torch.aten.to.dtype %4915, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%4917 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%4918 = torch.aten.broadcast_to %4916, %4917 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%4919 = torch.valsem.aten.copy %4918, %4913, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%4920 = torch.aten.where.self %4914, %4919, %4905 : !torch.vtensor<[2,1280,16,16],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,1280,16,16],f16> -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%4921 = torch.aten.lt.Tensor %4920, %4909 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,1280,16,16],i1> loc(#loc1)
%4922 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%4923 = torch.aten.to.dtype %4922, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%4924 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%4925 = torch.aten.broadcast_to %4923, %4924 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%4926 = torch.valsem.aten.copy %4925, %4909, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%4927 = torch.aten.where.self %4921, %4926, %4920 : !torch.vtensor<[2,1280,16,16],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,1280,16,16],f16> -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%4928 = torch.aten.round %4927 : !torch.vtensor<[2,1280,16,16],f16> -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%4929 = torch.aten.sub.Tensor %4928, %4903, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%4930 = torch.aten.mul.Tensor %4929, %4901 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f16> -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
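// Weight dequantization: per-group f16 scales (%223) are broadcast over blocks of
// 16 input channels and multiplied into the si8 weight (%224), then used in a
// 1x1 convolution with bias %225.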
%4931 = torch.prim.ListConstruct %int1280, %int80, %int16, %int1, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc2)
%4932 = torch.aten.broadcast_to %223, %4931 : !torch.vtensor<[1280,80,1,1,1],f16>, !torch.list<int> -> !torch.vtensor<[1280,80,16,1,1],f16> loc(#loc1)
%4933 = torch.aten.clone %4932, %int0 : !torch.vtensor<[1280,80,16,1,1],f16>, !torch.int -> !torch.vtensor<[1280,80,16,1,1],f16> loc(#loc1)
%4934 = torch.prim.ListConstruct %int1280, %int1280, %int1, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc4)
%4935 = torch.aten.view %4933, %4934 : !torch.vtensor<[1280,80,16,1,1],f16>, !torch.list<int> -> !torch.vtensor<[1280,1280,1,1],f16> loc(#loc1)
%4936 = torch.aten.mul.Tensor %224, %4935 : !torch.vtensor<[1280,1280,1,1],si8>, !torch.vtensor<[1280,1280,1,1],f16> -> !torch.vtensor<[1280,1280,1,1],f16> loc(#loc1)
%4937 = torch.aten.convolution %4930, %4936, %225, %933, %934, %933, %false, %934, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[1280,1280,1,1],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%4938 = torch.aten.permute %4937, %1196 : !torch.vtensor<[2,1280,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,16,16,1280],f16> loc(#loc1)
%4939 = torch.prim.ListConstruct %int2, %int256, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc5)
%4940 = torch.aten.view %4938, %4939 : !torch.vtensor<[2,16,16,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%4941 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int> loc(#loc1)
%4942 = torch.aten.sum.dim_IntList %4940, %4941, %true, %none : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,256,1],f16> loc(#loc1)
%4943 = torch.aten.div.Scalar %4942, %int1280 : !torch.vtensor<[2,256,1],f16>, !torch.int -> !torch.vtensor<[2,256,1],f16> loc(#loc1)
%4944 = torch.prim.ListConstruct %int2, %int256, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%4945 = torch.aten.broadcast_to %4943, %4944 : !torch.vtensor<[2,256,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%4946 = torch.aten.sub.Tensor %4940, %4945, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%4947 = torch.aten.mul.Tensor %4946, %4946 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%4948 = torch.aten.sum.dim_IntList %4947, %4941, %true, %none : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,256,1],f16> loc(#loc1)
%4949 = torch.aten.div.Scalar %4948, %int1280 : !torch.vtensor<[2,256,1],f16>, !torch.int -> !torch.vtensor<[2,256,1],f16> loc(#loc1)
%4950 = torch.aten.add.Scalar %4949, %float1.000000e-05, %int1 : !torch.vtensor<[2,256,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,256,1],f16> loc(#loc1)
%4951 = torch.aten.rsqrt %4950 : !torch.vtensor<[2,256,1],f16> -> !torch.vtensor<[2,256,1],f16> loc(#loc1)
%4952 = torch.prim.ListConstruct %int2, %int256, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%4953 = torch.aten.broadcast_to %4951, %4952 : !torch.vtensor<[2,256,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%4954 = torch.aten.mul.Tensor %4946, %4953 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%4955 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4956 = torch.aten.detach %4955 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4957 = torch.prim.ListConstruct %int2, %int256, %int80, %int16 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc3)
%4958 = torch.aten.view %4954, %4957 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,80,16],f16> loc(#loc1)
%4959 = torch.aten.abs %4958 : !torch.vtensor<[2,256,80,16],f16> -> !torch.vtensor<[2,256,80,16],f16> loc(#loc1)
%values_124, %indices_125 = torch.aten.max.dim %4959, %int3, %true : !torch.vtensor<[2,256,80,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,256,80,1],f16>, !torch.vtensor<[2,256,80,1],si64> loc(#loc1)
%4960 = torch.prim.ListConstruct %int2, %int256, %int80, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc3)
%4961 = torch.aten.view %values_124, %4960 : !torch.vtensor<[2,256,80,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,80,1],f16> loc(#loc1)
%4962 = torch.aten.broadcast_to %4961, %4957 : !torch.vtensor<[2,256,80,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,80,16],f16> loc(#loc1)
%4963 = torch.aten.clone %4962, %int0 : !torch.vtensor<[2,256,80,16],f16>, !torch.int -> !torch.vtensor<[2,256,80,16],f16> loc(#loc1)
%4964 = torch.aten.view %4963, %4939 : !torch.vtensor<[2,256,80,16],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%4965 = torch.aten.sub.Tensor %4956, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%4966 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%4967 = torch.aten.pow.Tensor_Tensor %4966, %4965 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4968 = torch.aten.neg %4967 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4969 = torch.aten.neg %4968 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4970 = torch.aten.div.Tensor %4964, %4969 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%4971 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4972 = torch.aten.detach %4971 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4973 = torch.aten.div.Tensor %4954, %4970 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%4974 = torch.aten.add.Tensor %4973, %4972, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%4975 = torch.aten.sub.Tensor %4956, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%4976 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%4977 = torch.aten.pow.Tensor_Tensor %4976, %4975 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4978 = torch.aten.neg %4977 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4979 = torch.aten.sub.Tensor %4956, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%4980 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%4981 = torch.aten.pow.Tensor_Tensor %4980, %4979 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4982 = torch.aten.sub.Tensor %4981, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%4983 = torch.aten.gt.Tensor %4974, %4982 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,256,1280],i1> loc(#loc1)
%4984 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%4985 = torch.aten.to.dtype %4984, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%4986 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%4987 = torch.aten.broadcast_to %4985, %4986 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%4988 = torch.valsem.aten.copy %4987, %4982, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%4989 = torch.aten.where.self %4983, %4988, %4974 : !torch.vtensor<[2,256,1280],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%4990 = torch.aten.lt.Tensor %4989, %4978 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,256,1280],i1> loc(#loc1)
%4991 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%4992 = torch.aten.to.dtype %4991, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%4993 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%4994 = torch.aten.broadcast_to %4992, %4993 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%4995 = torch.valsem.aten.copy %4994, %4978, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%4996 = torch.aten.where.self %4990, %4995, %4989 : !torch.vtensor<[2,256,1280],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%4997 = torch.aten.round %4996 : !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%4998 = torch.aten.sub.Tensor %4997, %4972, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%4999 = torch.aten.mul.Tensor %4998, %4970 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5000 = torch.prim.ListConstruct %int1280, %int80, %int16 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc2)
%5001 = torch.aten.broadcast_to %226, %5000 : !torch.vtensor<[1280,80,1],f16>, !torch.list<int> -> !torch.vtensor<[1280,80,16],f16> loc(#loc1)
%5002 = torch.aten.clone %5001, %int0 : !torch.vtensor<[1280,80,16],f16>, !torch.int -> !torch.vtensor<[1280,80,16],f16> loc(#loc1)
%5003 = torch.prim.ListConstruct %int1280, %int1280 : (!torch.int, !torch.int) -> !torch.list<int> loc(#loc4)
%5004 = torch.aten.view %5002, %5003 : !torch.vtensor<[1280,80,16],f16>, !torch.list<int> -> !torch.vtensor<[1280,1280],f16> loc(#loc1)
%5005 = torch.aten.mul.Tensor %227, %5004 : !torch.vtensor<[1280,1280],si8>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16> loc(#loc1)
%5006 = torch.aten.transpose.int %5005, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16> loc(#loc1)
%5007 = torch.prim.ListConstruct %int512, %int1280 : (!torch.int, !torch.int) -> !torch.list<int> loc(#loc3)
%5008 = torch.aten.view %4999, %5007 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16> loc(#loc1)
%5009 = torch.aten.mm %5008, %5006 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16> loc(#loc1)
%5010 = torch.aten.mul.Scalar %228, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16> loc(#loc1)
%5011 = torch.aten.add.Tensor %5010, %5009, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[512,1280],f16>, !torch.int -> !torch.vtensor<[512,1280],f16> loc(#loc1)
%5012 = torch.aten.view %5011, %4939 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5013 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5014 = torch.aten.detach %5013 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5015 = torch.aten.view %4954, %4957 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,80,16],f16> loc(#loc1)
%5016 = torch.aten.abs %5015 : !torch.vtensor<[2,256,80,16],f16> -> !torch.vtensor<[2,256,80,16],f16> loc(#loc1)
%values_126, %indices_127 = torch.aten.max.dim %5016, %int3, %true : !torch.vtensor<[2,256,80,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,256,80,1],f16>, !torch.vtensor<[2,256,80,1],si64> loc(#loc1)
%5017 = torch.aten.view %values_126, %4960 : !torch.vtensor<[2,256,80,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,80,1],f16> loc(#loc1)
%5018 = torch.aten.broadcast_to %5017, %4957 : !torch.vtensor<[2,256,80,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,80,16],f16> loc(#loc1)
%5019 = torch.aten.clone %5018, %int0 : !torch.vtensor<[2,256,80,16],f16>, !torch.int -> !torch.vtensor<[2,256,80,16],f16> loc(#loc1)
%5020 = torch.aten.view %5019, %4939 : !torch.vtensor<[2,256,80,16],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5021 = torch.aten.sub.Tensor %5014, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%5022 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%5023 = torch.aten.pow.Tensor_Tensor %5022, %5021 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5024 = torch.aten.neg %5023 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5025 = torch.aten.neg %5024 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5026 = torch.aten.div.Tensor %5020, %5025 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5027 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5028 = torch.aten.detach %5027 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5029 = torch.aten.div.Tensor %4954, %5026 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5030 = torch.aten.add.Tensor %5029, %5028, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5031 = torch.aten.sub.Tensor %5014, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%5032 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%5033 = torch.aten.pow.Tensor_Tensor %5032, %5031 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5034 = torch.aten.neg %5033 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5035 = torch.aten.sub.Tensor %5014, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%5036 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%5037 = torch.aten.pow.Tensor_Tensor %5036, %5035 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5038 = torch.aten.sub.Tensor %5037, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%5039 = torch.aten.gt.Tensor %5030, %5038 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,256,1280],i1> loc(#loc1)
%5040 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%5041 = torch.aten.to.dtype %5040, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%5042 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%5043 = torch.aten.broadcast_to %5041, %5042 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%5044 = torch.valsem.aten.copy %5043, %5038, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%5045 = torch.aten.where.self %5039, %5044, %5030 : !torch.vtensor<[2,256,1280],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5046 = torch.aten.lt.Tensor %5045, %5034 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,256,1280],i1> loc(#loc1)
%5047 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%5048 = torch.aten.to.dtype %5047, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%5049 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%5050 = torch.aten.broadcast_to %5048, %5049 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%5051 = torch.valsem.aten.copy %5050, %5034, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%5052 = torch.aten.where.self %5046, %5051, %5045 : !torch.vtensor<[2,256,1280],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5053 = torch.aten.round %5052 : !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5054 = torch.aten.sub.Tensor %5053, %5028, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5055 = torch.aten.mul.Tensor %5054, %5026 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5056 = torch.aten.broadcast_to %229, %5000 : !torch.vtensor<[1280,80,1],f16>, !torch.list<int> -> !torch.vtensor<[1280,80,16],f16> loc(#loc1)
%5057 = torch.aten.clone %5056, %int0 : !torch.vtensor<[1280,80,16],f16>, !torch.int -> !torch.vtensor<[1280,80,16],f16> loc(#loc1)
%5058 = torch.aten.view %5057, %5003 : !torch.vtensor<[1280,80,16],f16>, !torch.list<int> -> !torch.vtensor<[1280,1280],f16> loc(#loc1)
%5059 = torch.aten.mul.Tensor %230, %5058 : !torch.vtensor<[1280,1280],si8>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16> loc(#loc1)
%5060 = torch.aten.transpose.int %5059, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16> loc(#loc1)
%5061 = torch.aten.view %5055, %5007 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16> loc(#loc1)
%5062 = torch.aten.mm %5061, %5060 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16> loc(#loc1)
%5063 = torch.aten.mul.Scalar %231, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16> loc(#loc1)
%5064 = torch.aten.add.Tensor %5063, %5062, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[512,1280],f16>, !torch.int -> !torch.vtensor<[512,1280],f16> loc(#loc1)
%5065 = torch.aten.view %5064, %4939 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5066 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5067 = torch.aten.detach %5066 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5068 = torch.aten.view %4954, %4957 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,80,16],f16> loc(#loc1)
%5069 = torch.aten.abs %5068 : !torch.vtensor<[2,256,80,16],f16> -> !torch.vtensor<[2,256,80,16],f16> loc(#loc1)
%values_128, %indices_129 = torch.aten.max.dim %5069, %int3, %true : !torch.vtensor<[2,256,80,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,256,80,1],f16>, !torch.vtensor<[2,256,80,1],si64> loc(#loc1)
%5070 = torch.aten.view %values_128, %4960 : !torch.vtensor<[2,256,80,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,80,1],f16> loc(#loc1)
%5071 = torch.aten.broadcast_to %5070, %4957 : !torch.vtensor<[2,256,80,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,80,16],f16> loc(#loc1)
%5072 = torch.aten.clone %5071, %int0 : !torch.vtensor<[2,256,80,16],f16>, !torch.int -> !torch.vtensor<[2,256,80,16],f16> loc(#loc1)
%5073 = torch.aten.view %5072, %4939 : !torch.vtensor<[2,256,80,16],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5074 = torch.aten.sub.Tensor %5067, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%5075 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%5076 = torch.aten.pow.Tensor_Tensor %5075, %5074 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5077 = torch.aten.neg %5076 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5078 = torch.aten.neg %5077 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5079 = torch.aten.div.Tensor %5073, %5078 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5080 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5081 = torch.aten.detach %5080 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5082 = torch.aten.div.Tensor %4954, %5079 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5083 = torch.aten.add.Tensor %5082, %5081, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5084 = torch.aten.sub.Tensor %5067, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%5085 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%5086 = torch.aten.pow.Tensor_Tensor %5085, %5084 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5087 = torch.aten.neg %5086 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5088 = torch.aten.sub.Tensor %5067, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%5089 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%5090 = torch.aten.pow.Tensor_Tensor %5089, %5088 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5091 = torch.aten.sub.Tensor %5090, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%5092 = torch.aten.gt.Tensor %5083, %5091 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,256,1280],i1> loc(#loc1)
%5093 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%5094 = torch.aten.to.dtype %5093, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%5095 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%5096 = torch.aten.broadcast_to %5094, %5095 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%5097 = torch.valsem.aten.copy %5096, %5091, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%5098 = torch.aten.where.self %5092, %5097, %5083 : !torch.vtensor<[2,256,1280],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5099 = torch.aten.lt.Tensor %5098, %5087 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,256,1280],i1> loc(#loc1)
%5100 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%5101 = torch.aten.to.dtype %5100, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%5102 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%5103 = torch.aten.broadcast_to %5101, %5102 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%5104 = torch.valsem.aten.copy %5103, %5087, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%5105 = torch.aten.where.self %5099, %5104, %5098 : !torch.vtensor<[2,256,1280],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5106 = torch.aten.round %5105 : !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5107 = torch.aten.sub.Tensor %5106, %5081, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5108 = torch.aten.mul.Tensor %5107, %5079 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5109 = torch.aten.broadcast_to %232, %5000 : !torch.vtensor<[1280,80,1],f16>, !torch.list<int> -> !torch.vtensor<[1280,80,16],f16> loc(#loc1)
%5110 = torch.aten.clone %5109, %int0 : !torch.vtensor<[1280,80,16],f16>, !torch.int -> !torch.vtensor<[1280,80,16],f16> loc(#loc1)
%5111 = torch.aten.view %5110, %5003 : !torch.vtensor<[1280,80,16],f16>, !torch.list<int> -> !torch.vtensor<[1280,1280],f16> loc(#loc1)
%5112 = torch.aten.mul.Tensor %233, %5111 : !torch.vtensor<[1280,1280],si8>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16> loc(#loc1)
%5113 = torch.aten.transpose.int %5112, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16> loc(#loc1)
%5114 = torch.aten.view %5108, %5007 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16> loc(#loc1)
%5115 = torch.aten.mm %5114, %5113 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16> loc(#loc1)
%5116 = torch.aten.mul.Scalar %234, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16> loc(#loc1)
%5117 = torch.aten.add.Tensor %5116, %5115, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[512,1280],f16>, !torch.int -> !torch.vtensor<[512,1280],f16> loc(#loc1)
%5118 = torch.aten.view %5117, %4939 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5119 = torch.prim.ListConstruct %int2, %int256, %int8, %int160 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc5)
%5120 = torch.aten.view %5012, %5119 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16> loc(#loc1)
%5121 = torch.aten.permute %5120, %1380 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16> loc(#loc1)
%5122 = torch.aten.clone %5121, %int0 : !torch.vtensor<[2,8,256,160],f16>, !torch.int -> !torch.vtensor<[2,8,256,160],f16> loc(#loc1)
%5123 = torch.prim.ListConstruct %int16, %int256, %int160 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc2)
%5124 = torch.aten.view %5122, %5123 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16> loc(#loc1)
%5125 = torch.aten.view %5065, %5119 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16> loc(#loc1)
%5126 = torch.aten.permute %5125, %1380 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16> loc(#loc1)
%5127 = torch.aten.clone %5126, %int0 : !torch.vtensor<[2,8,256,160],f16>, !torch.int -> !torch.vtensor<[2,8,256,160],f16> loc(#loc1)
%5128 = torch.aten.view %5127, %5123 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16> loc(#loc1)
%5129 = torch.aten.view %5118, %5119 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16> loc(#loc1)
%5130 = torch.aten.permute %5129, %1380 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16> loc(#loc1)
%5131 = torch.aten.clone %5130, %int0 : !torch.vtensor<[2,8,256,160],f16>, !torch.int -> !torch.vtensor<[2,8,256,160],f16> loc(#loc1)
%5132 = torch.aten.view %5131, %5123 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16> loc(#loc1)
%5133 = torch.aten.transpose.int %5128, %int-1, %int-2 : !torch.vtensor<[16,256,160],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,160,256],f16> loc(#loc1)
%5134 = torch.aten.broadcast_to %5124, %5123 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16> loc(#loc1)
%5135 = torch.aten.view %5134, %5123 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16> loc(#loc1)
%5136 = torch.prim.ListConstruct %int16, %int160, %int256 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc2)
%5137 = torch.aten.broadcast_to %5133, %5136 : !torch.vtensor<[16,160,256],f16>, !torch.list<int> -> !torch.vtensor<[16,160,256],f16> loc(#loc1)
%5138 = torch.aten.view %5137, %5136 : !torch.vtensor<[16,160,256],f16>, !torch.list<int> -> !torch.vtensor<[16,160,256],f16> loc(#loc1)
%5139 = torch.aten.bmm %5135, %5138 : !torch.vtensor<[16,256,160],f16>, !torch.vtensor<[16,160,256],f16> -> !torch.vtensor<[16,256,256],f16> loc(#loc1)
%5140 = torch.prim.ListConstruct %int16, %int256, %int256 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc4)
%5141 = torch.aten.view %5139, %5140 : !torch.vtensor<[16,256,256],f16>, !torch.list<int> -> !torch.vtensor<[16,256,256],f16> loc(#loc1)
%5142 = torch.aten.mul.Tensor %5141, %0 : !torch.vtensor<[16,256,256],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,256,256],f16> loc(#loc1)
%values_130, %indices_131 = torch.aten.max.dim %5142, %int-1, %true : !torch.vtensor<[16,256,256],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,256,1],f16>, !torch.vtensor<[16,256,1],si64> loc(#loc1)
%5143 = torch.aten.sub.Tensor %5142, %values_130, %float1.000000e00 : !torch.vtensor<[16,256,256],f16>, !torch.vtensor<[16,256,1],f16>, !torch.float -> !torch.vtensor<[16,256,256],f16> loc(#loc1)
%5144 = torch.aten.exp %5143 : !torch.vtensor<[16,256,256],f16> -> !torch.vtensor<[16,256,256],f16> loc(#loc1)
%5145 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int> loc(#loc1)
%5146 = torch.aten.sum.dim_IntList %5144, %5145, %true, %none : !torch.vtensor<[16,256,256],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[16,256,1],f16> loc(#loc1)
%5147 = torch.aten.div.Tensor %5144, %5146 : !torch.vtensor<[16,256,256],f16>, !torch.vtensor<[16,256,1],f16> -> !torch.vtensor<[16,256,256],f16> loc(#loc1)
%5148 = torch.aten.broadcast_to %5147, %5140 : !torch.vtensor<[16,256,256],f16>, !torch.list<int> -> !torch.vtensor<[16,256,256],f16> loc(#loc1)
%5149 = torch.aten.view %5148, %5140 : !torch.vtensor<[16,256,256],f16>, !torch.list<int> -> !torch.vtensor<[16,256,256],f16> loc(#loc1)
%5150 = torch.aten.broadcast_to %5132, %5123 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16> loc(#loc1)
%5151 = torch.aten.view %5150, %5123 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16> loc(#loc1)
%5152 = torch.aten.bmm %5149, %5151 : !torch.vtensor<[16,256,256],f16>, !torch.vtensor<[16,256,160],f16> -> !torch.vtensor<[16,256,160],f16> loc(#loc1)
%5153 = torch.aten.view %5152, %5123 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16> loc(#loc1)
%5154 = torch.prim.ListConstruct %int2, %int8, %int256, %int160 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc5)
%5155 = torch.aten.view %5153, %5154 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16> loc(#loc1)
%5156 = torch.aten.permute %5155, %1380 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16> loc(#loc1)
%5157 = torch.aten.clone %5156, %int0 : !torch.vtensor<[2,256,8,160],f16>, !torch.int -> !torch.vtensor<[2,256,8,160],f16> loc(#loc1)
%5158 = torch.aten.view %5157, %4939 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5159 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5160 = torch.aten.detach %5159 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5161 = torch.aten.view %5158, %4957 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,80,16],f16> loc(#loc1)
%5162 = torch.aten.abs %5161 : !torch.vtensor<[2,256,80,16],f16> -> !torch.vtensor<[2,256,80,16],f16> loc(#loc1)
%values_132, %indices_133 = torch.aten.max.dim %5162, %int3, %true : !torch.vtensor<[2,256,80,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,256,80,1],f16>, !torch.vtensor<[2,256,80,1],si64> loc(#loc1)
%5163 = torch.aten.view %values_132, %4960 : !torch.vtensor<[2,256,80,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,80,1],f16> loc(#loc1)
%5164 = torch.aten.broadcast_to %5163, %4957 : !torch.vtensor<[2,256,80,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,80,16],f16> loc(#loc1)
%5165 = torch.aten.clone %5164, %int0 : !torch.vtensor<[2,256,80,16],f16>, !torch.int -> !torch.vtensor<[2,256,80,16],f16> loc(#loc1)
%5166 = torch.aten.view %5165, %4939 : !torch.vtensor<[2,256,80,16],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5167 = torch.aten.sub.Tensor %5160, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%5168 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%5169 = torch.aten.pow.Tensor_Tensor %5168, %5167 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5170 = torch.aten.neg %5169 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5171 = torch.aten.neg %5170 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5172 = torch.aten.div.Tensor %5166, %5171 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5173 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5174 = torch.aten.detach %5173 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5175 = torch.aten.div.Tensor %5158, %5172 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5176 = torch.aten.add.Tensor %5175, %5174, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5177 = torch.aten.sub.Tensor %5160, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%5178 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%5179 = torch.aten.pow.Tensor_Tensor %5178, %5177 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5180 = torch.aten.neg %5179 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5181 = torch.aten.sub.Tensor %5160, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%5182 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%5183 = torch.aten.pow.Tensor_Tensor %5182, %5181 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5184 = torch.aten.sub.Tensor %5183, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%5185 = torch.aten.gt.Tensor %5176, %5184 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,256,1280],i1> loc(#loc1)
%5186 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%5187 = torch.aten.to.dtype %5186, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%5188 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%5189 = torch.aten.broadcast_to %5187, %5188 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%5190 = torch.valsem.aten.copy %5189, %5184, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%5191 = torch.aten.where.self %5185, %5190, %5176 : !torch.vtensor<[2,256,1280],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5192 = torch.aten.lt.Tensor %5191, %5180 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,256,1280],i1> loc(#loc1)
%5193 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%5194 = torch.aten.to.dtype %5193, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%5195 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%5196 = torch.aten.broadcast_to %5194, %5195 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%5197 = torch.valsem.aten.copy %5196, %5180, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%5198 = torch.aten.where.self %5192, %5197, %5191 : !torch.vtensor<[2,256,1280],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5199 = torch.aten.round %5198 : !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5200 = torch.aten.sub.Tensor %5199, %5174, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5201 = torch.aten.mul.Tensor %5200, %5172 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5202 = torch.aten.broadcast_to %235, %5000 : !torch.vtensor<[1280,80,1],f16>, !torch.list<int> -> !torch.vtensor<[1280,80,16],f16> loc(#loc1)
%5203 = torch.aten.clone %5202, %int0 : !torch.vtensor<[1280,80,16],f16>, !torch.int -> !torch.vtensor<[1280,80,16],f16> loc(#loc1)
%5204 = torch.aten.view %5203, %5003 : !torch.vtensor<[1280,80,16],f16>, !torch.list<int> -> !torch.vtensor<[1280,1280],f16> loc(#loc1)
%5205 = torch.aten.mul.Tensor %236, %5204 : !torch.vtensor<[1280,1280],si8>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16> loc(#loc1)
%5206 = torch.aten.transpose.int %5205, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16> loc(#loc1)
%5207 = torch.aten.view %5201, %5007 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16> loc(#loc1)
%5208 = torch.aten.mm %5207, %5206 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16> loc(#loc1)
%5209 = torch.aten.mul.Scalar %237, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16> loc(#loc1)
%5210 = torch.aten.add.Tensor %5209, %5208, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[512,1280],f16>, !torch.int -> !torch.vtensor<[512,1280],f16> loc(#loc1)
%5211 = torch.aten.view %5210, %4939 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5212 = torch.aten.add.Tensor %5211, %4940, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5213 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int> loc(#loc1)
%5214 = torch.aten.sum.dim_IntList %5212, %5213, %true, %none : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,256,1],f16> loc(#loc1)
%5215 = torch.aten.div.Scalar %5214, %int1280 : !torch.vtensor<[2,256,1],f16>, !torch.int -> !torch.vtensor<[2,256,1],f16> loc(#loc1)
%5216 = torch.prim.ListConstruct %int2, %int256, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%5217 = torch.aten.broadcast_to %5215, %5216 : !torch.vtensor<[2,256,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5218 = torch.aten.sub.Tensor %5212, %5217, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5219 = torch.aten.mul.Tensor %5218, %5218 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5220 = torch.aten.sum.dim_IntList %5219, %5213, %true, %none : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,256,1],f16> loc(#loc1)
%5221 = torch.aten.div.Scalar %5220, %int1280 : !torch.vtensor<[2,256,1],f16>, !torch.int -> !torch.vtensor<[2,256,1],f16> loc(#loc1)
%5222 = torch.aten.add.Scalar %5221, %float1.000000e-05, %int1 : !torch.vtensor<[2,256,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,256,1],f16> loc(#loc1)
%5223 = torch.aten.rsqrt %5222 : !torch.vtensor<[2,256,1],f16> -> !torch.vtensor<[2,256,1],f16> loc(#loc1)
%5224 = torch.prim.ListConstruct %int2, %int256, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%5225 = torch.aten.broadcast_to %5223, %5224 : !torch.vtensor<[2,256,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5226 = torch.aten.mul.Tensor %5218, %5225 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5227 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5228 = torch.aten.detach %5227 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5229 = torch.aten.view %5226, %4957 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,80,16],f16> loc(#loc1)
%5230 = torch.aten.abs %5229 : !torch.vtensor<[2,256,80,16],f16> -> !torch.vtensor<[2,256,80,16],f16> loc(#loc1)
%values_134, %indices_135 = torch.aten.max.dim %5230, %int3, %true : !torch.vtensor<[2,256,80,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,256,80,1],f16>, !torch.vtensor<[2,256,80,1],si64> loc(#loc1)
%5231 = torch.aten.view %values_134, %4960 : !torch.vtensor<[2,256,80,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,80,1],f16> loc(#loc1)
%5232 = torch.aten.broadcast_to %5231, %4957 : !torch.vtensor<[2,256,80,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,80,16],f16> loc(#loc1)
%5233 = torch.aten.clone %5232, %int0 : !torch.vtensor<[2,256,80,16],f16>, !torch.int -> !torch.vtensor<[2,256,80,16],f16> loc(#loc1)
%5234 = torch.aten.view %5233, %4939 : !torch.vtensor<[2,256,80,16],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5235 = torch.aten.sub.Tensor %5228, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%5236 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%5237 = torch.aten.pow.Tensor_Tensor %5236, %5235 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5238 = torch.aten.neg %5237 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5239 = torch.aten.neg %5238 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5240 = torch.aten.div.Tensor %5234, %5239 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5241 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5242 = torch.aten.detach %5241 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5243 = torch.aten.div.Tensor %5226, %5240 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5244 = torch.aten.add.Tensor %5243, %5242, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5245 = torch.aten.sub.Tensor %5228, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%5246 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%5247 = torch.aten.pow.Tensor_Tensor %5246, %5245 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5248 = torch.aten.neg %5247 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5249 = torch.aten.sub.Tensor %5228, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%5250 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%5251 = torch.aten.pow.Tensor_Tensor %5250, %5249 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5252 = torch.aten.sub.Tensor %5251, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%5253 = torch.aten.gt.Tensor %5244, %5252 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,256,1280],i1> loc(#loc1)
%5254 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%5255 = torch.aten.to.dtype %5254, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%5256 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%5257 = torch.aten.broadcast_to %5255, %5256 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%5258 = torch.valsem.aten.copy %5257, %5252, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%5259 = torch.aten.where.self %5253, %5258, %5244 : !torch.vtensor<[2,256,1280],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5260 = torch.aten.lt.Tensor %5259, %5248 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,256,1280],i1> loc(#loc1)
%5261 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%5262 = torch.aten.to.dtype %5261, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%5263 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%5264 = torch.aten.broadcast_to %5262, %5263 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%5265 = torch.valsem.aten.copy %5264, %5248, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%5266 = torch.aten.where.self %5260, %5265, %5259 : !torch.vtensor<[2,256,1280],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5267 = torch.aten.round %5266 : !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5268 = torch.aten.sub.Tensor %5267, %5242, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5269 = torch.aten.mul.Tensor %5268, %5240 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5270 = torch.aten.broadcast_to %238, %5000 : !torch.vtensor<[1280,80,1],f16>, !torch.list<int> -> !torch.vtensor<[1280,80,16],f16> loc(#loc1)
%5271 = torch.aten.clone %5270, %int0 : !torch.vtensor<[1280,80,16],f16>, !torch.int -> !torch.vtensor<[1280,80,16],f16> loc(#loc1)
%5272 = torch.aten.view %5271, %5003 : !torch.vtensor<[1280,80,16],f16>, !torch.list<int> -> !torch.vtensor<[1280,1280],f16> loc(#loc1)
%5273 = torch.aten.mul.Tensor %239, %5272 : !torch.vtensor<[1280,1280],si8>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16> loc(#loc1)
%5274 = torch.aten.transpose.int %5273, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16> loc(#loc1)
%5275 = torch.aten.view %5269, %5007 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16> loc(#loc1)
%5276 = torch.aten.mm %5275, %5274 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16> loc(#loc1)
%5277 = torch.aten.mul.Scalar %240, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16> loc(#loc1)
%5278 = torch.aten.add.Tensor %5277, %5276, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[512,1280],f16>, !torch.int -> !torch.vtensor<[512,1280],f16> loc(#loc1)
%5279 = torch.aten.view %5278, %4939 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5280 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5281 = torch.aten.detach %5280 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5282 = torch.aten.view %arg2, %1542 : !torch.vtensor<[2,77,768],f16>, !torch.list<int> -> !torch.vtensor<[2,77,48,16],f16> loc(#loc1)
%5283 = torch.aten.abs %5282 : !torch.vtensor<[2,77,48,16],f16> -> !torch.vtensor<[2,77,48,16],f16> loc(#loc1)
%values_136, %indices_137 = torch.aten.max.dim %5283, %int3, %true : !torch.vtensor<[2,77,48,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,77,48,1],f16>, !torch.vtensor<[2,77,48,1],si64> loc(#loc1)
%5284 = torch.aten.view %values_136, %1545 : !torch.vtensor<[2,77,48,1],f16>, !torch.list<int> -> !torch.vtensor<[2,77,48,1],f16> loc(#loc1)
%5285 = torch.aten.broadcast_to %5284, %1542 : !torch.vtensor<[2,77,48,1],f16>, !torch.list<int> -> !torch.vtensor<[2,77,48,16],f16> loc(#loc1)
%5286 = torch.aten.clone %5285, %int0 : !torch.vtensor<[2,77,48,16],f16>, !torch.int -> !torch.vtensor<[2,77,48,16],f16> loc(#loc1)
%5287 = torch.aten.view %5286, %1549 : !torch.vtensor<[2,77,48,16],f16>, !torch.list<int> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%5288 = torch.aten.sub.Tensor %5281, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%5289 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%5290 = torch.aten.pow.Tensor_Tensor %5289, %5288 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5291 = torch.aten.neg %5290 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5292 = torch.aten.neg %5291 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5293 = torch.aten.div.Tensor %5287, %5292 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%5294 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5295 = torch.aten.detach %5294 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5296 = torch.aten.div.Tensor %arg2, %5293 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[2,77,768],f16> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%5297 = torch.aten.add.Tensor %5296, %5295, %int1 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%5298 = torch.aten.sub.Tensor %5281, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%5299 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%5300 = torch.aten.pow.Tensor_Tensor %5299, %5298 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5301 = torch.aten.neg %5300 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5302 = torch.aten.sub.Tensor %5281, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%5303 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%5304 = torch.aten.pow.Tensor_Tensor %5303, %5302 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5305 = torch.aten.sub.Tensor %5304, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%5306 = torch.aten.gt.Tensor %5297, %5305 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,77,768],i1> loc(#loc1)
%5307 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%5308 = torch.aten.to.dtype %5307, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%5309 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%5310 = torch.aten.broadcast_to %5308, %5309 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%5311 = torch.valsem.aten.copy %5310, %5305, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%5312 = torch.aten.where.self %5306, %5311, %5297 : !torch.vtensor<[2,77,768],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,77,768],f16> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%5313 = torch.aten.lt.Tensor %5312, %5301 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,77,768],i1> loc(#loc1)
%5314 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%5315 = torch.aten.to.dtype %5314, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%5316 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%5317 = torch.aten.broadcast_to %5315, %5316 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%5318 = torch.valsem.aten.copy %5317, %5301, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%5319 = torch.aten.where.self %5313, %5318, %5312 : !torch.vtensor<[2,77,768],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,77,768],f16> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%5320 = torch.aten.round %5319 : !torch.vtensor<[2,77,768],f16> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%5321 = torch.aten.sub.Tensor %5320, %5295, %int1 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%5322 = torch.aten.mul.Tensor %5321, %5293 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[2,77,768],f16> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
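// Annotation (reading of the pattern above, not part of the original dump): the
// scale/shift/clamp/round/rescale sequence ending at %5322 is consistent with a
// fake-quantization round trip on the [2,77,768] encoder hidden states %arg2 --
// per-group (16-wide) max-abs scales, a zero point from %881, and clamp bounds of
// the form +/-2^(b-1) derived from the scalar parameters %880/%882 via the
// pow/neg/sub ops and the two where.self selects.
// The ops below appear to dequantize an si8-stored 768->1280 weight (%242, with
// per-group f16 scales %241) and apply it as a linear projection -- likely the
// key (to_k) projection of a cross-attention block.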
%5323 = torch.prim.ListConstruct %int1280, %int48, %int16 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc2)
%5324 = torch.aten.broadcast_to %241, %5323 : !torch.vtensor<[1280,48,1],f16>, !torch.list<int> -> !torch.vtensor<[1280,48,16],f16> loc(#loc1)
%5325 = torch.aten.clone %5324, %int0 : !torch.vtensor<[1280,48,16],f16>, !torch.int -> !torch.vtensor<[1280,48,16],f16> loc(#loc1)
%5326 = torch.prim.ListConstruct %int1280, %int768 : (!torch.int, !torch.int) -> !torch.list<int> loc(#loc4)
%5327 = torch.aten.view %5325, %5326 : !torch.vtensor<[1280,48,16],f16>, !torch.list<int> -> !torch.vtensor<[1280,768],f16> loc(#loc1)
%5328 = torch.aten.mul.Tensor %242, %5327 : !torch.vtensor<[1280,768],si8>, !torch.vtensor<[1280,768],f16> -> !torch.vtensor<[1280,768],f16> loc(#loc1)
%5329 = torch.aten.transpose.int %5328, %int0, %int1 : !torch.vtensor<[1280,768],f16>, !torch.int, !torch.int -> !torch.vtensor<[768,1280],f16> loc(#loc1)
%5330 = torch.aten.view %5322, %1593 : !torch.vtensor<[2,77,768],f16>, !torch.list<int> -> !torch.vtensor<[154,768],f16> loc(#loc1)
%5331 = torch.aten.mm %5330, %5329 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,1280],f16> -> !torch.vtensor<[154,1280],f16> loc(#loc1)
%5332 = torch.prim.ListConstruct %int2, %int77, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc0)
%5333 = torch.aten.view %5331, %5332 : !torch.vtensor<[154,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,77,1280],f16> loc(#loc1)
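// The same fake-quantization pattern now repeats on %arg2, followed by a second
// dequantized si8 768->1280 matmul (%243/%244) -- presumably the companion value
// (to_v) projection of the same cross-attention block.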
%5334 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5335 = torch.aten.detach %5334 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5336 = torch.aten.view %arg2, %1542 : !torch.vtensor<[2,77,768],f16>, !torch.list<int> -> !torch.vtensor<[2,77,48,16],f16> loc(#loc1)
%5337 = torch.aten.abs %5336 : !torch.vtensor<[2,77,48,16],f16> -> !torch.vtensor<[2,77,48,16],f16> loc(#loc1)
%values_138, %indices_139 = torch.aten.max.dim %5337, %int3, %true : !torch.vtensor<[2,77,48,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,77,48,1],f16>, !torch.vtensor<[2,77,48,1],si64> loc(#loc1)
%5338 = torch.aten.view %values_138, %1545 : !torch.vtensor<[2,77,48,1],f16>, !torch.list<int> -> !torch.vtensor<[2,77,48,1],f16> loc(#loc1)
%5339 = torch.aten.broadcast_to %5338, %1542 : !torch.vtensor<[2,77,48,1],f16>, !torch.list<int> -> !torch.vtensor<[2,77,48,16],f16> loc(#loc1)
%5340 = torch.aten.clone %5339, %int0 : !torch.vtensor<[2,77,48,16],f16>, !torch.int -> !torch.vtensor<[2,77,48,16],f16> loc(#loc1)
%5341 = torch.aten.view %5340, %1549 : !torch.vtensor<[2,77,48,16],f16>, !torch.list<int> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%5342 = torch.aten.sub.Tensor %5335, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%5343 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%5344 = torch.aten.pow.Tensor_Tensor %5343, %5342 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5345 = torch.aten.neg %5344 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5346 = torch.aten.neg %5345 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5347 = torch.aten.div.Tensor %5341, %5346 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%5348 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5349 = torch.aten.detach %5348 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5350 = torch.aten.div.Tensor %arg2, %5347 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[2,77,768],f16> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%5351 = torch.aten.add.Tensor %5350, %5349, %int1 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%5352 = torch.aten.sub.Tensor %5335, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%5353 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%5354 = torch.aten.pow.Tensor_Tensor %5353, %5352 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5355 = torch.aten.neg %5354 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5356 = torch.aten.sub.Tensor %5335, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%5357 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%5358 = torch.aten.pow.Tensor_Tensor %5357, %5356 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5359 = torch.aten.sub.Tensor %5358, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%5360 = torch.aten.gt.Tensor %5351, %5359 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,77,768],i1> loc(#loc1)
%5361 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%5362 = torch.aten.to.dtype %5361, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%5363 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%5364 = torch.aten.broadcast_to %5362, %5363 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%5365 = torch.valsem.aten.copy %5364, %5359, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%5366 = torch.aten.where.self %5360, %5365, %5351 : !torch.vtensor<[2,77,768],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,77,768],f16> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%5367 = torch.aten.lt.Tensor %5366, %5355 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,77,768],i1> loc(#loc1)
%5368 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%5369 = torch.aten.to.dtype %5368, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%5370 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%5371 = torch.aten.broadcast_to %5369, %5370 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%5372 = torch.valsem.aten.copy %5371, %5355, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%5373 = torch.aten.where.self %5367, %5372, %5366 : !torch.vtensor<[2,77,768],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,77,768],f16> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%5374 = torch.aten.round %5373 : !torch.vtensor<[2,77,768],f16> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%5375 = torch.aten.sub.Tensor %5374, %5349, %int1 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%5376 = torch.aten.mul.Tensor %5375, %5347 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[2,77,768],f16> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%5377 = torch.aten.broadcast_to %243, %5323 : !torch.vtensor<[1280,48,1],f16>, !torch.list<int> -> !torch.vtensor<[1280,48,16],f16> loc(#loc1)
%5378 = torch.aten.clone %5377, %int0 : !torch.vtensor<[1280,48,16],f16>, !torch.int -> !torch.vtensor<[1280,48,16],f16> loc(#loc1)
%5379 = torch.aten.view %5378, %5326 : !torch.vtensor<[1280,48,16],f16>, !torch.list<int> -> !torch.vtensor<[1280,768],f16> loc(#loc1)
%5380 = torch.aten.mul.Tensor %244, %5379 : !torch.vtensor<[1280,768],si8>, !torch.vtensor<[1280,768],f16> -> !torch.vtensor<[1280,768],f16> loc(#loc1)
%5381 = torch.aten.transpose.int %5380, %int0, %int1 : !torch.vtensor<[1280,768],f16>, !torch.int, !torch.int -> !torch.vtensor<[768,1280],f16> loc(#loc1)
%5382 = torch.aten.view %5376, %1593 : !torch.vtensor<[2,77,768],f16>, !torch.list<int> -> !torch.vtensor<[154,768],f16> loc(#loc1)
%5383 = torch.aten.mm %5382, %5381 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,1280],f16> -> !torch.vtensor<[154,1280],f16> loc(#loc1)
%5384 = torch.aten.view %5383, %5332 : !torch.vtensor<[154,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,77,1280],f16> loc(#loc1)
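// Multi-head cross-attention core: q/k/v are reshaped to [16, seq, 160]
// (batch 2 x 8 heads, head_dim 160), scores come from bmm(q, k^T) scaled by
// %0 ~= 0.0790569 = 1/sqrt(160), followed by a max-subtracted softmax over the
// key axis (max.dim, sub, exp, sum, div) and a bmm with v, then a reshape back
// to [2,256,1280].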
%5385 = torch.aten.view %5279, %5119 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16> loc(#loc1)
%5386 = torch.aten.permute %5385, %1380 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16> loc(#loc1)
%5387 = torch.aten.clone %5386, %int0 : !torch.vtensor<[2,8,256,160],f16>, !torch.int -> !torch.vtensor<[2,8,256,160],f16> loc(#loc1)
%5388 = torch.aten.view %5387, %5123 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16> loc(#loc1)
%5389 = torch.prim.ListConstruct %int2, %int77, %int8, %int160 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc5)
%5390 = torch.aten.view %5333, %5389 : !torch.vtensor<[2,77,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,77,8,160],f16> loc(#loc1)
%5391 = torch.aten.permute %5390, %1380 : !torch.vtensor<[2,77,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,160],f16> loc(#loc1)
%5392 = torch.aten.clone %5391, %int0 : !torch.vtensor<[2,8,77,160],f16>, !torch.int -> !torch.vtensor<[2,8,77,160],f16> loc(#loc1)
%5393 = torch.prim.ListConstruct %int16, %int77, %int160 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc2)
%5394 = torch.aten.view %5392, %5393 : !torch.vtensor<[2,8,77,160],f16>, !torch.list<int> -> !torch.vtensor<[16,77,160],f16> loc(#loc1)
%5395 = torch.aten.view %5384, %5389 : !torch.vtensor<[2,77,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,77,8,160],f16> loc(#loc1)
%5396 = torch.aten.permute %5395, %1380 : !torch.vtensor<[2,77,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,160],f16> loc(#loc1)
%5397 = torch.aten.clone %5396, %int0 : !torch.vtensor<[2,8,77,160],f16>, !torch.int -> !torch.vtensor<[2,8,77,160],f16> loc(#loc1)
%5398 = torch.aten.view %5397, %5393 : !torch.vtensor<[2,8,77,160],f16>, !torch.list<int> -> !torch.vtensor<[16,77,160],f16> loc(#loc1)
%5399 = torch.aten.transpose.int %5394, %int-1, %int-2 : !torch.vtensor<[16,77,160],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,160,77],f16> loc(#loc1)
%5400 = torch.aten.broadcast_to %5388, %5123 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16> loc(#loc1)
%5401 = torch.aten.view %5400, %5123 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16> loc(#loc1)
%5402 = torch.prim.ListConstruct %int16, %int160, %int77 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc2)
%5403 = torch.aten.broadcast_to %5399, %5402 : !torch.vtensor<[16,160,77],f16>, !torch.list<int> -> !torch.vtensor<[16,160,77],f16> loc(#loc1)
%5404 = torch.aten.view %5403, %5402 : !torch.vtensor<[16,160,77],f16>, !torch.list<int> -> !torch.vtensor<[16,160,77],f16> loc(#loc1)
%5405 = torch.aten.bmm %5401, %5404 : !torch.vtensor<[16,256,160],f16>, !torch.vtensor<[16,160,77],f16> -> !torch.vtensor<[16,256,77],f16> loc(#loc1)
%5406 = torch.prim.ListConstruct %int16, %int256, %int77 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc4)
%5407 = torch.aten.view %5405, %5406 : !torch.vtensor<[16,256,77],f16>, !torch.list<int> -> !torch.vtensor<[16,256,77],f16> loc(#loc1)
%5408 = torch.aten.mul.Tensor %5407, %0 : !torch.vtensor<[16,256,77],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,256,77],f16> loc(#loc1)
%values_140, %indices_141 = torch.aten.max.dim %5408, %int-1, %true : !torch.vtensor<[16,256,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,256,1],f16>, !torch.vtensor<[16,256,1],si64> loc(#loc1)
%5409 = torch.aten.sub.Tensor %5408, %values_140, %float1.000000e00 : !torch.vtensor<[16,256,77],f16>, !torch.vtensor<[16,256,1],f16>, !torch.float -> !torch.vtensor<[16,256,77],f16> loc(#loc1)
%5410 = torch.aten.exp %5409 : !torch.vtensor<[16,256,77],f16> -> !torch.vtensor<[16,256,77],f16> loc(#loc1)
%5411 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int> loc(#loc1)
%5412 = torch.aten.sum.dim_IntList %5410, %5411, %true, %none : !torch.vtensor<[16,256,77],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[16,256,1],f16> loc(#loc1)
%5413 = torch.aten.div.Tensor %5410, %5412 : !torch.vtensor<[16,256,77],f16>, !torch.vtensor<[16,256,1],f16> -> !torch.vtensor<[16,256,77],f16> loc(#loc1)
%5414 = torch.aten.broadcast_to %5413, %5406 : !torch.vtensor<[16,256,77],f16>, !torch.list<int> -> !torch.vtensor<[16,256,77],f16> loc(#loc1)
%5415 = torch.aten.view %5414, %5406 : !torch.vtensor<[16,256,77],f16>, !torch.list<int> -> !torch.vtensor<[16,256,77],f16> loc(#loc1)
%5416 = torch.aten.broadcast_to %5398, %5393 : !torch.vtensor<[16,77,160],f16>, !torch.list<int> -> !torch.vtensor<[16,77,160],f16> loc(#loc1)
%5417 = torch.aten.view %5416, %5393 : !torch.vtensor<[16,77,160],f16>, !torch.list<int> -> !torch.vtensor<[16,77,160],f16> loc(#loc1)
%5418 = torch.aten.bmm %5415, %5417 : !torch.vtensor<[16,256,77],f16>, !torch.vtensor<[16,77,160],f16> -> !torch.vtensor<[16,256,160],f16> loc(#loc1)
%5419 = torch.aten.view %5418, %5123 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16> loc(#loc1)
%5420 = torch.aten.view %5419, %5154 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16> loc(#loc1)
%5421 = torch.aten.permute %5420, %1380 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16> loc(#loc1)
%5422 = torch.aten.clone %5421, %int0 : !torch.vtensor<[2,256,8,160],f16>, !torch.int -> !torch.vtensor<[2,256,8,160],f16> loc(#loc1)
%5423 = torch.aten.view %5422, %4939 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
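// Below: the attention output is fake-quantized with the same round-trip
// pattern, the si8 1280x1280 weight %246 (scales %245) is dequantized and
// applied with bias %247 -- likely the attention output projection (to_out in
// diffusers terms) -- and the residual %5212 is added back in.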
%5424 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5425 = torch.aten.detach %5424 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5426 = torch.aten.view %5423, %4957 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,80,16],f16> loc(#loc1)
%5427 = torch.aten.abs %5426 : !torch.vtensor<[2,256,80,16],f16> -> !torch.vtensor<[2,256,80,16],f16> loc(#loc1)
%values_142, %indices_143 = torch.aten.max.dim %5427, %int3, %true : !torch.vtensor<[2,256,80,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,256,80,1],f16>, !torch.vtensor<[2,256,80,1],si64> loc(#loc1)
%5428 = torch.aten.view %values_142, %4960 : !torch.vtensor<[2,256,80,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,80,1],f16> loc(#loc1)
%5429 = torch.aten.broadcast_to %5428, %4957 : !torch.vtensor<[2,256,80,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,80,16],f16> loc(#loc1)
%5430 = torch.aten.clone %5429, %int0 : !torch.vtensor<[2,256,80,16],f16>, !torch.int -> !torch.vtensor<[2,256,80,16],f16> loc(#loc1)
%5431 = torch.aten.view %5430, %4939 : !torch.vtensor<[2,256,80,16],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5432 = torch.aten.sub.Tensor %5425, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%5433 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%5434 = torch.aten.pow.Tensor_Tensor %5433, %5432 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5435 = torch.aten.neg %5434 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5436 = torch.aten.neg %5435 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5437 = torch.aten.div.Tensor %5431, %5436 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5438 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5439 = torch.aten.detach %5438 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5440 = torch.aten.div.Tensor %5423, %5437 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5441 = torch.aten.add.Tensor %5440, %5439, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5442 = torch.aten.sub.Tensor %5425, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%5443 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%5444 = torch.aten.pow.Tensor_Tensor %5443, %5442 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5445 = torch.aten.neg %5444 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5446 = torch.aten.sub.Tensor %5425, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%5447 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%5448 = torch.aten.pow.Tensor_Tensor %5447, %5446 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5449 = torch.aten.sub.Tensor %5448, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%5450 = torch.aten.gt.Tensor %5441, %5449 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,256,1280],i1> loc(#loc1)
%5451 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%5452 = torch.aten.to.dtype %5451, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%5453 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%5454 = torch.aten.broadcast_to %5452, %5453 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%5455 = torch.valsem.aten.copy %5454, %5449, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%5456 = torch.aten.where.self %5450, %5455, %5441 : !torch.vtensor<[2,256,1280],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5457 = torch.aten.lt.Tensor %5456, %5445 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,256,1280],i1> loc(#loc1)
%5458 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%5459 = torch.aten.to.dtype %5458, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%5460 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%5461 = torch.aten.broadcast_to %5459, %5460 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%5462 = torch.valsem.aten.copy %5461, %5445, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%5463 = torch.aten.where.self %5457, %5462, %5456 : !torch.vtensor<[2,256,1280],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5464 = torch.aten.round %5463 : !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5465 = torch.aten.sub.Tensor %5464, %5439, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5466 = torch.aten.mul.Tensor %5465, %5437 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5467 = torch.aten.broadcast_to %245, %5000 : !torch.vtensor<[1280,80,1],f16>, !torch.list<int> -> !torch.vtensor<[1280,80,16],f16> loc(#loc1)
%5468 = torch.aten.clone %5467, %int0 : !torch.vtensor<[1280,80,16],f16>, !torch.int -> !torch.vtensor<[1280,80,16],f16> loc(#loc1)
%5469 = torch.aten.view %5468, %5003 : !torch.vtensor<[1280,80,16],f16>, !torch.list<int> -> !torch.vtensor<[1280,1280],f16> loc(#loc1)
%5470 = torch.aten.mul.Tensor %246, %5469 : !torch.vtensor<[1280,1280],si8>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16> loc(#loc1)
%5471 = torch.aten.transpose.int %5470, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16> loc(#loc1)
%5472 = torch.aten.view %5466, %5007 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16> loc(#loc1)
%5473 = torch.aten.mm %5472, %5471 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16> loc(#loc1)
%5474 = torch.aten.mul.Scalar %247, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16> loc(#loc1)
%5475 = torch.aten.add.Tensor %5474, %5473, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[512,1280],f16>, !torch.int -> !torch.vtensor<[512,1280],f16> loc(#loc1)
%5476 = torch.aten.view %5475, %4939 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5477 = torch.aten.add.Tensor %5476, %5212, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
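// Decomposed LayerNorm over the last (1280) dim: mean via sum/div, centering,
// squared deviations, variance, rsqrt(var + 1e-05), then normalization. No
// affine scale/shift appears in this span, so it is presumably folded elsewhere
// or absent in this trace.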
%5478 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int> loc(#loc1)
%5479 = torch.aten.sum.dim_IntList %5477, %5478, %true, %none : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,256,1],f16> loc(#loc1)
%5480 = torch.aten.div.Scalar %5479, %int1280 : !torch.vtensor<[2,256,1],f16>, !torch.int -> !torch.vtensor<[2,256,1],f16> loc(#loc1)
%5481 = torch.prim.ListConstruct %int2, %int256, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%5482 = torch.aten.broadcast_to %5480, %5481 : !torch.vtensor<[2,256,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5483 = torch.aten.sub.Tensor %5477, %5482, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5484 = torch.aten.mul.Tensor %5483, %5483 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5485 = torch.aten.sum.dim_IntList %5484, %5478, %true, %none : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,256,1],f16> loc(#loc1)
%5486 = torch.aten.div.Scalar %5485, %int1280 : !torch.vtensor<[2,256,1],f16>, !torch.int -> !torch.vtensor<[2,256,1],f16> loc(#loc1)
%5487 = torch.aten.add.Scalar %5486, %float1.000000e-05, %int1 : !torch.vtensor<[2,256,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,256,1],f16> loc(#loc1)
%5488 = torch.aten.rsqrt %5487 : !torch.vtensor<[2,256,1],f16> -> !torch.vtensor<[2,256,1],f16> loc(#loc1)
%5489 = torch.prim.ListConstruct %int2, %int256, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%5490 = torch.aten.broadcast_to %5488, %5489 : !torch.vtensor<[2,256,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5491 = torch.aten.mul.Tensor %5483, %5490 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
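// GEGLU feed-forward, first half: a dequantized si8 1280->10240 linear
// (%248/%249, bias %250), the [2,256,10240] result split into two
// [2,256,5120] halves, GELU (approximation flag %str) applied to the second
// half, and an elementwise product of the two halves.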
%5492 = torch.prim.ListConstruct %int10240, %int80, %int16 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc2)
%5493 = torch.aten.broadcast_to %248, %5492 : !torch.vtensor<[10240,80,1],f16>, !torch.list<int> -> !torch.vtensor<[10240,80,16],f16> loc(#loc1)
%5494 = torch.aten.clone %5493, %int0 : !torch.vtensor<[10240,80,16],f16>, !torch.int -> !torch.vtensor<[10240,80,16],f16> loc(#loc1)
%5495 = torch.prim.ListConstruct %int10240, %int1280 : (!torch.int, !torch.int) -> !torch.list<int> loc(#loc4)
%5496 = torch.aten.view %5494, %5495 : !torch.vtensor<[10240,80,16],f16>, !torch.list<int> -> !torch.vtensor<[10240,1280],f16> loc(#loc1)
%5497 = torch.aten.mul.Tensor %249, %5496 : !torch.vtensor<[10240,1280],si8>, !torch.vtensor<[10240,1280],f16> -> !torch.vtensor<[10240,1280],f16> loc(#loc1)
%5498 = torch.aten.transpose.int %5497, %int0, %int1 : !torch.vtensor<[10240,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,10240],f16> loc(#loc1)
%5499 = torch.aten.view %5491, %5007 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16> loc(#loc1)
%5500 = torch.aten.mm %5499, %5498 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,10240],f16> -> !torch.vtensor<[512,10240],f16> loc(#loc1)
%5501 = torch.aten.mul.Scalar %250, %int1 : !torch.vtensor<[10240],f16>, !torch.int -> !torch.vtensor<[10240],f16> loc(#loc1)
%5502 = torch.aten.add.Tensor %5501, %5500, %int1 : !torch.vtensor<[10240],f16>, !torch.vtensor<[512,10240],f16>, !torch.int -> !torch.vtensor<[512,10240],f16> loc(#loc1)
%5503 = torch.prim.ListConstruct %int2, %int256, %int10240 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc3)
%5504 = torch.aten.view %5502, %5503 : !torch.vtensor<[512,10240],f16>, !torch.list<int> -> !torch.vtensor<[2,256,10240],f16> loc(#loc1)
%5505 = torch.aten.slice.Tensor %5504, %int-1, %int0, %int5120, %int1 : !torch.vtensor<[2,256,10240],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,256,5120],f16> loc(#loc1)
%5506 = torch.aten.slice.Tensor %5504, %int-1, %int5120, %int10240, %int1 : !torch.vtensor<[2,256,10240],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,256,5120],f16> loc(#loc1)
%5507 = torch.aten.gelu %5506, %str : !torch.vtensor<[2,256,5120],f16>, !torch.str -> !torch.vtensor<[2,256,5120],f16> loc(#loc1)
%5508 = torch.aten.mul.Tensor %5505, %5507 : !torch.vtensor<[2,256,5120],f16>, !torch.vtensor<[2,256,5120],f16> -> !torch.vtensor<[2,256,5120],f16> loc(#loc1)
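// Second half of the feed-forward: a dequantized si8 5120->1280 linear
// (%251/%252, bias %253), then the residual add with the attention-block
// output %5477.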
%5509 = torch.prim.ListConstruct %int1280, %int320, %int16 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc2)
%5510 = torch.aten.broadcast_to %251, %5509 : !torch.vtensor<[1280,320,1],f16>, !torch.list<int> -> !torch.vtensor<[1280,320,16],f16> loc(#loc1)
%5511 = torch.aten.clone %5510, %int0 : !torch.vtensor<[1280,320,16],f16>, !torch.int -> !torch.vtensor<[1280,320,16],f16> loc(#loc1)
%5512 = torch.prim.ListConstruct %int1280, %int5120 : (!torch.int, !torch.int) -> !torch.list<int> loc(#loc4)
%5513 = torch.aten.view %5511, %5512 : !torch.vtensor<[1280,320,16],f16>, !torch.list<int> -> !torch.vtensor<[1280,5120],f16> loc(#loc1)
%5514 = torch.aten.mul.Tensor %252, %5513 : !torch.vtensor<[1280,5120],si8>, !torch.vtensor<[1280,5120],f16> -> !torch.vtensor<[1280,5120],f16> loc(#loc1)
%5515 = torch.aten.transpose.int %5514, %int0, %int1 : !torch.vtensor<[1280,5120],f16>, !torch.int, !torch.int -> !torch.vtensor<[5120,1280],f16> loc(#loc1)
%5516 = torch.prim.ListConstruct %int512, %int5120 : (!torch.int, !torch.int) -> !torch.list<int> loc(#loc3)
%5517 = torch.aten.view %5508, %5516 : !torch.vtensor<[2,256,5120],f16>, !torch.list<int> -> !torch.vtensor<[512,5120],f16> loc(#loc1)
%5518 = torch.aten.mm %5517, %5515 : !torch.vtensor<[512,5120],f16>, !torch.vtensor<[5120,1280],f16> -> !torch.vtensor<[512,1280],f16> loc(#loc1)
%5519 = torch.aten.mul.Scalar %253, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16> loc(#loc1)
%5520 = torch.aten.add.Tensor %5519, %5518, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[512,1280],f16>, !torch.int -> !torch.vtensor<[512,1280],f16> loc(#loc1)
%5521 = torch.aten.view %5520, %4939 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5522 = torch.aten.add.Tensor %5521, %5477, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
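// The [2,256,1280] token sequence is reshaped back to NCHW [2,1280,16,16], the
// activation is fake-quantized per group of 16 channels, and a dequantized 1x1
// convolution (%254/%255, bias %256; stride 1, no padding, as the 16x16 shape
// is preserved) follows -- consistent with the proj_out of a Transformer2DModel
// block -- before the residual add with the block input %4860.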
%5523 = torch.prim.ListConstruct %int2, %int16, %int16, %int1280 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc5)
%5524 = torch.aten.view %5522, %5523 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,16,16,1280],f16> loc(#loc1)
%5525 = torch.aten.permute %5524, %1789 : !torch.vtensor<[2,16,16,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%5526 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5527 = torch.aten.detach %5526 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5528 = torch.aten.view %5525, %4759 : !torch.vtensor<[2,1280,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,80,16,16,16],f16> loc(#loc1)
%5529 = torch.aten.abs %5528 : !torch.vtensor<[2,80,16,16,16],f16> -> !torch.vtensor<[2,80,16,16,16],f16> loc(#loc1)
%values_144, %indices_145 = torch.aten.max.dim %5529, %int2, %true : !torch.vtensor<[2,80,16,16,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,80,1,16,16],f16>, !torch.vtensor<[2,80,1,16,16],si64> loc(#loc1)
%5530 = torch.aten.view %values_144, %4762 : !torch.vtensor<[2,80,1,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,80,1,16,16],f16> loc(#loc1)
%5531 = torch.aten.broadcast_to %5530, %4759 : !torch.vtensor<[2,80,1,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,80,16,16,16],f16> loc(#loc1)
%5532 = torch.aten.clone %5531, %int0 : !torch.vtensor<[2,80,16,16,16],f16>, !torch.int -> !torch.vtensor<[2,80,16,16,16],f16> loc(#loc1)
%5533 = torch.aten.view %5532, %4742 : !torch.vtensor<[2,80,16,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%5534 = torch.aten.sub.Tensor %5527, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%5535 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%5536 = torch.aten.pow.Tensor_Tensor %5535, %5534 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5537 = torch.aten.neg %5536 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5538 = torch.aten.neg %5537 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5539 = torch.aten.div.Tensor %5533, %5538 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%5540 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5541 = torch.aten.detach %5540 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5542 = torch.aten.div.Tensor %5525, %5539 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f16> -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%5543 = torch.aten.add.Tensor %5542, %5541, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%5544 = torch.aten.sub.Tensor %5527, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%5545 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%5546 = torch.aten.pow.Tensor_Tensor %5545, %5544 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5547 = torch.aten.neg %5546 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5548 = torch.aten.sub.Tensor %5527, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%5549 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%5550 = torch.aten.pow.Tensor_Tensor %5549, %5548 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5551 = torch.aten.sub.Tensor %5550, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%5552 = torch.aten.gt.Tensor %5543, %5551 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,1280,16,16],i1> loc(#loc1)
%5553 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%5554 = torch.aten.to.dtype %5553, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%5555 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%5556 = torch.aten.broadcast_to %5554, %5555 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%5557 = torch.valsem.aten.copy %5556, %5551, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%5558 = torch.aten.where.self %5552, %5557, %5543 : !torch.vtensor<[2,1280,16,16],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,1280,16,16],f16> -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%5559 = torch.aten.lt.Tensor %5558, %5547 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,1280,16,16],i1> loc(#loc1)
%5560 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%5561 = torch.aten.to.dtype %5560, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%5562 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%5563 = torch.aten.broadcast_to %5561, %5562 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%5564 = torch.valsem.aten.copy %5563, %5547, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%5565 = torch.aten.where.self %5559, %5564, %5558 : !torch.vtensor<[2,1280,16,16],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,1280,16,16],f16> -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%5566 = torch.aten.round %5565 : !torch.vtensor<[2,1280,16,16],f16> -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%5567 = torch.aten.sub.Tensor %5566, %5541, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%5568 = torch.aten.mul.Tensor %5567, %5539 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f16> -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%5569 = torch.aten.broadcast_to %254, %4931 : !torch.vtensor<[1280,80,1,1,1],f16>, !torch.list<int> -> !torch.vtensor<[1280,80,16,1,1],f16> loc(#loc1)
%5570 = torch.aten.clone %5569, %int0 : !torch.vtensor<[1280,80,16,1,1],f16>, !torch.int -> !torch.vtensor<[1280,80,16,1,1],f16> loc(#loc1)
%5571 = torch.aten.view %5570, %4934 : !torch.vtensor<[1280,80,16,1,1],f16>, !torch.list<int> -> !torch.vtensor<[1280,1280,1,1],f16> loc(#loc1)
%5572 = torch.aten.mul.Tensor %255, %5571 : !torch.vtensor<[1280,1280,1,1],si8>, !torch.vtensor<[1280,1280,1,1],f16> -> !torch.vtensor<[1280,1280,1,1],f16> loc(#loc1)
%5573 = torch.aten.convolution %5568, %5572, %256, %933, %934, %933, %false, %934, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[1280,1280,1,1],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%5574 = torch.aten.add.Tensor %5573, %4860, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
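// Decomposed GroupNorm (32 groups; 40 channels x 16x16 = 10240 elements per
// group, hence the div.Scalar by 10240): statistics are accumulated in f32/f64,
// normalized with rsqrt(var + 1e-05), affine-scaled by %257/%258, cast back to
// f16, and passed through SiLU (x * sigmoid(x)).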
%5575 = torch.aten.clone %5574, %int0 : !torch.vtensor<[2,1280,16,16],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%5576 = torch.aten.view %5575, %4721 : !torch.vtensor<[2,1280,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,32,40,256],f16> loc(#loc1)
%5577 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%5578 = torch.aten.to.dtype %5577, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%5579 = torch.prim.ListConstruct %int2, %int32, %int40, %int256 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%5580 = torch.aten.broadcast_to %5578, %5579 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,40,256],f32> loc(#loc1)
%5581 = torch.valsem.aten.copy %5580, %5576, %false : !torch.vtensor<[2,32,40,256],f32>, !torch.vtensor<[2,32,40,256],f16>, !torch.bool -> !torch.vtensor<[2,32,40,256],f32> loc(#loc1)
%5582 = torch.aten.to.dtype %5581, %int7, %false, %false, %none : !torch.vtensor<[2,32,40,256],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,40,256],f64> loc(#loc1)
%5583 = torch.aten.sum.dim_IntList %5582, %943, %true, %none : !torch.vtensor<[2,32,40,256],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%5584 = torch.aten.div.Scalar %5583, %int10240 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%5585 = torch.aten.sub.Tensor %5582, %5584, %float1.000000e00 : !torch.vtensor<[2,32,40,256],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,40,256],f64> loc(#loc1)
%5586 = torch.aten.mul.Tensor %5585, %5585 : !torch.vtensor<[2,32,40,256],f64>, !torch.vtensor<[2,32,40,256],f64> -> !torch.vtensor<[2,32,40,256],f64> loc(#loc1)
%5587 = torch.aten.sum.dim_IntList %5586, %943, %true, %none : !torch.vtensor<[2,32,40,256],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%5588 = torch.aten.div.Scalar %5587, %int10240 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%5589 = torch.aten.to.dtype %5588, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%5590 = torch.aten.sum.dim_IntList %5581, %943, %true, %none : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%5591 = torch.aten.div.Scalar %5590, %int10240 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%5592 = torch.aten.add.Tensor %5589, %5, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%5593 = torch.aten.rsqrt %5592 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%5594 = torch.aten.sub.Tensor %5576, %5591, %int1 : !torch.vtensor<[2,32,40,256],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,256],f32> loc(#loc1)
%5595 = torch.aten.mul.Tensor %5594, %5593 : !torch.vtensor<[2,32,40,256],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,256],f32> loc(#loc1)
%5596 = torch.aten.view %5595, %4742 : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f32> loc(#loc1)
%5597 = torch.aten.unsqueeze %257, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16> loc(#loc1)
%5598 = torch.aten.unsqueeze %5597, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16> loc(#loc1)
%5599 = torch.aten.mul.Tensor %5596, %5598 : !torch.vtensor<[2,1280,16,16],f32>, !torch.vtensor<[1280,1,1],f16> -> !torch.vtensor<[2,1280,16,16],f32> loc(#loc1)
%5600 = torch.aten.unsqueeze %258, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16> loc(#loc1)
%5601 = torch.aten.unsqueeze %5600, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16> loc(#loc1)
%5602 = torch.aten.add.Tensor %5599, %5601, %int1 : !torch.vtensor<[2,1280,16,16],f32>, !torch.vtensor<[1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f32> loc(#loc1)
%5603 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%5604 = torch.aten.to.dtype %5603, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%5605 = torch.prim.ListConstruct %int2, %int1280, %int16, %int16 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%5606 = torch.aten.broadcast_to %5604, %5605 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%5607 = torch.valsem.aten.copy %5606, %5602, %false : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f32>, !torch.bool -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%5608 = torch.aten.sigmoid %5607 : !torch.vtensor<[2,1280,16,16],f16> -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%5609 = torch.aten.mul.Tensor %5608, %5607 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f16> -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
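// Fake-quantize the SiLU output per group of 16 channels, then apply a
// dequantized si8 3x3 convolution (%259/%260, bias %261; 1280->1280, stride 1
// and padding 1 inferred from the preserved 16x16 shape) -- likely the first
// conv of a ResNet block.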
%5610 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5611 = torch.aten.detach %5610 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5612 = torch.aten.view %5609, %4759 : !torch.vtensor<[2,1280,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,80,16,16,16],f16> loc(#loc1)
%5613 = torch.aten.abs %5612 : !torch.vtensor<[2,80,16,16,16],f16> -> !torch.vtensor<[2,80,16,16,16],f16> loc(#loc1)
%values_146, %indices_147 = torch.aten.max.dim %5613, %int2, %true : !torch.vtensor<[2,80,16,16,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,80,1,16,16],f16>, !torch.vtensor<[2,80,1,16,16],si64> loc(#loc1)
%5614 = torch.aten.view %values_146, %4762 : !torch.vtensor<[2,80,1,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,80,1,16,16],f16> loc(#loc1)
%5615 = torch.aten.broadcast_to %5614, %4759 : !torch.vtensor<[2,80,1,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,80,16,16,16],f16> loc(#loc1)
%5616 = torch.aten.clone %5615, %int0 : !torch.vtensor<[2,80,16,16,16],f16>, !torch.int -> !torch.vtensor<[2,80,16,16,16],f16> loc(#loc1)
%5617 = torch.aten.view %5616, %4742 : !torch.vtensor<[2,80,16,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%5618 = torch.aten.sub.Tensor %5611, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%5619 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%5620 = torch.aten.pow.Tensor_Tensor %5619, %5618 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5621 = torch.aten.neg %5620 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5622 = torch.aten.neg %5621 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5623 = torch.aten.div.Tensor %5617, %5622 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%5624 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5625 = torch.aten.detach %5624 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5626 = torch.aten.div.Tensor %5609, %5623 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f16> -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%5627 = torch.aten.add.Tensor %5626, %5625, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%5628 = torch.aten.sub.Tensor %5611, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%5629 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%5630 = torch.aten.pow.Tensor_Tensor %5629, %5628 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5631 = torch.aten.neg %5630 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5632 = torch.aten.sub.Tensor %5611, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%5633 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%5634 = torch.aten.pow.Tensor_Tensor %5633, %5632 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5635 = torch.aten.sub.Tensor %5634, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%5636 = torch.aten.gt.Tensor %5627, %5635 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,1280,16,16],i1> loc(#loc1)
%5637 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%5638 = torch.aten.to.dtype %5637, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%5639 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%5640 = torch.aten.broadcast_to %5638, %5639 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%5641 = torch.valsem.aten.copy %5640, %5635, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%5642 = torch.aten.where.self %5636, %5641, %5627 : !torch.vtensor<[2,1280,16,16],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,1280,16,16],f16> -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%5643 = torch.aten.lt.Tensor %5642, %5631 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,1280,16,16],i1> loc(#loc1)
%5644 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%5645 = torch.aten.to.dtype %5644, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%5646 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%5647 = torch.aten.broadcast_to %5645, %5646 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%5648 = torch.valsem.aten.copy %5647, %5631, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%5649 = torch.aten.where.self %5643, %5648, %5642 : !torch.vtensor<[2,1280,16,16],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,1280,16,16],f16> -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%5650 = torch.aten.round %5649 : !torch.vtensor<[2,1280,16,16],f16> -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%5651 = torch.aten.sub.Tensor %5650, %5625, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%5652 = torch.aten.mul.Tensor %5651, %5623 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f16> -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%5653 = torch.aten.broadcast_to %259, %4802 : !torch.vtensor<[1280,80,1,3,3],f16>, !torch.list<int> -> !torch.vtensor<[1280,80,16,3,3],f16> loc(#loc1)
%5654 = torch.aten.clone %5653, %int0 : !torch.vtensor<[1280,80,16,3,3],f16>, !torch.int -> !torch.vtensor<[1280,80,16,3,3],f16> loc(#loc1)
%5655 = torch.aten.view %5654, %4805 : !torch.vtensor<[1280,80,16,3,3],f16>, !torch.list<int> -> !torch.vtensor<[1280,1280,3,3],f16> loc(#loc1)
%5656 = torch.aten.mul.Tensor %260, %5655 : !torch.vtensor<[1280,1280,3,3],si8>, !torch.vtensor<[1280,1280,3,3],f16> -> !torch.vtensor<[1280,1280,3,3],f16> loc(#loc1)
%5657 = torch.aten.convolution %5652, %5656, %261, %933, %933, %933, %false, %934, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
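// Timestep-embedding injection: SiLU on the [2,1280] embedding %932, a
// 1280->1280 linear (%262, bias %263), then broadcast as [2,1280,1,1] and added
// to the conv output -- consistent with the time_emb_proj step of a ResNet
// block.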
%5658 = torch.aten.sigmoid %932 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16> loc(#loc1)
%5659 = torch.aten.mul.Tensor %5658, %932 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16> loc(#loc1)
%5660 = torch.aten.transpose.int %262, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16> loc(#loc1)
%5661 = torch.aten.mm %5659, %5660 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[2,1280],f16> loc(#loc1)
%5662 = torch.aten.mul.Scalar %263, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16> loc(#loc1)
%5663 = torch.aten.add.Tensor %5662, %5661, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[2,1280],f16>, !torch.int -> !torch.vtensor<[2,1280],f16> loc(#loc1)
%5664 = torch.aten.slice.Tensor %5663, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,1280],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16> loc(#loc1)
%5665 = torch.aten.slice.Tensor %5664, %int1, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,1280],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16> loc(#loc1)
%5666 = torch.aten.unsqueeze %5665, %int2 : !torch.vtensor<[2,1280],f16>, !torch.int -> !torch.vtensor<[2,1280,1],f16> loc(#loc1)
%5667 = torch.aten.unsqueeze %5666, %int3 : !torch.vtensor<[2,1280,1],f16>, !torch.int -> !torch.vtensor<[2,1280,1,1],f16> loc(#loc1)
%5668 = torch.aten.add.Tensor %5657, %5667, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
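// Second decomposed GroupNorm + SiLU over the time-conditioned features,
// structurally identical to the one above (32 groups, eps 1e-05, affine
// parameters %264/%265).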
%5669 = torch.aten.view %5668, %4721 : !torch.vtensor<[2,1280,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,32,40,256],f16> loc(#loc1)
%5670 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%5671 = torch.aten.to.dtype %5670, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%5672 = torch.prim.ListConstruct %int2, %int32, %int40, %int256 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%5673 = torch.aten.broadcast_to %5671, %5672 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,40,256],f32> loc(#loc1)
%5674 = torch.valsem.aten.copy %5673, %5669, %false : !torch.vtensor<[2,32,40,256],f32>, !torch.vtensor<[2,32,40,256],f16>, !torch.bool -> !torch.vtensor<[2,32,40,256],f32> loc(#loc1)
%5675 = torch.aten.to.dtype %5674, %int7, %false, %false, %none : !torch.vtensor<[2,32,40,256],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,40,256],f64> loc(#loc1)
%5676 = torch.aten.sum.dim_IntList %5675, %943, %true, %none : !torch.vtensor<[2,32,40,256],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%5677 = torch.aten.div.Scalar %5676, %int10240 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%5678 = torch.aten.sub.Tensor %5675, %5677, %float1.000000e00 : !torch.vtensor<[2,32,40,256],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,40,256],f64> loc(#loc1)
%5679 = torch.aten.mul.Tensor %5678, %5678 : !torch.vtensor<[2,32,40,256],f64>, !torch.vtensor<[2,32,40,256],f64> -> !torch.vtensor<[2,32,40,256],f64> loc(#loc1)
%5680 = torch.aten.sum.dim_IntList %5679, %943, %true, %none : !torch.vtensor<[2,32,40,256],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%5681 = torch.aten.div.Scalar %5680, %int10240 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%5682 = torch.aten.to.dtype %5681, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%5683 = torch.aten.sum.dim_IntList %5674, %943, %true, %none : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%5684 = torch.aten.div.Scalar %5683, %int10240 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%5685 = torch.aten.add.Tensor %5682, %5, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%5686 = torch.aten.rsqrt %5685 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%5687 = torch.aten.sub.Tensor %5669, %5684, %int1 : !torch.vtensor<[2,32,40,256],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,256],f32> loc(#loc1)
%5688 = torch.aten.mul.Tensor %5687, %5686 : !torch.vtensor<[2,32,40,256],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,256],f32> loc(#loc1)
%5689 = torch.aten.view %5688, %4742 : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f32> loc(#loc1)
%5690 = torch.aten.unsqueeze %264, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16> loc(#loc1)
%5691 = torch.aten.unsqueeze %5690, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16> loc(#loc1)
%5692 = torch.aten.mul.Tensor %5689, %5691 : !torch.vtensor<[2,1280,16,16],f32>, !torch.vtensor<[1280,1,1],f16> -> !torch.vtensor<[2,1280,16,16],f32> loc(#loc1)
%5693 = torch.aten.unsqueeze %265, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16> loc(#loc1)
%5694 = torch.aten.unsqueeze %5693, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16> loc(#loc1)
%5695 = torch.aten.add.Tensor %5692, %5694, %int1 : !torch.vtensor<[2,1280,16,16],f32>, !torch.vtensor<[1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f32> loc(#loc1)
%5696 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%5697 = torch.aten.to.dtype %5696, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%5698 = torch.prim.ListConstruct %int2, %int1280, %int16, %int16 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%5699 = torch.aten.broadcast_to %5697, %5698 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%5700 = torch.valsem.aten.copy %5699, %5695, %false : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f32>, !torch.bool -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%5701 = torch.aten.sigmoid %5700 : !torch.vtensor<[2,1280,16,16],f16> -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%5702 = torch.aten.mul.Tensor %5701, %5700 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f16> -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
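// The excerpt ends mid-pattern: one more per-group fake-quantization of the
// activation (max-abs scales over groups of 16 channels, clamping between the
// derived bounds via the two where.self selects), presumably feeding the ResNet
// block's second convolution beyond this truncated section.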
%5703 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5704 = torch.aten.detach %5703 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5705 = torch.aten.view %5702, %4759 : !torch.vtensor<[2,1280,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,80,16,16,16],f16> loc(#loc1)
%5706 = torch.aten.abs %5705 : !torch.vtensor<[2,80,16,16,16],f16> -> !torch.vtensor<[2,80,16,16,16],f16> loc(#loc1)
%values_148, %indices_149 = torch.aten.max.dim %5706, %int2, %true : !torch.vtensor<[2,80,16,16,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,80,1,16,16],f16>, !torch.vtensor<[2,80,1,16,16],si64> loc(#loc1)
%5707 = torch.aten.view %values_148, %4762 : !torch.vtensor<[2,80,1,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,80,1,16,16],f16> loc(#loc1)
%5708 = torch.aten.broadcast_to %5707, %4759 : !torch.vtensor<[2,80,1,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,80,16,16,16],f16> loc(#loc1)
%5709 = torch.aten.clone %5708, %int0 : !torch.vtensor<[2,80,16,16,16],f16>, !torch.int -> !torch.vtensor<[2,80,16,16,16],f16> loc(#loc1)
%5710 = torch.aten.view %5709, %4742 : !torch.vtensor<[2,80,16,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%5711 = torch.aten.sub.Tensor %5704, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%5712 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%5713 = torch.aten.pow.Tensor_Tensor %5712, %5711 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5714 = torch.aten.neg %5713 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5715 = torch.aten.neg %5714 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5716 = torch.aten.div.Tensor %5710, %5715 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%5717 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5718 = torch.aten.detach %5717 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5719 = torch.aten.div.Tensor %5702, %5716 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f16> -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%5720 = torch.aten.add.Tensor %5719, %5718, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%5721 = torch.aten.sub.Tensor %5704, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%5722 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%5723 = torch.aten.pow.Tensor_Tensor %5722, %5721 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5724 = torch.aten.neg %5723 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5725 = torch.aten.sub.Tensor %5704, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%5726 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%5727 = torch.aten.pow.Tensor_Tensor %5726, %5725 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5728 = torch.aten.sub.Tensor %5727, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%5729 = torch.aten.gt.Tensor %5720, %5728 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,1280,16,16],i1> loc(#loc1)
%5730 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%5731 = torch.aten.to.dtype %5730, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%5732 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%5733 = torch.aten.broadcast_to %5731, %5732 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%5734 = torch.valsem.aten.copy %5733, %5728, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%5735 = torch.aten.where.self %5729, %5734, %5720 : !torch.vtensor<[2,1280,16,16],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,1280,16,16],f16> -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%5736 = torch.aten.lt.Tensor %5735, %5724 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,1280,16,16],i1> loc(#loc1)
%5737 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%5738 = torch.aten.to.dtype %5737, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%5739 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%5740 = torch.aten.broadcast_to %5738, %5739 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%5741 = torch.valsem.aten.copy %5740, %5724, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%5742 = torch.aten.where.self %5736, %5741, %5735 : !torch.vtensor<[2,1280,16,16],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,1280,16,16],f16> -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%5743 = torch.aten.round %5742 : !torch.vtensor<[2,1280,16,16],f16> -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%5744 = torch.aten.sub.Tensor %5743, %5718, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%5745 = torch.aten.mul.Tensor %5744, %5716 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f16> -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
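    // Dequantize the int8 3x3 conv weight: broadcast the per-group f16 scales
    // %266 ([1280,80,1,3,3]) across each group of 16 input channels, reshape
    // to [1280,1280,3,3], multiply into the si8 weight %267, then run the
    // convolution (stride 1, padding 1, given the unchanged 16x16 output)
    // with bias %268.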
%5746 = torch.aten.broadcast_to %266, %4802 : !torch.vtensor<[1280,80,1,3,3],f16>, !torch.list<int> -> !torch.vtensor<[1280,80,16,3,3],f16> loc(#loc1)
%5747 = torch.aten.clone %5746, %int0 : !torch.vtensor<[1280,80,16,3,3],f16>, !torch.int -> !torch.vtensor<[1280,80,16,3,3],f16> loc(#loc1)
%5748 = torch.aten.view %5747, %4805 : !torch.vtensor<[1280,80,16,3,3],f16>, !torch.list<int> -> !torch.vtensor<[1280,1280,3,3],f16> loc(#loc1)
%5749 = torch.aten.mul.Tensor %267, %5748 : !torch.vtensor<[1280,1280,3,3],si8>, !torch.vtensor<[1280,1280,3,3],f16> -> !torch.vtensor<[1280,1280,3,3],f16> loc(#loc1)
%5750 = torch.aten.convolution %5745, %5749, %268, %933, %933, %933, %false, %934, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%5751 = torch.aten.add.Tensor %5574, %5750, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%5752 = torch.aten.div.Tensor %5751, %4 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
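    // Residual add with %5574 and a divide by the scalar output factor %4,
    // then GroupNorm over 32 groups: reshape to [2,32,40,256], accumulate the
    // mean in f32 and the variance in f64 (10240 elements per group), and
    // normalize with rsqrt(var + eps) using the small epsilon constant %3.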
%5753 = torch.aten.clone %5752, %int0 : !torch.vtensor<[2,1280,16,16],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%5754 = torch.aten.view %5753, %4721 : !torch.vtensor<[2,1280,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,32,40,256],f16> loc(#loc1)
%5755 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%5756 = torch.aten.to.dtype %5755, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%5757 = torch.prim.ListConstruct %int2, %int32, %int40, %int256 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%5758 = torch.aten.broadcast_to %5756, %5757 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,40,256],f32> loc(#loc1)
%5759 = torch.valsem.aten.copy %5758, %5754, %false : !torch.vtensor<[2,32,40,256],f32>, !torch.vtensor<[2,32,40,256],f16>, !torch.bool -> !torch.vtensor<[2,32,40,256],f32> loc(#loc1)
%5760 = torch.aten.to.dtype %5759, %int7, %false, %false, %none : !torch.vtensor<[2,32,40,256],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,40,256],f64> loc(#loc1)
%5761 = torch.aten.sum.dim_IntList %5760, %943, %true, %none : !torch.vtensor<[2,32,40,256],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%5762 = torch.aten.div.Scalar %5761, %int10240 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%5763 = torch.aten.sub.Tensor %5760, %5762, %float1.000000e00 : !torch.vtensor<[2,32,40,256],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,40,256],f64> loc(#loc1)
%5764 = torch.aten.mul.Tensor %5763, %5763 : !torch.vtensor<[2,32,40,256],f64>, !torch.vtensor<[2,32,40,256],f64> -> !torch.vtensor<[2,32,40,256],f64> loc(#loc1)
%5765 = torch.aten.sum.dim_IntList %5764, %943, %true, %none : !torch.vtensor<[2,32,40,256],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%5766 = torch.aten.div.Scalar %5765, %int10240 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%5767 = torch.aten.to.dtype %5766, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%5768 = torch.aten.sum.dim_IntList %5759, %943, %true, %none : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%5769 = torch.aten.div.Scalar %5768, %int10240 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%5770 = torch.aten.add.Tensor %5767, %3, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%5771 = torch.aten.rsqrt %5770 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%5772 = torch.aten.sub.Tensor %5754, %5769, %int1 : !torch.vtensor<[2,32,40,256],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,256],f32> loc(#loc1)
%5773 = torch.aten.mul.Tensor %5772, %5771 : !torch.vtensor<[2,32,40,256],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,256],f32> loc(#loc1)
%5774 = torch.aten.view %5773, %4742 : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f32> loc(#loc1)
%5775 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%5776 = torch.aten.to.dtype %5775, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%5777 = torch.prim.ListConstruct %int2, %int1280, %int16, %int16 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%5778 = torch.aten.broadcast_to %5776, %5777 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%5779 = torch.valsem.aten.copy %5778, %5774, %false : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f32>, !torch.bool -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
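    // Fake-quantize the normalized activations (same 80-groups-of-16 abs-max
    // scheme as above) ahead of the 1x1 projection.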
%5780 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5781 = torch.aten.detach %5780 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5782 = torch.aten.view %5779, %4759 : !torch.vtensor<[2,1280,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,80,16,16,16],f16> loc(#loc1)
%5783 = torch.aten.abs %5782 : !torch.vtensor<[2,80,16,16,16],f16> -> !torch.vtensor<[2,80,16,16,16],f16> loc(#loc1)
%values_150, %indices_151 = torch.aten.max.dim %5783, %int2, %true : !torch.vtensor<[2,80,16,16,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,80,1,16,16],f16>, !torch.vtensor<[2,80,1,16,16],si64> loc(#loc1)
%5784 = torch.aten.view %values_150, %4762 : !torch.vtensor<[2,80,1,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,80,1,16,16],f16> loc(#loc1)
%5785 = torch.aten.broadcast_to %5784, %4759 : !torch.vtensor<[2,80,1,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,80,16,16,16],f16> loc(#loc1)
%5786 = torch.aten.clone %5785, %int0 : !torch.vtensor<[2,80,16,16,16],f16>, !torch.int -> !torch.vtensor<[2,80,16,16,16],f16> loc(#loc1)
%5787 = torch.aten.view %5786, %4742 : !torch.vtensor<[2,80,16,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%5788 = torch.aten.sub.Tensor %5781, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%5789 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%5790 = torch.aten.pow.Tensor_Tensor %5789, %5788 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5791 = torch.aten.neg %5790 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5792 = torch.aten.neg %5791 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5793 = torch.aten.div.Tensor %5787, %5792 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%5794 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5795 = torch.aten.detach %5794 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5796 = torch.aten.div.Tensor %5779, %5793 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f16> -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%5797 = torch.aten.add.Tensor %5796, %5795, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%5798 = torch.aten.sub.Tensor %5781, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%5799 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%5800 = torch.aten.pow.Tensor_Tensor %5799, %5798 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5801 = torch.aten.neg %5800 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5802 = torch.aten.sub.Tensor %5781, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%5803 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%5804 = torch.aten.pow.Tensor_Tensor %5803, %5802 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5805 = torch.aten.sub.Tensor %5804, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%5806 = torch.aten.gt.Tensor %5797, %5805 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,1280,16,16],i1> loc(#loc1)
%5807 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%5808 = torch.aten.to.dtype %5807, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%5809 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%5810 = torch.aten.broadcast_to %5808, %5809 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%5811 = torch.valsem.aten.copy %5810, %5805, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%5812 = torch.aten.where.self %5806, %5811, %5797 : !torch.vtensor<[2,1280,16,16],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,1280,16,16],f16> -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%5813 = torch.aten.lt.Tensor %5812, %5801 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,1280,16,16],i1> loc(#loc1)
%5814 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%5815 = torch.aten.to.dtype %5814, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%5816 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%5817 = torch.aten.broadcast_to %5815, %5816 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%5818 = torch.valsem.aten.copy %5817, %5801, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%5819 = torch.aten.where.self %5813, %5818, %5812 : !torch.vtensor<[2,1280,16,16],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,1280,16,16],f16> -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%5820 = torch.aten.round %5819 : !torch.vtensor<[2,1280,16,16],f16> -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%5821 = torch.aten.sub.Tensor %5820, %5795, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%5822 = torch.aten.mul.Tensor %5821, %5793 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f16> -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
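    // Dequantize the int8 1x1 conv weight (%269 scales, %270 si8 values) and
    // apply it with bias %271 — consistent with the proj_in of a transformer
    // block, given the NCHW -> sequence reshape that follows.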
%5823 = torch.aten.broadcast_to %269, %4931 : !torch.vtensor<[1280,80,1,1,1],f16>, !torch.list<int> -> !torch.vtensor<[1280,80,16,1,1],f16> loc(#loc1)
%5824 = torch.aten.clone %5823, %int0 : !torch.vtensor<[1280,80,16,1,1],f16>, !torch.int -> !torch.vtensor<[1280,80,16,1,1],f16> loc(#loc1)
%5825 = torch.aten.view %5824, %4934 : !torch.vtensor<[1280,80,16,1,1],f16>, !torch.list<int> -> !torch.vtensor<[1280,1280,1,1],f16> loc(#loc1)
%5826 = torch.aten.mul.Tensor %270, %5825 : !torch.vtensor<[1280,1280,1,1],si8>, !torch.vtensor<[1280,1280,1,1],f16> -> !torch.vtensor<[1280,1280,1,1],f16> loc(#loc1)
%5827 = torch.aten.convolution %5822, %5826, %271, %933, %934, %933, %false, %934, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[1280,1280,1,1],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%5828 = torch.aten.permute %5827, %1196 : !torch.vtensor<[2,1280,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,16,16,1280],f16> loc(#loc1)
%5829 = torch.aten.view %5828, %4939 : !torch.vtensor<[2,16,16,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
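    // Now a [2,256,1280] token sequence. LayerNorm is computed by hand in
    // f16: per-token mean over the 1280 channels, variance,
    // rsqrt(var + 1e-5), then multiply by the centered input.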
%5830 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int> loc(#loc1)
%5831 = torch.aten.sum.dim_IntList %5829, %5830, %true, %none : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,256,1],f16> loc(#loc1)
%5832 = torch.aten.div.Scalar %5831, %int1280 : !torch.vtensor<[2,256,1],f16>, !torch.int -> !torch.vtensor<[2,256,1],f16> loc(#loc1)
%5833 = torch.prim.ListConstruct %int2, %int256, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%5834 = torch.aten.broadcast_to %5832, %5833 : !torch.vtensor<[2,256,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5835 = torch.aten.sub.Tensor %5829, %5834, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5836 = torch.aten.mul.Tensor %5835, %5835 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5837 = torch.aten.sum.dim_IntList %5836, %5830, %true, %none : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,256,1],f16> loc(#loc1)
%5838 = torch.aten.div.Scalar %5837, %int1280 : !torch.vtensor<[2,256,1],f16>, !torch.int -> !torch.vtensor<[2,256,1],f16> loc(#loc1)
%5839 = torch.aten.add.Scalar %5838, %float1.000000e-05, %int1 : !torch.vtensor<[2,256,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,256,1],f16> loc(#loc1)
%5840 = torch.aten.rsqrt %5839 : !torch.vtensor<[2,256,1],f16> -> !torch.vtensor<[2,256,1],f16> loc(#loc1)
%5841 = torch.prim.ListConstruct %int2, %int256, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%5842 = torch.aten.broadcast_to %5840, %5841 : !torch.vtensor<[2,256,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5843 = torch.aten.mul.Tensor %5835, %5842 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
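    // Three quantized linear projections follow, consistent with the Q, K and
    // V of a self-attention layer: fake-quantize the LayerNorm output
    // (abs-max over groups of 16 channels in dim 3), dequantize an int8
    // [1280,1280] weight with per-group scales, then view -> mm -> bias add.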
%5844 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5845 = torch.aten.detach %5844 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5846 = torch.aten.view %5843, %4957 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,80,16],f16> loc(#loc1)
%5847 = torch.aten.abs %5846 : !torch.vtensor<[2,256,80,16],f16> -> !torch.vtensor<[2,256,80,16],f16> loc(#loc1)
%values_152, %indices_153 = torch.aten.max.dim %5847, %int3, %true : !torch.vtensor<[2,256,80,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,256,80,1],f16>, !torch.vtensor<[2,256,80,1],si64> loc(#loc1)
%5848 = torch.aten.view %values_152, %4960 : !torch.vtensor<[2,256,80,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,80,1],f16> loc(#loc1)
%5849 = torch.aten.broadcast_to %5848, %4957 : !torch.vtensor<[2,256,80,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,80,16],f16> loc(#loc1)
%5850 = torch.aten.clone %5849, %int0 : !torch.vtensor<[2,256,80,16],f16>, !torch.int -> !torch.vtensor<[2,256,80,16],f16> loc(#loc1)
%5851 = torch.aten.view %5850, %4939 : !torch.vtensor<[2,256,80,16],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5852 = torch.aten.sub.Tensor %5845, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%5853 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%5854 = torch.aten.pow.Tensor_Tensor %5853, %5852 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5855 = torch.aten.neg %5854 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5856 = torch.aten.neg %5855 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5857 = torch.aten.div.Tensor %5851, %5856 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5858 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5859 = torch.aten.detach %5858 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5860 = torch.aten.div.Tensor %5843, %5857 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5861 = torch.aten.add.Tensor %5860, %5859, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5862 = torch.aten.sub.Tensor %5845, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%5863 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%5864 = torch.aten.pow.Tensor_Tensor %5863, %5862 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5865 = torch.aten.neg %5864 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5866 = torch.aten.sub.Tensor %5845, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%5867 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%5868 = torch.aten.pow.Tensor_Tensor %5867, %5866 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5869 = torch.aten.sub.Tensor %5868, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%5870 = torch.aten.gt.Tensor %5861, %5869 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,256,1280],i1> loc(#loc1)
%5871 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%5872 = torch.aten.to.dtype %5871, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%5873 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%5874 = torch.aten.broadcast_to %5872, %5873 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%5875 = torch.valsem.aten.copy %5874, %5869, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%5876 = torch.aten.where.self %5870, %5875, %5861 : !torch.vtensor<[2,256,1280],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5877 = torch.aten.lt.Tensor %5876, %5865 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,256,1280],i1> loc(#loc1)
%5878 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%5879 = torch.aten.to.dtype %5878, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%5880 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%5881 = torch.aten.broadcast_to %5879, %5880 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%5882 = torch.valsem.aten.copy %5881, %5865, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%5883 = torch.aten.where.self %5877, %5882, %5876 : !torch.vtensor<[2,256,1280],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5884 = torch.aten.round %5883 : !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5885 = torch.aten.sub.Tensor %5884, %5859, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5886 = torch.aten.mul.Tensor %5885, %5857 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5887 = torch.aten.broadcast_to %272, %5000 : !torch.vtensor<[1280,80,1],f16>, !torch.list<int> -> !torch.vtensor<[1280,80,16],f16> loc(#loc1)
%5888 = torch.aten.clone %5887, %int0 : !torch.vtensor<[1280,80,16],f16>, !torch.int -> !torch.vtensor<[1280,80,16],f16> loc(#loc1)
%5889 = torch.aten.view %5888, %5003 : !torch.vtensor<[1280,80,16],f16>, !torch.list<int> -> !torch.vtensor<[1280,1280],f16> loc(#loc1)
%5890 = torch.aten.mul.Tensor %273, %5889 : !torch.vtensor<[1280,1280],si8>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16> loc(#loc1)
%5891 = torch.aten.transpose.int %5890, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16> loc(#loc1)
%5892 = torch.aten.view %5886, %5007 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16> loc(#loc1)
%5893 = torch.aten.mm %5892, %5891 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16> loc(#loc1)
%5894 = torch.aten.mul.Scalar %274, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16> loc(#loc1)
%5895 = torch.aten.add.Tensor %5894, %5893, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[512,1280],f16>, !torch.int -> !torch.vtensor<[512,1280],f16> loc(#loc1)
%5896 = torch.aten.view %5895, %4939 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5897 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5898 = torch.aten.detach %5897 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5899 = torch.aten.view %5843, %4957 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,80,16],f16> loc(#loc1)
%5900 = torch.aten.abs %5899 : !torch.vtensor<[2,256,80,16],f16> -> !torch.vtensor<[2,256,80,16],f16> loc(#loc1)
%values_154, %indices_155 = torch.aten.max.dim %5900, %int3, %true : !torch.vtensor<[2,256,80,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,256,80,1],f16>, !torch.vtensor<[2,256,80,1],si64> loc(#loc1)
%5901 = torch.aten.view %values_154, %4960 : !torch.vtensor<[2,256,80,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,80,1],f16> loc(#loc1)
%5902 = torch.aten.broadcast_to %5901, %4957 : !torch.vtensor<[2,256,80,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,80,16],f16> loc(#loc1)
%5903 = torch.aten.clone %5902, %int0 : !torch.vtensor<[2,256,80,16],f16>, !torch.int -> !torch.vtensor<[2,256,80,16],f16> loc(#loc1)
%5904 = torch.aten.view %5903, %4939 : !torch.vtensor<[2,256,80,16],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5905 = torch.aten.sub.Tensor %5898, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%5906 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%5907 = torch.aten.pow.Tensor_Tensor %5906, %5905 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5908 = torch.aten.neg %5907 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5909 = torch.aten.neg %5908 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5910 = torch.aten.div.Tensor %5904, %5909 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5911 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5912 = torch.aten.detach %5911 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5913 = torch.aten.div.Tensor %5843, %5910 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5914 = torch.aten.add.Tensor %5913, %5912, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5915 = torch.aten.sub.Tensor %5898, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%5916 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%5917 = torch.aten.pow.Tensor_Tensor %5916, %5915 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5918 = torch.aten.neg %5917 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5919 = torch.aten.sub.Tensor %5898, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%5920 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%5921 = torch.aten.pow.Tensor_Tensor %5920, %5919 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5922 = torch.aten.sub.Tensor %5921, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%5923 = torch.aten.gt.Tensor %5914, %5922 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,256,1280],i1> loc(#loc1)
%5924 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%5925 = torch.aten.to.dtype %5924, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%5926 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%5927 = torch.aten.broadcast_to %5925, %5926 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%5928 = torch.valsem.aten.copy %5927, %5922, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%5929 = torch.aten.where.self %5923, %5928, %5914 : !torch.vtensor<[2,256,1280],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5930 = torch.aten.lt.Tensor %5929, %5918 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,256,1280],i1> loc(#loc1)
%5931 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%5932 = torch.aten.to.dtype %5931, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%5933 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%5934 = torch.aten.broadcast_to %5932, %5933 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%5935 = torch.valsem.aten.copy %5934, %5918, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%5936 = torch.aten.where.self %5930, %5935, %5929 : !torch.vtensor<[2,256,1280],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5937 = torch.aten.round %5936 : !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5938 = torch.aten.sub.Tensor %5937, %5912, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5939 = torch.aten.mul.Tensor %5938, %5910 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5940 = torch.aten.broadcast_to %275, %5000 : !torch.vtensor<[1280,80,1],f16>, !torch.list<int> -> !torch.vtensor<[1280,80,16],f16> loc(#loc1)
%5941 = torch.aten.clone %5940, %int0 : !torch.vtensor<[1280,80,16],f16>, !torch.int -> !torch.vtensor<[1280,80,16],f16> loc(#loc1)
%5942 = torch.aten.view %5941, %5003 : !torch.vtensor<[1280,80,16],f16>, !torch.list<int> -> !torch.vtensor<[1280,1280],f16> loc(#loc1)
%5943 = torch.aten.mul.Tensor %276, %5942 : !torch.vtensor<[1280,1280],si8>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16> loc(#loc1)
%5944 = torch.aten.transpose.int %5943, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16> loc(#loc1)
%5945 = torch.aten.view %5939, %5007 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16> loc(#loc1)
%5946 = torch.aten.mm %5945, %5944 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16> loc(#loc1)
%5947 = torch.aten.mul.Scalar %277, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16> loc(#loc1)
%5948 = torch.aten.add.Tensor %5947, %5946, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[512,1280],f16>, !torch.int -> !torch.vtensor<[512,1280],f16> loc(#loc1)
%5949 = torch.aten.view %5948, %4939 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5950 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5951 = torch.aten.detach %5950 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5952 = torch.aten.view %5843, %4957 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,80,16],f16> loc(#loc1)
%5953 = torch.aten.abs %5952 : !torch.vtensor<[2,256,80,16],f16> -> !torch.vtensor<[2,256,80,16],f16> loc(#loc1)
%values_156, %indices_157 = torch.aten.max.dim %5953, %int3, %true : !torch.vtensor<[2,256,80,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,256,80,1],f16>, !torch.vtensor<[2,256,80,1],si64> loc(#loc1)
%5954 = torch.aten.view %values_156, %4960 : !torch.vtensor<[2,256,80,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,80,1],f16> loc(#loc1)
%5955 = torch.aten.broadcast_to %5954, %4957 : !torch.vtensor<[2,256,80,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,80,16],f16> loc(#loc1)
%5956 = torch.aten.clone %5955, %int0 : !torch.vtensor<[2,256,80,16],f16>, !torch.int -> !torch.vtensor<[2,256,80,16],f16> loc(#loc1)
%5957 = torch.aten.view %5956, %4939 : !torch.vtensor<[2,256,80,16],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5958 = torch.aten.sub.Tensor %5951, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%5959 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%5960 = torch.aten.pow.Tensor_Tensor %5959, %5958 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5961 = torch.aten.neg %5960 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5962 = torch.aten.neg %5961 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5963 = torch.aten.div.Tensor %5957, %5962 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5964 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5965 = torch.aten.detach %5964 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5966 = torch.aten.div.Tensor %5843, %5963 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5967 = torch.aten.add.Tensor %5966, %5965, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5968 = torch.aten.sub.Tensor %5951, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%5969 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%5970 = torch.aten.pow.Tensor_Tensor %5969, %5968 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5971 = torch.aten.neg %5970 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5972 = torch.aten.sub.Tensor %5951, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%5973 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%5974 = torch.aten.pow.Tensor_Tensor %5973, %5972 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5975 = torch.aten.sub.Tensor %5974, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%5976 = torch.aten.gt.Tensor %5967, %5975 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,256,1280],i1> loc(#loc1)
%5977 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%5978 = torch.aten.to.dtype %5977, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%5979 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%5980 = torch.aten.broadcast_to %5978, %5979 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%5981 = torch.valsem.aten.copy %5980, %5975, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%5982 = torch.aten.where.self %5976, %5981, %5967 : !torch.vtensor<[2,256,1280],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5983 = torch.aten.lt.Tensor %5982, %5971 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,256,1280],i1> loc(#loc1)
%5984 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%5985 = torch.aten.to.dtype %5984, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%5986 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%5987 = torch.aten.broadcast_to %5985, %5986 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%5988 = torch.valsem.aten.copy %5987, %5971, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%5989 = torch.aten.where.self %5983, %5988, %5982 : !torch.vtensor<[2,256,1280],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5990 = torch.aten.round %5989 : !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5991 = torch.aten.sub.Tensor %5990, %5965, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5992 = torch.aten.mul.Tensor %5991, %5963 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5993 = torch.aten.broadcast_to %278, %5000 : !torch.vtensor<[1280,80,1],f16>, !torch.list<int> -> !torch.vtensor<[1280,80,16],f16> loc(#loc1)
%5994 = torch.aten.clone %5993, %int0 : !torch.vtensor<[1280,80,16],f16>, !torch.int -> !torch.vtensor<[1280,80,16],f16> loc(#loc1)
%5995 = torch.aten.view %5994, %5003 : !torch.vtensor<[1280,80,16],f16>, !torch.list<int> -> !torch.vtensor<[1280,1280],f16> loc(#loc1)
%5996 = torch.aten.mul.Tensor %279, %5995 : !torch.vtensor<[1280,1280],si8>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16> loc(#loc1)
%5997 = torch.aten.transpose.int %5996, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16> loc(#loc1)
%5998 = torch.aten.view %5992, %5007 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16> loc(#loc1)
%5999 = torch.aten.mm %5998, %5997 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16> loc(#loc1)
%6000 = torch.aten.mul.Scalar %280, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16> loc(#loc1)
%6001 = torch.aten.add.Tensor %6000, %5999, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[512,1280],f16>, !torch.int -> !torch.vtensor<[512,1280],f16> loc(#loc1)
%6002 = torch.aten.view %6001, %4939 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
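    // Attention core: split Q, K and V into 8 heads of 160
    // ([2,256,1280] -> [16,256,160]), form QK^T with bmm, scale by %0
    // (presumably 1/sqrt(160) for the head dimension), apply a numerically
    // stable softmax (subtract the row max before exp), and bmm the resulting
    // weights against V.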
%6003 = torch.aten.view %5896, %5119 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16> loc(#loc1)
%6004 = torch.aten.permute %6003, %1380 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16> loc(#loc1)
%6005 = torch.aten.clone %6004, %int0 : !torch.vtensor<[2,8,256,160],f16>, !torch.int -> !torch.vtensor<[2,8,256,160],f16> loc(#loc1)
%6006 = torch.aten.view %6005, %5123 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16> loc(#loc1)
%6007 = torch.aten.view %5949, %5119 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16> loc(#loc1)
%6008 = torch.aten.permute %6007, %1380 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16> loc(#loc1)
%6009 = torch.aten.clone %6008, %int0 : !torch.vtensor<[2,8,256,160],f16>, !torch.int -> !torch.vtensor<[2,8,256,160],f16> loc(#loc1)
%6010 = torch.aten.view %6009, %5123 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16> loc(#loc1)
%6011 = torch.aten.view %6002, %5119 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16> loc(#loc1)
%6012 = torch.aten.permute %6011, %1380 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16> loc(#loc1)
%6013 = torch.aten.clone %6012, %int0 : !torch.vtensor<[2,8,256,160],f16>, !torch.int -> !torch.vtensor<[2,8,256,160],f16> loc(#loc1)
%6014 = torch.aten.view %6013, %5123 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16> loc(#loc1)
%6015 = torch.aten.transpose.int %6010, %int-1, %int-2 : !torch.vtensor<[16,256,160],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,160,256],f16> loc(#loc1)
%6016 = torch.aten.broadcast_to %6006, %5123 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16> loc(#loc1)
%6017 = torch.aten.view %6016, %5123 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16> loc(#loc1)
%6018 = torch.aten.broadcast_to %6015, %5136 : !torch.vtensor<[16,160,256],f16>, !torch.list<int> -> !torch.vtensor<[16,160,256],f16> loc(#loc1)
%6019 = torch.aten.view %6018, %5136 : !torch.vtensor<[16,160,256],f16>, !torch.list<int> -> !torch.vtensor<[16,160,256],f16> loc(#loc1)
%6020 = torch.aten.bmm %6017, %6019 : !torch.vtensor<[16,256,160],f16>, !torch.vtensor<[16,160,256],f16> -> !torch.vtensor<[16,256,256],f16> loc(#loc1)
%6021 = torch.aten.view %6020, %5140 : !torch.vtensor<[16,256,256],f16>, !torch.list<int> -> !torch.vtensor<[16,256,256],f16> loc(#loc1)
%6022 = torch.aten.mul.Tensor %6021, %0 : !torch.vtensor<[16,256,256],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,256,256],f16> loc(#loc1)
%values_158, %indices_159 = torch.aten.max.dim %6022, %int-1, %true : !torch.vtensor<[16,256,256],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,256,1],f16>, !torch.vtensor<[16,256,1],si64> loc(#loc1)
%6023 = torch.aten.sub.Tensor %6022, %values_158, %float1.000000e00 : !torch.vtensor<[16,256,256],f16>, !torch.vtensor<[16,256,1],f16>, !torch.float -> !torch.vtensor<[16,256,256],f16> loc(#loc1)
%6024 = torch.aten.exp %6023 : !torch.vtensor<[16,256,256],f16> -> !torch.vtensor<[16,256,256],f16> loc(#loc1)
%6025 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int> loc(#loc1)
%6026 = torch.aten.sum.dim_IntList %6024, %6025, %true, %none : !torch.vtensor<[16,256,256],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[16,256,1],f16> loc(#loc1)
%6027 = torch.aten.div.Tensor %6024, %6026 : !torch.vtensor<[16,256,256],f16>, !torch.vtensor<[16,256,1],f16> -> !torch.vtensor<[16,256,256],f16> loc(#loc1)
%6028 = torch.aten.broadcast_to %6027, %5140 : !torch.vtensor<[16,256,256],f16>, !torch.list<int> -> !torch.vtensor<[16,256,256],f16> loc(#loc1)
%6029 = torch.aten.view %6028, %5140 : !torch.vtensor<[16,256,256],f16>, !torch.list<int> -> !torch.vtensor<[16,256,256],f16> loc(#loc1)
%6030 = torch.aten.broadcast_to %6014, %5123 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16> loc(#loc1)
%6031 = torch.aten.view %6030, %5123 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16> loc(#loc1)
%6032 = torch.aten.bmm %6029, %6031 : !torch.vtensor<[16,256,256],f16>, !torch.vtensor<[16,256,160],f16> -> !torch.vtensor<[16,256,160],f16> loc(#loc1)
%6033 = torch.aten.view %6032, %5123 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16> loc(#loc1)
%6034 = torch.aten.view %6033, %5154 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16> loc(#loc1)
%6035 = torch.aten.permute %6034, %1380 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16> loc(#loc1)
%6036 = torch.aten.clone %6035, %int0 : !torch.vtensor<[2,256,8,160],f16>, !torch.int -> !torch.vtensor<[2,256,8,160],f16> loc(#loc1)
%6037 = torch.aten.view %6036, %4939 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
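    // Fake-quantize the merged attention output and apply the quantized
    // output projection (%281/%282/%283), following the same int8-weight mm
    // pattern as the Q/K/V projections above.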
%6038 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%6039 = torch.aten.detach %6038 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%6040 = torch.aten.view %6037, %4957 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,80,16],f16> loc(#loc1)
%6041 = torch.aten.abs %6040 : !torch.vtensor<[2,256,80,16],f16> -> !torch.vtensor<[2,256,80,16],f16> loc(#loc1)
%values_160, %indices_161 = torch.aten.max.dim %6041, %int3, %true : !torch.vtensor<[2,256,80,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,256,80,1],f16>, !torch.vtensor<[2,256,80,1],si64> loc(#loc1)
%6042 = torch.aten.view %values_160, %4960 : !torch.vtensor<[2,256,80,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,80,1],f16> loc(#loc1)
%6043 = torch.aten.broadcast_to %6042, %4957 : !torch.vtensor<[2,256,80,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,80,16],f16> loc(#loc1)
%6044 = torch.aten.clone %6043, %int0 : !torch.vtensor<[2,256,80,16],f16>, !torch.int -> !torch.vtensor<[2,256,80,16],f16> loc(#loc1)
%6045 = torch.aten.view %6044, %4939 : !torch.vtensor<[2,256,80,16],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%6046 = torch.aten.sub.Tensor %6039, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%6047 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%6048 = torch.aten.pow.Tensor_Tensor %6047, %6046 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%6049 = torch.aten.neg %6048 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%6050 = torch.aten.neg %6049 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%6051 = torch.aten.div.Tensor %6045, %6050 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%6052 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%6053 = torch.aten.detach %6052 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%6054 = torch.aten.div.Tensor %6037, %6051 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%6055 = torch.aten.add.Tensor %6054, %6053, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%6056 = torch.aten.sub.Tensor %6039, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%6057 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%6058 = torch.aten.pow.Tensor_Tensor %6057, %6056 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%6059 = torch.aten.neg %6058 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%6060 = torch.aten.sub.Tensor %6039, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%6061 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%6062 = torch.aten.pow.Tensor_Tensor %6061, %6060 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%6063 = torch.aten.sub.Tensor %6062, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%6064 = torch.aten.gt.Tensor %6055, %6063 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,256,1280],i1> loc(#loc1)
%6065 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%6066 = torch.aten.to.dtype %6065, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%6067 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%6068 = torch.aten.broadcast_to %6066, %6067 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%6069 = torch.valsem.aten.copy %6068, %6063, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%6070 = torch.aten.where.self %6064, %6069, %6055 : !torch.vtensor<[2,256,1280],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%6071 = torch.aten.lt.Tensor %6070, %6059 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,256,1280],i1> loc(#loc1)
%6072 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%6073 = torch.aten.to.dtype %6072, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%6074 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%6075 = torch.aten.broadcast_to %6073, %6074 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%6076 = torch.valsem.aten.copy %6075, %6059, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%6077 = torch.aten.where.self %6071, %6076, %6070 : !torch.vtensor<[2,256,1280],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%6078 = torch.aten.round %6077 : !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%6079 = torch.aten.sub.Tensor %6078, %6053, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%6080 = torch.aten.mul.Tensor %6079, %6051 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%6081 = torch.aten.broadcast_to %281, %5000 : !torch.vtensor<[1280,80,1],f16>, !torch.list<int> -> !torch.vtensor<[1280,80,16],f16> loc(#loc1)
%6082 = torch.aten.clone %6081, %int0 : !torch.vtensor<[1280,80,16],f16>, !torch.int -> !torch.vtensor<[1280,80,16],f16> loc(#loc1)
%6083 = torch.aten.view %6082, %5003 : !torch.vtensor<[1280,80,16],f16>, !torch.list<int> -> !torch.vtensor<[1280,1280],f16> loc(#loc1)
%6084 = torch.aten.mul.Tensor %282, %6083 : !torch.vtensor<[1280,1280],si8>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16> loc(#loc1)
%6085 = torch.aten.transpose.int %6084, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16> loc(#loc1)
%6086 = torch.aten.view %6080, %5007 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16> loc(#loc1)
%6087 = torch.aten.mm %6086, %6085 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16> loc(#loc1)
%6088 = torch.aten.mul.Scalar %283, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16> loc(#loc1)
%6089 = torch.aten.add.Tensor %6088, %6087, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[512,1280],f16>, !torch.int -> !torch.vtensor<[512,1280],f16> loc(#loc1)
%6090 = torch.aten.view %6089, %4939 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%6091 = torch.aten.add.Tensor %6090, %5829, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
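    // Residual add back onto the pre-attention sequence %5829, then a second
    // hand-rolled LayerNorm over the channel dimension.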
%6092 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int> loc(#loc1)
%6093 = torch.aten.sum.dim_IntList %6091, %6092, %true, %none : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,256,1],f16> loc(#loc1)
%6094 = torch.aten.div.Scalar %6093, %int1280 : !torch.vtensor<[2,256,1],f16>, !torch.int -> !torch.vtensor<[2,256,1],f16> loc(#loc1)
%6095 = torch.prim.ListConstruct %int2, %int256, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%6096 = torch.aten.broadcast_to %6094, %6095 : !torch.vtensor<[2,256,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%6097 = torch.aten.sub.Tensor %6091, %6096, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%6098 = torch.aten.mul.Tensor %6097, %6097 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%6099 = torch.aten.sum.dim_IntList %6098, %6092, %true, %none : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,256,1],f16> loc(#loc1)
%6100 = torch.aten.div.Scalar %6099, %int1280 : !torch.vtensor<[2,256,1],f16>, !torch.int -> !torch.vtensor<[2,256,1],f16> loc(#loc1)
%6101 = torch.aten.add.Scalar %6100, %float1.000000e-05, %int1 : !torch.vtensor<[2,256,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,256,1],f16> loc(#loc1)
%6102 = torch.aten.rsqrt %6101 : !torch.vtensor<[2,256,1],f16> -> !torch.vtensor<[2,256,1],f16> loc(#loc1)
%6103 = torch.prim.ListConstruct %int2, %int256, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%6104 = torch.aten.broadcast_to %6102, %6103 : !torch.vtensor<[2,256,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%6105 = torch.aten.mul.Tensor %6097, %6104 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
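    // Fake-quantize the re-normalized hidden states (same abs-max scheme as
    // above), presumably ahead of the next projection.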
%6106 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%6107 = torch.aten.detach %6106 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%6108 = torch.aten.view %6105, %4957 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,80,16],f16> loc(#loc1)
%6109 = torch.aten.abs %6108 : !torch.vtensor<[2,256,80,16],f16> -> !torch.vtensor<[2,256,80,16],f16> loc(#loc1)
%values_162, %indices_163 = torch.aten.max.dim %6109, %int3, %true : !torch.vtensor<[2,256,80,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,256,80,1],f16>, !torch.vtensor<[2,256,80,1],si64> loc(#loc1)
%6110 = torch.aten.view %values_162, %4960 : !torch.vtensor<[2,256,80,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,80,1],f16> loc(#loc1)
%6111 = torch.aten.broadcast_to %6110, %4957 : !torch.vtensor<[2,256,80,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,80,16],f16> loc(#loc1)
%6112 = torch.aten.clone %6111, %int0 : !torch.vtensor<[2,256,80,16],f16>, !torch.int -> !torch.vtensor<[2,256,80,16],f16> loc(#loc1)
%6113 = torch.aten.view %6112, %4939 : !torch.vtensor<[2,256,80,16],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%6114 = torch.aten.sub.Tensor %6107, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%6115 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%6116 = torch.aten.pow.Tensor_Tensor %6115, %6114 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%6117 = torch.aten.neg %6116 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%6118 = torch.aten.neg %6117 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%6119 = torch.aten.div.Tensor %6113, %6118 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%6120 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%6121 = torch.aten.detach %6120 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%6122 = torch.aten.div.Tensor %6105, %6119 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%6123 = torch.aten.add.Tensor %6122, %6121, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%6124 = torch.aten.sub.Tensor %6107, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%6125 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%6126 = torch.aten.pow.Tensor_Tensor %6125, %6124 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%6127 = torch.aten.neg %6126 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%6128 = torch.aten.sub.Tensor %6107, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%6129 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%6130 = torch.aten.pow.Tensor_Tensor %6129, %6128 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%6131 = torch.aten.sub.Tensor %6130, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%6132 = torch.aten.gt.Tensor %6123, %6131 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,256,1280],i1> loc(#loc1)
%6133 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%6134 = torch.aten.to.dtype %6133, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%6135 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%6136 = torch.aten.broadcast_to %6134, %6135 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%6137 = torch.valsem.aten.copy %6136, %6131, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%6138 = torch.aten.where.self %6132, %6137, %6123 : !torch.vtensor<[2,256,1280],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%6139 = torch.aten.lt.Tensor %6138, %6127 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,256,1280],i1> loc(#loc1)
%6140 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%6141 = torch.aten.to.dtype %6140, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%6142 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%6143 = torch.aten.broadcast_to %6141, %6142 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%6144 = torch.valsem.aten.copy %6143, %6127, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%6145 = torch.aten.where.self %6139, %6144, %6138 : !torch.vtensor<[2,256,1280],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%6146 = torch.aten.round %6145 : !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%6147 = torch.aten.sub.Tensor %6146, %6121, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%6148 = torch.aten.mul.Tensor %6147, %6119 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
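// %6106-%6148: group-wise dynamic fake quantization (quantize-dequantize) of the
// normalized activations: view as [2,256,80,16] (80 groups of 16 channels), take
// the per-group absmax, derive a scale absmax / %882^(b-1) (2^(b-1) if the base
// %882 is 2, with bit width b from %880), shift by the zero point (%881), clamp
// to the signed b-bit range, round, then undo the shift and rescale to f16.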
%6149 = torch.aten.broadcast_to %284, %5000 : !torch.vtensor<[1280,80,1],f16>, !torch.list<int> -> !torch.vtensor<[1280,80,16],f16> loc(#loc1)
%6150 = torch.aten.clone %6149, %int0 : !torch.vtensor<[1280,80,16],f16>, !torch.int -> !torch.vtensor<[1280,80,16],f16> loc(#loc1)
%6151 = torch.aten.view %6150, %5003 : !torch.vtensor<[1280,80,16],f16>, !torch.list<int> -> !torch.vtensor<[1280,1280],f16> loc(#loc1)
%6152 = torch.aten.mul.Tensor %285, %6151 : !torch.vtensor<[1280,1280],si8>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16> loc(#loc1)
%6153 = torch.aten.transpose.int %6152, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16> loc(#loc1)
%6154 = torch.aten.view %6148, %5007 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16> loc(#loc1)
%6155 = torch.aten.mm %6154, %6153 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16> loc(#loc1)
%6156 = torch.aten.mul.Scalar %286, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16> loc(#loc1)
%6157 = torch.aten.add.Tensor %6156, %6155, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[512,1280],f16>, !torch.int -> !torch.vtensor<[512,1280],f16> loc(#loc1)
%6158 = torch.aten.view %6157, %4939 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
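// %6149-%6158: quantized linear layer: the si8 weight %285 is dequantized with
// per-group scales %284, then applied as x @ W^T + b (bias %286). Given the
// attention that follows, this appears to be the query projection (to_q).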
%6159 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%6160 = torch.aten.detach %6159 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%6161 = torch.aten.view %arg2, %1542 : !torch.vtensor<[2,77,768],f16>, !torch.list<int> -> !torch.vtensor<[2,77,48,16],f16> loc(#loc1)
%6162 = torch.aten.abs %6161 : !torch.vtensor<[2,77,48,16],f16> -> !torch.vtensor<[2,77,48,16],f16> loc(#loc1)
%values_164, %indices_165 = torch.aten.max.dim %6162, %int3, %true : !torch.vtensor<[2,77,48,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,77,48,1],f16>, !torch.vtensor<[2,77,48,1],si64> loc(#loc1)
%6163 = torch.aten.view %values_164, %1545 : !torch.vtensor<[2,77,48,1],f16>, !torch.list<int> -> !torch.vtensor<[2,77,48,1],f16> loc(#loc1)
%6164 = torch.aten.broadcast_to %6163, %1542 : !torch.vtensor<[2,77,48,1],f16>, !torch.list<int> -> !torch.vtensor<[2,77,48,16],f16> loc(#loc1)
%6165 = torch.aten.clone %6164, %int0 : !torch.vtensor<[2,77,48,16],f16>, !torch.int -> !torch.vtensor<[2,77,48,16],f16> loc(#loc1)
%6166 = torch.aten.view %6165, %1549 : !torch.vtensor<[2,77,48,16],f16>, !torch.list<int> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%6167 = torch.aten.sub.Tensor %6160, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%6168 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%6169 = torch.aten.pow.Tensor_Tensor %6168, %6167 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%6170 = torch.aten.neg %6169 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%6171 = torch.aten.neg %6170 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%6172 = torch.aten.div.Tensor %6166, %6171 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%6173 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%6174 = torch.aten.detach %6173 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%6175 = torch.aten.div.Tensor %arg2, %6172 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[2,77,768],f16> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%6176 = torch.aten.add.Tensor %6175, %6174, %int1 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%6177 = torch.aten.sub.Tensor %6160, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%6178 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%6179 = torch.aten.pow.Tensor_Tensor %6178, %6177 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%6180 = torch.aten.neg %6179 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%6181 = torch.aten.sub.Tensor %6160, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%6182 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%6183 = torch.aten.pow.Tensor_Tensor %6182, %6181 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%6184 = torch.aten.sub.Tensor %6183, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%6185 = torch.aten.gt.Tensor %6176, %6184 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,77,768],i1> loc(#loc1)
%6186 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%6187 = torch.aten.to.dtype %6186, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%6188 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%6189 = torch.aten.broadcast_to %6187, %6188 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%6190 = torch.valsem.aten.copy %6189, %6184, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%6191 = torch.aten.where.self %6185, %6190, %6176 : !torch.vtensor<[2,77,768],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,77,768],f16> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%6192 = torch.aten.lt.Tensor %6191, %6180 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,77,768],i1> loc(#loc1)
%6193 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%6194 = torch.aten.to.dtype %6193, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%6195 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%6196 = torch.aten.broadcast_to %6194, %6195 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%6197 = torch.valsem.aten.copy %6196, %6180, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%6198 = torch.aten.where.self %6192, %6197, %6191 : !torch.vtensor<[2,77,768],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,77,768],f16> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%6199 = torch.aten.round %6198 : !torch.vtensor<[2,77,768],f16> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%6200 = torch.aten.sub.Tensor %6199, %6174, %int1 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%6201 = torch.aten.mul.Tensor %6200, %6172 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[2,77,768],f16> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
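// %6159-%6201: the same fake-quantization pattern applied to %arg2, the
// [2,77,768] encoder hidden states (48 groups of 16 channels).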
%6202 = torch.aten.broadcast_to %287, %5323 : !torch.vtensor<[1280,48,1],f16>, !torch.list<int> -> !torch.vtensor<[1280,48,16],f16> loc(#loc1)
%6203 = torch.aten.clone %6202, %int0 : !torch.vtensor<[1280,48,16],f16>, !torch.int -> !torch.vtensor<[1280,48,16],f16> loc(#loc1)
%6204 = torch.aten.view %6203, %5326 : !torch.vtensor<[1280,48,16],f16>, !torch.list<int> -> !torch.vtensor<[1280,768],f16> loc(#loc1)
%6205 = torch.aten.mul.Tensor %288, %6204 : !torch.vtensor<[1280,768],si8>, !torch.vtensor<[1280,768],f16> -> !torch.vtensor<[1280,768],f16> loc(#loc1)
%6206 = torch.aten.transpose.int %6205, %int0, %int1 : !torch.vtensor<[1280,768],f16>, !torch.int, !torch.int -> !torch.vtensor<[768,1280],f16> loc(#loc1)
%6207 = torch.aten.view %6201, %1593 : !torch.vtensor<[2,77,768],f16>, !torch.list<int> -> !torch.vtensor<[154,768],f16> loc(#loc1)
%6208 = torch.aten.mm %6207, %6206 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,1280],f16> -> !torch.vtensor<[154,1280],f16> loc(#loc1)
%6209 = torch.aten.view %6208, %5332 : !torch.vtensor<[154,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,77,1280],f16> loc(#loc1)
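// %6202-%6209: quantized linear projection of the encoder hidden states from 768
// to 1280 channels with no bias, consistent with a cross-attention key
// projection (to_k).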
%6210 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%6211 = torch.aten.detach %6210 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%6212 = torch.aten.view %arg2, %1542 : !torch.vtensor<[2,77,768],f16>, !torch.list<int> -> !torch.vtensor<[2,77,48,16],f16> loc(#loc1)
%6213 = torch.aten.abs %6212 : !torch.vtensor<[2,77,48,16],f16> -> !torch.vtensor<[2,77,48,16],f16> loc(#loc1)
%values_166, %indices_167 = torch.aten.max.dim %6213, %int3, %true : !torch.vtensor<[2,77,48,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,77,48,1],f16>, !torch.vtensor<[2,77,48,1],si64> loc(#loc1)
%6214 = torch.aten.view %values_166, %1545 : !torch.vtensor<[2,77,48,1],f16>, !torch.list<int> -> !torch.vtensor<[2,77,48,1],f16> loc(#loc1)
%6215 = torch.aten.broadcast_to %6214, %1542 : !torch.vtensor<[2,77,48,1],f16>, !torch.list<int> -> !torch.vtensor<[2,77,48,16],f16> loc(#loc1)
%6216 = torch.aten.clone %6215, %int0 : !torch.vtensor<[2,77,48,16],f16>, !torch.int -> !torch.vtensor<[2,77,48,16],f16> loc(#loc1)
%6217 = torch.aten.view %6216, %1549 : !torch.vtensor<[2,77,48,16],f16>, !torch.list<int> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%6218 = torch.aten.sub.Tensor %6211, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%6219 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%6220 = torch.aten.pow.Tensor_Tensor %6219, %6218 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%6221 = torch.aten.neg %6220 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%6222 = torch.aten.neg %6221 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%6223 = torch.aten.div.Tensor %6217, %6222 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%6224 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%6225 = torch.aten.detach %6224 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%6226 = torch.aten.div.Tensor %arg2, %6223 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[2,77,768],f16> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%6227 = torch.aten.add.Tensor %6226, %6225, %int1 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%6228 = torch.aten.sub.Tensor %6211, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%6229 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%6230 = torch.aten.pow.Tensor_Tensor %6229, %6228 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%6231 = torch.aten.neg %6230 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%6232 = torch.aten.sub.Tensor %6211, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%6233 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%6234 = torch.aten.pow.Tensor_Tensor %6233, %6232 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%6235 = torch.aten.sub.Tensor %6234, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%6236 = torch.aten.gt.Tensor %6227, %6235 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,77,768],i1> loc(#loc1)
%6237 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%6238 = torch.aten.to.dtype %6237, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%6239 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%6240 = torch.aten.broadcast_to %6238, %6239 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%6241 = torch.valsem.aten.copy %6240, %6235, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%6242 = torch.aten.where.self %6236, %6241, %6227 : !torch.vtensor<[2,77,768],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,77,768],f16> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%6243 = torch.aten.lt.Tensor %6242, %6231 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,77,768],i1> loc(#loc1)
%6244 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%6245 = torch.aten.to.dtype %6244, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%6246 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%6247 = torch.aten.broadcast_to %6245, %6246 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%6248 = torch.valsem.aten.copy %6247, %6231, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%6249 = torch.aten.where.self %6243, %6248, %6242 : !torch.vtensor<[2,77,768],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,77,768],f16> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%6250 = torch.aten.round %6249 : !torch.vtensor<[2,77,768],f16> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%6251 = torch.aten.sub.Tensor %6250, %6225, %int1 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%6252 = torch.aten.mul.Tensor %6251, %6223 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[2,77,768],f16> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
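// %6210-%6252: %arg2 is fake-quantized a second time (the subgraph is duplicated
// rather than reused) to feed the next projection.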
%6253 = torch.aten.broadcast_to %289, %5323 : !torch.vtensor<[1280,48,1],f16>, !torch.list<int> -> !torch.vtensor<[1280,48,16],f16> loc(#loc1)
%6254 = torch.aten.clone %6253, %int0 : !torch.vtensor<[1280,48,16],f16>, !torch.int -> !torch.vtensor<[1280,48,16],f16> loc(#loc1)
%6255 = torch.aten.view %6254, %5326 : !torch.vtensor<[1280,48,16],f16>, !torch.list<int> -> !torch.vtensor<[1280,768],f16> loc(#loc1)
%6256 = torch.aten.mul.Tensor %290, %6255 : !torch.vtensor<[1280,768],si8>, !torch.vtensor<[1280,768],f16> -> !torch.vtensor<[1280,768],f16> loc(#loc1)
%6257 = torch.aten.transpose.int %6256, %int0, %int1 : !torch.vtensor<[1280,768],f16>, !torch.int, !torch.int -> !torch.vtensor<[768,1280],f16> loc(#loc1)
%6258 = torch.aten.view %6252, %1593 : !torch.vtensor<[2,77,768],f16>, !torch.list<int> -> !torch.vtensor<[154,768],f16> loc(#loc1)
%6259 = torch.aten.mm %6258, %6257 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,1280],f16> -> !torch.vtensor<[154,1280],f16> loc(#loc1)
%6260 = torch.aten.view %6259, %5332 : !torch.vtensor<[154,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,77,1280],f16> loc(#loc1)
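// %6253-%6260: the matching quantized value projection (to_v), again 768 -> 1280
// with no bias.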
%6261 = torch.aten.view %6158, %5119 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16> loc(#loc1)
%6262 = torch.aten.permute %6261, %1380 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16> loc(#loc1)
%6263 = torch.aten.clone %6262, %int0 : !torch.vtensor<[2,8,256,160],f16>, !torch.int -> !torch.vtensor<[2,8,256,160],f16> loc(#loc1)
%6264 = torch.aten.view %6263, %5123 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16> loc(#loc1)
%6265 = torch.aten.view %6209, %5389 : !torch.vtensor<[2,77,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,77,8,160],f16> loc(#loc1)
%6266 = torch.aten.permute %6265, %1380 : !torch.vtensor<[2,77,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,160],f16> loc(#loc1)
%6267 = torch.aten.clone %6266, %int0 : !torch.vtensor<[2,8,77,160],f16>, !torch.int -> !torch.vtensor<[2,8,77,160],f16> loc(#loc1)
%6268 = torch.aten.view %6267, %5393 : !torch.vtensor<[2,8,77,160],f16>, !torch.list<int> -> !torch.vtensor<[16,77,160],f16> loc(#loc1)
%6269 = torch.aten.view %6260, %5389 : !torch.vtensor<[2,77,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,77,8,160],f16> loc(#loc1)
%6270 = torch.aten.permute %6269, %1380 : !torch.vtensor<[2,77,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,160],f16> loc(#loc1)
%6271 = torch.aten.clone %6270, %int0 : !torch.vtensor<[2,8,77,160],f16>, !torch.int -> !torch.vtensor<[2,8,77,160],f16> loc(#loc1)
%6272 = torch.aten.view %6271, %5393 : !torch.vtensor<[2,8,77,160],f16>, !torch.list<int> -> !torch.vtensor<[16,77,160],f16> loc(#loc1)
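// %6261-%6272: split Q ([2,256,1280]) and K/V ([2,77,1280]) into 8 heads of 160
// channels each and fold the head dim into the batch: Q -> [16,256,160],
// K/V -> [16,77,160].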
%6273 = torch.aten.transpose.int %6268, %int-1, %int-2 : !torch.vtensor<[16,77,160],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,160,77],f16> loc(#loc1)
%6274 = torch.aten.broadcast_to %6264, %5123 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16> loc(#loc1)
%6275 = torch.aten.view %6274, %5123 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16> loc(#loc1)
%6276 = torch.aten.broadcast_to %6273, %5402 : !torch.vtensor<[16,160,77],f16>, !torch.list<int> -> !torch.vtensor<[16,160,77],f16> loc(#loc1)
%6277 = torch.aten.view %6276, %5402 : !torch.vtensor<[16,160,77],f16>, !torch.list<int> -> !torch.vtensor<[16,160,77],f16> loc(#loc1)
%6278 = torch.aten.bmm %6275, %6277 : !torch.vtensor<[16,256,160],f16>, !torch.vtensor<[16,160,77],f16> -> !torch.vtensor<[16,256,77],f16> loc(#loc1)
%6279 = torch.aten.view %6278, %5406 : !torch.vtensor<[16,256,77],f16>, !torch.list<int> -> !torch.vtensor<[16,256,77],f16> loc(#loc1)
%6280 = torch.aten.mul.Tensor %6279, %0 : !torch.vtensor<[16,256,77],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,256,77],f16> loc(#loc1)
%values_168, %indices_169 = torch.aten.max.dim %6280, %int-1, %true : !torch.vtensor<[16,256,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,256,1],f16>, !torch.vtensor<[16,256,1],si64> loc(#loc1)
%6281 = torch.aten.sub.Tensor %6280, %values_168, %float1.000000e00 : !torch.vtensor<[16,256,77],f16>, !torch.vtensor<[16,256,1],f16>, !torch.float -> !torch.vtensor<[16,256,77],f16> loc(#loc1)
%6282 = torch.aten.exp %6281 : !torch.vtensor<[16,256,77],f16> -> !torch.vtensor<[16,256,77],f16> loc(#loc1)
%6283 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int> loc(#loc1)
%6284 = torch.aten.sum.dim_IntList %6282, %6283, %true, %none : !torch.vtensor<[16,256,77],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[16,256,1],f16> loc(#loc1)
%6285 = torch.aten.div.Tensor %6282, %6284 : !torch.vtensor<[16,256,77],f16>, !torch.vtensor<[16,256,1],f16> -> !torch.vtensor<[16,256,77],f16> loc(#loc1)
%6286 = torch.aten.broadcast_to %6285, %5406 : !torch.vtensor<[16,256,77],f16>, !torch.list<int> -> !torch.vtensor<[16,256,77],f16> loc(#loc1)
%6287 = torch.aten.view %6286, %5406 : !torch.vtensor<[16,256,77],f16>, !torch.list<int> -> !torch.vtensor<[16,256,77],f16> loc(#loc1)
%6288 = torch.aten.broadcast_to %6272, %5393 : !torch.vtensor<[16,77,160],f16>, !torch.list<int> -> !torch.vtensor<[16,77,160],f16> loc(#loc1)
%6289 = torch.aten.view %6288, %5393 : !torch.vtensor<[16,77,160],f16>, !torch.list<int> -> !torch.vtensor<[16,77,160],f16> loc(#loc1)
%6290 = torch.aten.bmm %6287, %6289 : !torch.vtensor<[16,256,77],f16>, !torch.vtensor<[16,77,160],f16> -> !torch.vtensor<[16,256,160],f16> loc(#loc1)
%6291 = torch.aten.view %6290, %5123 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16> loc(#loc1)
%6292 = torch.aten.view %6291, %5154 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16> loc(#loc1)
%6293 = torch.aten.permute %6292, %1380 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16> loc(#loc1)
%6294 = torch.aten.clone %6293, %int0 : !torch.vtensor<[2,256,8,160],f16>, !torch.int -> !torch.vtensor<[2,256,8,160],f16> loc(#loc1)
%6295 = torch.aten.view %6294, %4939 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
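// %6273-%6295: scaled dot-product cross-attention: scores = (Q @ K^T) * %0,
// where %0 = 0.0790569... = 1/sqrt(160), then a numerically stable softmax
// (row max subtracted before exp), scores @ V, and the heads merged back to
// [2,256,1280].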
%6296 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%6297 = torch.aten.detach %6296 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%6298 = torch.aten.view %6295, %4957 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,80,16],f16> loc(#loc1)
%6299 = torch.aten.abs %6298 : !torch.vtensor<[2,256,80,16],f16> -> !torch.vtensor<[2,256,80,16],f16> loc(#loc1)
%values_170, %indices_171 = torch.aten.max.dim %6299, %int3, %true : !torch.vtensor<[2,256,80,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,256,80,1],f16>, !torch.vtensor<[2,256,80,1],si64> loc(#loc1)
%6300 = torch.aten.view %values_170, %4960 : !torch.vtensor<[2,256,80,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,80,1],f16> loc(#loc1)
%6301 = torch.aten.broadcast_to %6300, %4957 : !torch.vtensor<[2,256,80,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,80,16],f16> loc(#loc1)
%6302 = torch.aten.clone %6301, %int0 : !torch.vtensor<[2,256,80,16],f16>, !torch.int -> !torch.vtensor<[2,256,80,16],f16> loc(#loc1)
%6303 = torch.aten.view %6302, %4939 : !torch.vtensor<[2,256,80,16],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%6304 = torch.aten.sub.Tensor %6297, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%6305 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%6306 = torch.aten.pow.Tensor_Tensor %6305, %6304 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%6307 = torch.aten.neg %6306 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%6308 = torch.aten.neg %6307 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%6309 = torch.aten.div.Tensor %6303, %6308 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%6310 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%6311 = torch.aten.detach %6310 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%6312 = torch.aten.div.Tensor %6295, %6309 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%6313 = torch.aten.add.Tensor %6312, %6311, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%6314 = torch.aten.sub.Tensor %6297, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%6315 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%6316 = torch.aten.pow.Tensor_Tensor %6315, %6314 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%6317 = torch.aten.neg %6316 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%6318 = torch.aten.sub.Tensor %6297, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%6319 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%6320 = torch.aten.pow.Tensor_Tensor %6319, %6318 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%6321 = torch.aten.sub.Tensor %6320, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%6322 = torch.aten.gt.Tensor %6313, %6321 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,256,1280],i1> loc(#loc1)
%6323 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%6324 = torch.aten.to.dtype %6323, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%6325 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%6326 = torch.aten.broadcast_to %6324, %6325 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%6327 = torch.valsem.aten.copy %6326, %6321, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%6328 = torch.aten.where.self %6322, %6327, %6313 : !torch.vtensor<[2,256,1280],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%6329 = torch.aten.lt.Tensor %6328, %6317 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,256,1280],i1> loc(#loc1)
%6330 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%6331 = torch.aten.to.dtype %6330, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%6332 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%6333 = torch.aten.broadcast_to %6331, %6332 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%6334 = torch.valsem.aten.copy %6333, %6317, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%6335 = torch.aten.where.self %6329, %6334, %6328 : !torch.vtensor<[2,256,1280],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%6336 = torch.aten.round %6335 : !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%6337 = torch.aten.sub.Tensor %6336, %6311, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%6338 = torch.aten.mul.Tensor %6337, %6309 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
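// %6296-%6338: group-wise fake quantization of the attention output, same
// pattern as above.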
%6339 = torch.aten.broadcast_to %291, %5000 : !torch.vtensor<[1280,80,1],f16>, !torch.list<int> -> !torch.vtensor<[1280,80,16],f16> loc(#loc1)
%6340 = torch.aten.clone %6339, %int0 : !torch.vtensor<[1280,80,16],f16>, !torch.int -> !torch.vtensor<[1280,80,16],f16> loc(#loc1)
%6341 = torch.aten.view %6340, %5003 : !torch.vtensor<[1280,80,16],f16>, !torch.list<int> -> !torch.vtensor<[1280,1280],f16> loc(#loc1)
%6342 = torch.aten.mul.Tensor %292, %6341 : !torch.vtensor<[1280,1280],si8>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16> loc(#loc1)
%6343 = torch.aten.transpose.int %6342, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16> loc(#loc1)
%6344 = torch.aten.view %6338, %5007 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16> loc(#loc1)
%6345 = torch.aten.mm %6344, %6343 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16> loc(#loc1)
%6346 = torch.aten.mul.Scalar %293, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16> loc(#loc1)
%6347 = torch.aten.add.Tensor %6346, %6345, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[512,1280],f16>, !torch.int -> !torch.vtensor<[512,1280],f16> loc(#loc1)
%6348 = torch.aten.view %6347, %4939 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%6349 = torch.aten.add.Tensor %6348, %6091, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
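// %6339-%6349: quantized output projection (bias %293, presumably to_out),
// followed by the residual add with %6091, the input to this attention block.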
%6350 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int> loc(#loc1)
%6351 = torch.aten.sum.dim_IntList %6349, %6350, %true, %none : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,256,1],f16> loc(#loc1)
%6352 = torch.aten.div.Scalar %6351, %int1280 : !torch.vtensor<[2,256,1],f16>, !torch.int -> !torch.vtensor<[2,256,1],f16> loc(#loc1)
%6353 = torch.prim.ListConstruct %int2, %int256, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%6354 = torch.aten.broadcast_to %6352, %6353 : !torch.vtensor<[2,256,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%6355 = torch.aten.sub.Tensor %6349, %6354, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%6356 = torch.aten.mul.Tensor %6355, %6355 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%6357 = torch.aten.sum.dim_IntList %6356, %6350, %true, %none : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,256,1],f16> loc(#loc1)
%6358 = torch.aten.div.Scalar %6357, %int1280 : !torch.vtensor<[2,256,1],f16>, !torch.int -> !torch.vtensor<[2,256,1],f16> loc(#loc1)
%6359 = torch.aten.add.Scalar %6358, %float1.000000e-05, %int1 : !torch.vtensor<[2,256,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,256,1],f16> loc(#loc1)
%6360 = torch.aten.rsqrt %6359 : !torch.vtensor<[2,256,1],f16> -> !torch.vtensor<[2,256,1],f16> loc(#loc1)
%6361 = torch.prim.ListConstruct %int2, %int256, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%6362 = torch.aten.broadcast_to %6360, %6361 : !torch.vtensor<[2,256,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%6363 = torch.aten.mul.Tensor %6355, %6362 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
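// %6350-%6363: a second LayerNorm (eps = 1e-5, no affine) over the residual
// stream before the feed-forward sub-block.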
%6364 = torch.aten.broadcast_to %294, %5492 : !torch.vtensor<[10240,80,1],f16>, !torch.list<int> -> !torch.vtensor<[10240,80,16],f16> loc(#loc1)
%6365 = torch.aten.clone %6364, %int0 : !torch.vtensor<[10240,80,16],f16>, !torch.int -> !torch.vtensor<[10240,80,16],f16> loc(#loc1)
%6366 = torch.aten.view %6365, %5495 : !torch.vtensor<[10240,80,16],f16>, !torch.list<int> -> !torch.vtensor<[10240,1280],f16> loc(#loc1)
%6367 = torch.aten.mul.Tensor %295, %6366 : !torch.vtensor<[10240,1280],si8>, !torch.vtensor<[10240,1280],f16> -> !torch.vtensor<[10240,1280],f16> loc(#loc1)
%6368 = torch.aten.transpose.int %6367, %int0, %int1 : !torch.vtensor<[10240,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,10240],f16> loc(#loc1)
%6369 = torch.aten.view %6363, %5007 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16> loc(#loc1)
%6370 = torch.aten.mm %6369, %6368 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,10240],f16> -> !torch.vtensor<[512,10240],f16> loc(#loc1)
%6371 = torch.aten.mul.Scalar %296, %int1 : !torch.vtensor<[10240],f16>, !torch.int -> !torch.vtensor<[10240],f16> loc(#loc1)
%6372 = torch.aten.add.Tensor %6371, %6370, %int1 : !torch.vtensor<[10240],f16>, !torch.vtensor<[512,10240],f16>, !torch.int -> !torch.vtensor<[512,10240],f16> loc(#loc1)
%6373 = torch.aten.view %6372, %5503 : !torch.vtensor<[512,10240],f16>, !torch.list<int> -> !torch.vtensor<[2,256,10240],f16> loc(#loc1)
%6374 = torch.aten.slice.Tensor %6373, %int-1, %int0, %int5120, %int1 : !torch.vtensor<[2,256,10240],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,256,5120],f16> loc(#loc1)
%6375 = torch.aten.slice.Tensor %6373, %int-1, %int5120, %int10240, %int1 : !torch.vtensor<[2,256,10240],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,256,5120],f16> loc(#loc1)
%6376 = torch.aten.gelu %6375, %str : !torch.vtensor<[2,256,5120],f16>, !torch.str -> !torch.vtensor<[2,256,5120],f16> loc(#loc1)
%6377 = torch.aten.mul.Tensor %6374, %6376 : !torch.vtensor<[2,256,5120],f16>, !torch.vtensor<[2,256,5120],f16> -> !torch.vtensor<[2,256,5120],f16> loc(#loc1)
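// %6364-%6377: quantized feed-forward input projection to [2,256,10240], then
// GEGLU gating: the result is split into two 5120-channel halves a and b and
// combined as a * gelu(b).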
%6378 = torch.aten.broadcast_to %297, %5509 : !torch.vtensor<[1280,320,1],f16>, !torch.list<int> -> !torch.vtensor<[1280,320,16],f16> loc(#loc1)
%6379 = torch.aten.clone %6378, %int0 : !torch.vtensor<[1280,320,16],f16>, !torch.int -> !torch.vtensor<[1280,320,16],f16> loc(#loc1)
%6380 = torch.aten.view %6379, %5512 : !torch.vtensor<[1280,320,16],f16>, !torch.list<int> -> !torch.vtensor<[1280,5120],f16> loc(#loc1)
%6381 = torch.aten.mul.Tensor %298, %6380 : !torch.vtensor<[1280,5120],si8>, !torch.vtensor<[1280,5120],f16> -> !torch.vtensor<[1280,5120],f16> loc(#loc1)
%6382 = torch.aten.transpose.int %6381, %int0, %int1 : !torch.vtensor<[1280,5120],f16>, !torch.int, !torch.int -> !torch.vtensor<[5120,1280],f16> loc(#loc1)
%6383 = torch.aten.view %6377, %5516 : !torch.vtensor<[2,256,5120],f16>, !torch.list<int> -> !torch.vtensor<[512,5120],f16> loc(#loc1)
%6384 = torch.aten.mm %6383, %6382 : !torch.vtensor<[512,5120],f16>, !torch.vtensor<[5120,1280],f16> -> !torch.vtensor<[512,1280],f16> loc(#loc1)
%6385 = torch.aten.mul.Scalar %299, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16> loc(#loc1)
%6386 = torch.aten.add.Tensor %6385, %6384, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[512,1280],f16>, !torch.int -> !torch.vtensor<[512,1280],f16> loc(#loc1)
%6387 = torch.aten.view %6386, %4939 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%6388 = torch.aten.add.Tensor %6387, %6349, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
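// %6378-%6388: quantized feed-forward output projection (5120 -> 1280, bias
// %299) and the second residual add, closing the transformer block.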
%6389 = torch.aten.view %6388, %5523 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,16,16,1280],f16> loc(#loc1)
%6390 = torch.aten.permute %6389, %1789 : !torch.vtensor<[2,16,16,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
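// %6389-%6390: the [2,256,1280] token sequence is reshaped back to the spatial
// layout [2,1280,16,16].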
%6391 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%6392 = torch.aten.detach %6391 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%6393 = torch.aten.view %6390, %4759 : !torch.vtensor<[2,1280,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,80,16,16,16],f16> loc(#loc1)
%6394 = torch.aten.abs %6393 : !torch.vtensor<[2,80,16,16,16],f16> -> !torch.vtensor<[2,80,16,16,16],f16> loc(#loc1)
%values_172, %indices_173 = torch.aten.max.dim %6394, %int2, %true : !torch.vtensor<[2,80,16,16,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,80,1,16,16],f16>, !torch.vtensor<[2,80,1,16,16],si64> loc(#loc1)
%6395 = torch.aten.view %values_172, %4762 : !torch.vtensor<[2,80,1,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,80,1,16,16],f16> loc(#loc1)
%6396 = torch.aten.broadcast_to %6395, %4759 : !torch.vtensor<[2,80,1,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,80,16,16,16],f16> loc(#loc1)
%6397 = torch.aten.clone %6396, %int0 : !torch.vtensor<[2,80,16,16,16],f16>, !torch.int -> !torch.vtensor<[2,80,16,16,16],f16> loc(#loc1)
%6398 = torch.aten.view %6397, %4742 : !torch.vtensor<[2,80,16,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%6399 = torch.aten.sub.Tensor %6392, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%6400 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%6401 = torch.aten.pow.Tensor_Tensor %6400, %6399 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%6402 = torch.aten.neg %6401 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%6403 = torch.aten.neg %6402 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%6404 = torch.aten.div.Tensor %6398, %6403 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%6405 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%6406 = torch.aten.detach %6405 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%6407 = torch.aten.div.Tensor %6390, %6404 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f16> -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%6408 = torch.aten.add.Tensor %6407, %6406, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%6409 = torch.aten.sub.Tensor %6392, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%6410 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%6411 = torch.aten.pow.Tensor_Tensor %6410, %6409 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%6412 = torch.aten.neg %6411 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%6413 = torch.aten.sub.Tensor %6392, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%6414 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)