// Gist by @pashu123, created October 19, 2022.
// NOTE: this file has been truncated; only the beginning of the full IR is shown.
#loc0 = loc(unknown)
module attributes {torch.debug_module_name = "_lambda"} {
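// Input shapes suggest a Stable Diffusion UNet denoising step with classifier-free
// guidance (batch 2): %arg0 is the latent sample [2,4,64,64] (f16), %arg1 a scalar
// timestep (si64), %arg2 CLIP text-encoder hidden states [2,77,768] (f16); the result
// has the latent's shape. (Interpretation inferred from shapes, not stated in the dump.)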
func.func @forward(%arg0: !torch.vtensor<[2,4,64,64],f16> loc(unknown), %arg1: !torch.vtensor<[],si64> loc(unknown), %arg2: !torch.vtensor<[2,77,768],f16> loc(unknown)) -> !torch.vtensor<[2,4,64,64],f16> {
%int64 = torch.constant.int 64 loc(#loc1)
%int320 = torch.constant.int 320 loc(#loc1)
%int2 = torch.constant.int 2 loc(#loc1)
%int40960 = torch.constant.int 40960 loc(#loc1)
%int4096 = torch.constant.int 4096 loc(#loc1)
%int10 = torch.constant.int 10 loc(#loc1)
%int32 = torch.constant.int 32 loc(#loc1)
%int640 = torch.constant.int 640 loc(#loc1)
%int81920 = torch.constant.int 81920 loc(#loc1)
%int20 = torch.constant.int 20 loc(#loc1)
%int960 = torch.constant.int 960 loc(#loc1)
%int122880 = torch.constant.int 122880 loc(#loc1)
%int30 = torch.constant.int 30 loc(#loc1)
%int1024 = torch.constant.int 1024 loc(#loc1)
%int20480 = torch.constant.int 20480 loc(#loc1)
%int30720 = torch.constant.int 30720 loc(#loc1)
%int1280 = torch.constant.int 1280 loc(#loc1)
%int40 = torch.constant.int 40 loc(#loc1)
%int1920 = torch.constant.int 1920 loc(#loc1)
%int61440 = torch.constant.int 61440 loc(#loc1)
%int60 = torch.constant.int 60 loc(#loc1)
%int256 = torch.constant.int 256 loc(#loc1)
%int16 = torch.constant.int 16 loc(#loc1)
%int10240 = torch.constant.int 10240 loc(#loc1)
%int15360 = torch.constant.int 15360 loc(#loc1)
%int2560 = torch.constant.int 2560 loc(#loc1)
%int80 = torch.constant.int 80 loc(#loc1)
%int8 = torch.constant.int 8 loc(#loc1)
%int5120 = torch.constant.int 5120 loc(#loc1)
%int1 = torch.constant.int 1 loc(#loc1)
%float1.000000e00 = torch.constant.float 1.000000e+00 loc(#loc1)
%int7 = torch.constant.int 7 loc(#loc1)
%float0.000000e00 = torch.constant.float 0.000000e+00 loc(#loc1)
%int160 = torch.constant.int 160 loc(#loc1)
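// Scalar literals below: %0-%2 match attention scales 1/sqrt(d) for head dims 160, 80,
// and 40 (0.0791 ~ 1/sqrt(160), 0.1118 ~ 1/sqrt(80), 0.1581 ~ 1/sqrt(40)), and
// %8 = -9.2103 ~ -ln(10000), the sinusoidal timestep-embedding factor used with
// half_dim = 160 (%7). (Identifications inferred from the numeric values.)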
%0 = torch.vtensor.literal(dense<0.079056941504209485> : tensor<f64>) : !torch.vtensor<[],f64> loc(#loc1)
%1 = torch.vtensor.literal(dense<0.11180339887498948> : tensor<f64>) : !torch.vtensor<[],f64> loc(#loc1)
%2 = torch.vtensor.literal(dense<0.15811388300841897> : tensor<f64>) : !torch.vtensor<[],f64> loc(#loc1)
%3 = torch.vtensor.literal(dense<9.9999999999999995E-7> : tensor<f64>) : !torch.vtensor<[],f64> loc(#loc1)
%4 = torch.vtensor.literal(dense<1.000000e+00> : tensor<f64>) : !torch.vtensor<[],f64> loc(#loc1)
%5 = torch.vtensor.literal(dense<1.000000e-05> : tensor<f64>) : !torch.vtensor<[],f64> loc(#loc1)
%6 = torch.vtensor.literal(dense<1> : tensor<si64>) : !torch.vtensor<[],si64> loc(#loc1)
%7 = torch.vtensor.literal(dense<160> : tensor<si64>) : !torch.vtensor<[],si64> loc(#loc1)
%8 = torch.vtensor.literal(dense<-9.2103403719761836> : tensor<f64>) : !torch.vtensor<[],f64> loc(#loc1)
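// From here on: model weights, elided from the dump (dense_resource<__elided__>).
// The si8 tensors appear to be int8-quantized weights, each paired with a preceding
// f16 scale tensor whose shape broadcasts over quantization groups (e.g. [320,20,1]
// scales for a [320,320] si8 weight: 20 groups of 16 input channels per output
// channel). (Pattern inferred from the shapes.)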
%9 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x320xf16>) : !torch.vtensor<[1280,320],f16> loc(#loc0)
%10 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%11 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16> loc(#loc0)
%12 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%13 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x4x3x3xf16>) : !torch.vtensor<[320,4,3,3],f16> loc(#loc0)
%14 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%15 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%16 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%17 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1x3x3xf16>) : !torch.vtensor<[320,20,1,3,3],f16> loc(#loc0)
%18 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320x3x3xsi8>) : !torch.vtensor<[320,320,3,3],si8> loc(#loc0)
%19 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%20 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x1280xf16>) : !torch.vtensor<[320,1280],f16> loc(#loc0)
%21 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%22 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%23 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%24 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1x3x3xf16>) : !torch.vtensor<[320,20,1,3,3],f16> loc(#loc0)
%25 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320x3x3xsi8>) : !torch.vtensor<[320,320,3,3],si8> loc(#loc0)
%26 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%27 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1x1x1xf16>) : !torch.vtensor<[320,20,1,1,1],f16> loc(#loc0)
%28 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320x1x1xsi8>) : !torch.vtensor<[320,320,1,1],si8> loc(#loc0)
%29 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%30 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1xf16>) : !torch.vtensor<[320,20,1],f16> loc(#loc0)
%31 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xsi8>) : !torch.vtensor<[320,320],si8> loc(#loc0)
%32 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%33 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1xf16>) : !torch.vtensor<[320,20,1],f16> loc(#loc0)
%34 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xsi8>) : !torch.vtensor<[320,320],si8> loc(#loc0)
%35 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%36 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1xf16>) : !torch.vtensor<[320,20,1],f16> loc(#loc0)
%37 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xsi8>) : !torch.vtensor<[320,320],si8> loc(#loc0)
%38 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%39 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1xf16>) : !torch.vtensor<[320,20,1],f16> loc(#loc0)
%40 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xsi8>) : !torch.vtensor<[320,320],si8> loc(#loc0)
%41 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%42 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1xf16>) : !torch.vtensor<[320,20,1],f16> loc(#loc0)
%43 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xsi8>) : !torch.vtensor<[320,320],si8> loc(#loc0)
%44 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%45 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x48x1xf16>) : !torch.vtensor<[320,48,1],f16> loc(#loc0)
%46 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x768xsi8>) : !torch.vtensor<[320,768],si8> loc(#loc0)
%47 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x48x1xf16>) : !torch.vtensor<[320,48,1],f16> loc(#loc0)
%48 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x768xsi8>) : !torch.vtensor<[320,768],si8> loc(#loc0)
%49 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1xf16>) : !torch.vtensor<[320,20,1],f16> loc(#loc0)
%50 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xsi8>) : !torch.vtensor<[320,320],si8> loc(#loc0)
%51 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%52 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560x20x1xf16>) : !torch.vtensor<[2560,20,1],f16> loc(#loc0)
%53 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560x320xsi8>) : !torch.vtensor<[2560,320],si8> loc(#loc0)
%54 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560xf16>) : !torch.vtensor<[2560],f16> loc(#loc0)
%55 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x80x1xf16>) : !torch.vtensor<[320,80,1],f16> loc(#loc0)
%56 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x1280xsi8>) : !torch.vtensor<[320,1280],si8> loc(#loc0)
%57 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%58 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1x1x1xf16>) : !torch.vtensor<[320,20,1,1,1],f16> loc(#loc0)
%59 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320x1x1xsi8>) : !torch.vtensor<[320,320,1,1],si8> loc(#loc0)
%60 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%61 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%62 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%63 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1x3x3xf16>) : !torch.vtensor<[320,20,1,3,3],f16> loc(#loc0)
%64 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320x3x3xsi8>) : !torch.vtensor<[320,320,3,3],si8> loc(#loc0)
%65 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%66 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x1280xf16>) : !torch.vtensor<[320,1280],f16> loc(#loc0)
%67 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%68 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%69 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%70 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1x3x3xf16>) : !torch.vtensor<[320,20,1,3,3],f16> loc(#loc0)
%71 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320x3x3xsi8>) : !torch.vtensor<[320,320,3,3],si8> loc(#loc0)
%72 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%73 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1x1x1xf16>) : !torch.vtensor<[320,20,1,1,1],f16> loc(#loc0)
%74 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320x1x1xsi8>) : !torch.vtensor<[320,320,1,1],si8> loc(#loc0)
%75 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%76 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1xf16>) : !torch.vtensor<[320,20,1],f16> loc(#loc0)
%77 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xsi8>) : !torch.vtensor<[320,320],si8> loc(#loc0)
%78 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%79 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1xf16>) : !torch.vtensor<[320,20,1],f16> loc(#loc0)
%80 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xsi8>) : !torch.vtensor<[320,320],si8> loc(#loc0)
%81 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%82 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1xf16>) : !torch.vtensor<[320,20,1],f16> loc(#loc0)
%83 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xsi8>) : !torch.vtensor<[320,320],si8> loc(#loc0)
%84 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%85 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1xf16>) : !torch.vtensor<[320,20,1],f16> loc(#loc0)
%86 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xsi8>) : !torch.vtensor<[320,320],si8> loc(#loc0)
%87 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%88 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1xf16>) : !torch.vtensor<[320,20,1],f16> loc(#loc0)
%89 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xsi8>) : !torch.vtensor<[320,320],si8> loc(#loc0)
%90 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%91 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x48x1xf16>) : !torch.vtensor<[320,48,1],f16> loc(#loc0)
%92 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x768xsi8>) : !torch.vtensor<[320,768],si8> loc(#loc0)
%93 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x48x1xf16>) : !torch.vtensor<[320,48,1],f16> loc(#loc0)
%94 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x768xsi8>) : !torch.vtensor<[320,768],si8> loc(#loc0)
%95 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1xf16>) : !torch.vtensor<[320,20,1],f16> loc(#loc0)
%96 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xsi8>) : !torch.vtensor<[320,320],si8> loc(#loc0)
%97 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%98 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560x20x1xf16>) : !torch.vtensor<[2560,20,1],f16> loc(#loc0)
%99 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560x320xsi8>) : !torch.vtensor<[2560,320],si8> loc(#loc0)
%100 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560xf16>) : !torch.vtensor<[2560],f16> loc(#loc0)
%101 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x80x1xf16>) : !torch.vtensor<[320,80,1],f16> loc(#loc0)
%102 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x1280xsi8>) : !torch.vtensor<[320,1280],si8> loc(#loc0)
%103 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%104 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1x1x1xf16>) : !torch.vtensor<[320,20,1,1,1],f16> loc(#loc0)
%105 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320x1x1xsi8>) : !torch.vtensor<[320,320,1,1],si8> loc(#loc0)
%106 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%107 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1x3x3xf16>) : !torch.vtensor<[320,20,1,3,3],f16> loc(#loc0)
%108 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320x3x3xsi8>) : !torch.vtensor<[320,320,3,3],si8> loc(#loc0)
%109 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%110 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%111 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
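// Weights below widen from 320 to 640 channels, consistent with the next UNet
// down-block (inferred from shapes).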
%112 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x20x1x3x3xf16>) : !torch.vtensor<[640,20,1,3,3],f16> loc(#loc0)
%113 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x320x3x3xsi8>) : !torch.vtensor<[640,320,3,3],si8> loc(#loc0)
%114 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%115 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x1280xf16>) : !torch.vtensor<[640,1280],f16> loc(#loc0)
%116 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%117 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%118 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%119 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1x3x3xf16>) : !torch.vtensor<[640,40,1,3,3],f16> loc(#loc0)
%120 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640x3x3xsi8>) : !torch.vtensor<[640,640,3,3],si8> loc(#loc0)
%121 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%122 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x20x1x1x1xf16>) : !torch.vtensor<[640,20,1,1,1],f16> loc(#loc0)
%123 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x320x1x1xsi8>) : !torch.vtensor<[640,320,1,1],si8> loc(#loc0)
%124 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%125 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1x1x1xf16>) : !torch.vtensor<[640,40,1,1,1],f16> loc(#loc0)
%126 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640x1x1xsi8>) : !torch.vtensor<[640,640,1,1],si8> loc(#loc0)
%127 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%128 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1xf16>) : !torch.vtensor<[640,40,1],f16> loc(#loc0)
%129 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xsi8>) : !torch.vtensor<[640,640],si8> loc(#loc0)
%130 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%131 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1xf16>) : !torch.vtensor<[640,40,1],f16> loc(#loc0)
%132 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xsi8>) : !torch.vtensor<[640,640],si8> loc(#loc0)
%133 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%134 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1xf16>) : !torch.vtensor<[640,40,1],f16> loc(#loc0)
%135 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xsi8>) : !torch.vtensor<[640,640],si8> loc(#loc0)
%136 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%137 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1xf16>) : !torch.vtensor<[640,40,1],f16> loc(#loc0)
%138 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xsi8>) : !torch.vtensor<[640,640],si8> loc(#loc0)
%139 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%140 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1xf16>) : !torch.vtensor<[640,40,1],f16> loc(#loc0)
%141 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xsi8>) : !torch.vtensor<[640,640],si8> loc(#loc0)
%142 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%143 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x48x1xf16>) : !torch.vtensor<[640,48,1],f16> loc(#loc0)
%144 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x768xsi8>) : !torch.vtensor<[640,768],si8> loc(#loc0)
%145 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x48x1xf16>) : !torch.vtensor<[640,48,1],f16> loc(#loc0)
%146 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x768xsi8>) : !torch.vtensor<[640,768],si8> loc(#loc0)
%147 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1xf16>) : !torch.vtensor<[640,40,1],f16> loc(#loc0)
%148 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xsi8>) : !torch.vtensor<[640,640],si8> loc(#loc0)
%149 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%150 = torch.vtensor.literal(dense_resource<__elided__> : tensor<5120x40x1xf16>) : !torch.vtensor<[5120,40,1],f16> loc(#loc0)
%151 = torch.vtensor.literal(dense_resource<__elided__> : tensor<5120x640xsi8>) : !torch.vtensor<[5120,640],si8> loc(#loc0)
%152 = torch.vtensor.literal(dense_resource<__elided__> : tensor<5120xf16>) : !torch.vtensor<[5120],f16> loc(#loc0)
%153 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x160x1xf16>) : !torch.vtensor<[640,160,1],f16> loc(#loc0)
%154 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x2560xsi8>) : !torch.vtensor<[640,2560],si8> loc(#loc0)
%155 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%156 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1x1x1xf16>) : !torch.vtensor<[640,40,1,1,1],f16> loc(#loc0)
%157 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640x1x1xsi8>) : !torch.vtensor<[640,640,1,1],si8> loc(#loc0)
%158 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%159 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%160 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%161 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1x3x3xf16>) : !torch.vtensor<[640,40,1,3,3],f16> loc(#loc0)
%162 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640x3x3xsi8>) : !torch.vtensor<[640,640,3,3],si8> loc(#loc0)
%163 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%164 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x1280xf16>) : !torch.vtensor<[640,1280],f16> loc(#loc0)
%165 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%166 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%167 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%168 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1x3x3xf16>) : !torch.vtensor<[640,40,1,3,3],f16> loc(#loc0)
%169 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640x3x3xsi8>) : !torch.vtensor<[640,640,3,3],si8> loc(#loc0)
%170 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%171 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1x1x1xf16>) : !torch.vtensor<[640,40,1,1,1],f16> loc(#loc0)
%172 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640x1x1xsi8>) : !torch.vtensor<[640,640,1,1],si8> loc(#loc0)
%173 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%174 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1xf16>) : !torch.vtensor<[640,40,1],f16> loc(#loc0)
%175 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xsi8>) : !torch.vtensor<[640,640],si8> loc(#loc0)
%176 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%177 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1xf16>) : !torch.vtensor<[640,40,1],f16> loc(#loc0)
%178 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xsi8>) : !torch.vtensor<[640,640],si8> loc(#loc0)
%179 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%180 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1xf16>) : !torch.vtensor<[640,40,1],f16> loc(#loc0)
%181 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xsi8>) : !torch.vtensor<[640,640],si8> loc(#loc0)
%182 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%183 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1xf16>) : !torch.vtensor<[640,40,1],f16> loc(#loc0)
%184 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xsi8>) : !torch.vtensor<[640,640],si8> loc(#loc0)
%185 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%186 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1xf16>) : !torch.vtensor<[640,40,1],f16> loc(#loc0)
%187 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xsi8>) : !torch.vtensor<[640,640],si8> loc(#loc0)
%188 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%189 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x48x1xf16>) : !torch.vtensor<[640,48,1],f16> loc(#loc0)
%190 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x768xsi8>) : !torch.vtensor<[640,768],si8> loc(#loc0)
%191 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x48x1xf16>) : !torch.vtensor<[640,48,1],f16> loc(#loc0)
%192 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x768xsi8>) : !torch.vtensor<[640,768],si8> loc(#loc0)
%193 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1xf16>) : !torch.vtensor<[640,40,1],f16> loc(#loc0)
%194 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xsi8>) : !torch.vtensor<[640,640],si8> loc(#loc0)
%195 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%196 = torch.vtensor.literal(dense_resource<__elided__> : tensor<5120x40x1xf16>) : !torch.vtensor<[5120,40,1],f16> loc(#loc0)
%197 = torch.vtensor.literal(dense_resource<__elided__> : tensor<5120x640xsi8>) : !torch.vtensor<[5120,640],si8> loc(#loc0)
%198 = torch.vtensor.literal(dense_resource<__elided__> : tensor<5120xf16>) : !torch.vtensor<[5120],f16> loc(#loc0)
%199 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x160x1xf16>) : !torch.vtensor<[640,160,1],f16> loc(#loc0)
%200 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x2560xsi8>) : !torch.vtensor<[640,2560],si8> loc(#loc0)
%201 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%202 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1x1x1xf16>) : !torch.vtensor<[640,40,1,1,1],f16> loc(#loc0)
%203 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640x1x1xsi8>) : !torch.vtensor<[640,640,1,1],si8> loc(#loc0)
%204 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%205 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1x3x3xf16>) : !torch.vtensor<[640,40,1,3,3],f16> loc(#loc0)
%206 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640x3x3xsi8>) : !torch.vtensor<[640,640,3,3],si8> loc(#loc0)
%207 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%208 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%209 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
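// Weights below widen from 640 to 1280 channels, consistent with the deeper UNet
// down-blocks and mid-block (inferred from shapes).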
%210 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x40x1x3x3xf16>) : !torch.vtensor<[1280,40,1,3,3],f16> loc(#loc0)
%211 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x640x3x3xsi8>) : !torch.vtensor<[1280,640,3,3],si8> loc(#loc0)
%212 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%213 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16> loc(#loc0)
%214 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%215 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%216 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%217 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1x3x3xf16>) : !torch.vtensor<[1280,80,1,3,3],f16> loc(#loc0)
%218 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x3x3xsi8>) : !torch.vtensor<[1280,1280,3,3],si8> loc(#loc0)
%219 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%220 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x40x1x1x1xf16>) : !torch.vtensor<[1280,40,1,1,1],f16> loc(#loc0)
%221 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x640x1x1xsi8>) : !torch.vtensor<[1280,640,1,1],si8> loc(#loc0)
%222 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%223 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1x1x1xf16>) : !torch.vtensor<[1280,80,1,1,1],f16> loc(#loc0)
%224 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x1x1xsi8>) : !torch.vtensor<[1280,1280,1,1],si8> loc(#loc0)
%225 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%226 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1xf16>) : !torch.vtensor<[1280,80,1],f16> loc(#loc0)
%227 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xsi8>) : !torch.vtensor<[1280,1280],si8> loc(#loc0)
%228 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%229 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1xf16>) : !torch.vtensor<[1280,80,1],f16> loc(#loc0)
%230 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xsi8>) : !torch.vtensor<[1280,1280],si8> loc(#loc0)
%231 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%232 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1xf16>) : !torch.vtensor<[1280,80,1],f16> loc(#loc0)
%233 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xsi8>) : !torch.vtensor<[1280,1280],si8> loc(#loc0)
%234 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%235 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1xf16>) : !torch.vtensor<[1280,80,1],f16> loc(#loc0)
%236 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xsi8>) : !torch.vtensor<[1280,1280],si8> loc(#loc0)
%237 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%238 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1xf16>) : !torch.vtensor<[1280,80,1],f16> loc(#loc0)
%239 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xsi8>) : !torch.vtensor<[1280,1280],si8> loc(#loc0)
%240 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%241 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x48x1xf16>) : !torch.vtensor<[1280,48,1],f16> loc(#loc0)
%242 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x768xsi8>) : !torch.vtensor<[1280,768],si8> loc(#loc0)
%243 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x48x1xf16>) : !torch.vtensor<[1280,48,1],f16> loc(#loc0)
%244 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x768xsi8>) : !torch.vtensor<[1280,768],si8> loc(#loc0)
%245 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1xf16>) : !torch.vtensor<[1280,80,1],f16> loc(#loc0)
%246 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xsi8>) : !torch.vtensor<[1280,1280],si8> loc(#loc0)
%247 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%248 = torch.vtensor.literal(dense_resource<__elided__> : tensor<10240x80x1xf16>) : !torch.vtensor<[10240,80,1],f16> loc(#loc0)
%249 = torch.vtensor.literal(dense_resource<__elided__> : tensor<10240x1280xsi8>) : !torch.vtensor<[10240,1280],si8> loc(#loc0)
%250 = torch.vtensor.literal(dense_resource<__elided__> : tensor<10240xf16>) : !torch.vtensor<[10240],f16> loc(#loc0)
%251 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x320x1xf16>) : !torch.vtensor<[1280,320,1],f16> loc(#loc0)
%252 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x5120xsi8>) : !torch.vtensor<[1280,5120],si8> loc(#loc0)
%253 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%254 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1x1x1xf16>) : !torch.vtensor<[1280,80,1,1,1],f16> loc(#loc0)
%255 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x1x1xsi8>) : !torch.vtensor<[1280,1280,1,1],si8> loc(#loc0)
%256 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%257 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%258 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%259 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1x3x3xf16>) : !torch.vtensor<[1280,80,1,3,3],f16> loc(#loc0)
%260 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x3x3xsi8>) : !torch.vtensor<[1280,1280,3,3],si8> loc(#loc0)
%261 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%262 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16> loc(#loc0)
%263 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%264 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%265 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%266 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1x3x3xf16>) : !torch.vtensor<[1280,80,1,3,3],f16> loc(#loc0)
%267 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x3x3xsi8>) : !torch.vtensor<[1280,1280,3,3],si8> loc(#loc0)
%268 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%269 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1x1x1xf16>) : !torch.vtensor<[1280,80,1,1,1],f16> loc(#loc0)
%270 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x1x1xsi8>) : !torch.vtensor<[1280,1280,1,1],si8> loc(#loc0)
%271 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%272 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1xf16>) : !torch.vtensor<[1280,80,1],f16> loc(#loc0)
%273 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xsi8>) : !torch.vtensor<[1280,1280],si8> loc(#loc0)
%274 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%275 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1xf16>) : !torch.vtensor<[1280,80,1],f16> loc(#loc0)
%276 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xsi8>) : !torch.vtensor<[1280,1280],si8> loc(#loc0)
%277 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%278 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1xf16>) : !torch.vtensor<[1280,80,1],f16> loc(#loc0)
%279 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xsi8>) : !torch.vtensor<[1280,1280],si8> loc(#loc0)
%280 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%281 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1xf16>) : !torch.vtensor<[1280,80,1],f16> loc(#loc0)
%282 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xsi8>) : !torch.vtensor<[1280,1280],si8> loc(#loc0)
%283 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%284 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1xf16>) : !torch.vtensor<[1280,80,1],f16> loc(#loc0)
%285 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xsi8>) : !torch.vtensor<[1280,1280],si8> loc(#loc0)
%286 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%287 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x48x1xf16>) : !torch.vtensor<[1280,48,1],f16> loc(#loc0)
%288 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x768xsi8>) : !torch.vtensor<[1280,768],si8> loc(#loc0)
%289 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x48x1xf16>) : !torch.vtensor<[1280,48,1],f16> loc(#loc0)
%290 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x768xsi8>) : !torch.vtensor<[1280,768],si8> loc(#loc0)
%291 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1xf16>) : !torch.vtensor<[1280,80,1],f16> loc(#loc0)
%292 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xsi8>) : !torch.vtensor<[1280,1280],si8> loc(#loc0)
%293 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%294 = torch.vtensor.literal(dense_resource<__elided__> : tensor<10240x80x1xf16>) : !torch.vtensor<[10240,80,1],f16> loc(#loc0)
%295 = torch.vtensor.literal(dense_resource<__elided__> : tensor<10240x1280xsi8>) : !torch.vtensor<[10240,1280],si8> loc(#loc0)
%296 = torch.vtensor.literal(dense_resource<__elided__> : tensor<10240xf16>) : !torch.vtensor<[10240],f16> loc(#loc0)
%297 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x320x1xf16>) : !torch.vtensor<[1280,320,1],f16> loc(#loc0)
%298 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x5120xsi8>) : !torch.vtensor<[1280,5120],si8> loc(#loc0)
%299 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%300 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1x1x1xf16>) : !torch.vtensor<[1280,80,1,1,1],f16> loc(#loc0)
%301 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x1x1xsi8>) : !torch.vtensor<[1280,1280,1,1],si8> loc(#loc0)
%302 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%303 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1x3x3xf16>) : !torch.vtensor<[1280,80,1,3,3],f16> loc(#loc0)
%304 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x3x3xsi8>) : !torch.vtensor<[1280,1280,3,3],si8> loc(#loc0)
%305 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%306 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%307 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%308 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1x3x3xf16>) : !torch.vtensor<[1280,80,1,3,3],f16> loc(#loc0)
%309 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x3x3xsi8>) : !torch.vtensor<[1280,1280,3,3],si8> loc(#loc0)
%310 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%311 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16> loc(#loc0)
%312 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%313 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%314 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%315 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1x3x3xf16>) : !torch.vtensor<[1280,80,1,3,3],f16> loc(#loc0)
%316 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x3x3xsi8>) : !torch.vtensor<[1280,1280,3,3],si8> loc(#loc0)
%317 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%318 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%319 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%320 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1x3x3xf16>) : !torch.vtensor<[1280,80,1,3,3],f16> loc(#loc0)
%321 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x3x3xsi8>) : !torch.vtensor<[1280,1280,3,3],si8> loc(#loc0)
%322 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%323 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16> loc(#loc0)
%324 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%325 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%326 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%327 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1x3x3xf16>) : !torch.vtensor<[1280,80,1,3,3],f16> loc(#loc0)
%328 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x3x3xsi8>) : !torch.vtensor<[1280,1280,3,3],si8> loc(#loc0)
%329 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%330 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%331 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%332 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1x3x3xf16>) : !torch.vtensor<[1280,80,1,3,3],f16> loc(#loc0)
%333 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x3x3xsi8>) : !torch.vtensor<[1280,1280,3,3],si8> loc(#loc0)
%334 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%335 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16> loc(#loc0)
%336 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%337 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%338 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%339 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1x3x3xf16>) : !torch.vtensor<[1280,80,1,3,3],f16> loc(#loc0)
%340 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x3x3xsi8>) : !torch.vtensor<[1280,1280,3,3],si8> loc(#loc0)
%341 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%342 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1x1x1xf16>) : !torch.vtensor<[1280,80,1,1,1],f16> loc(#loc0)
%343 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x1x1xsi8>) : !torch.vtensor<[1280,1280,1,1],si8> loc(#loc0)
%344 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%345 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1xf16>) : !torch.vtensor<[1280,80,1],f16> loc(#loc0)
%346 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xsi8>) : !torch.vtensor<[1280,1280],si8> loc(#loc0)
%347 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%348 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1xf16>) : !torch.vtensor<[1280,80,1],f16> loc(#loc0)
%349 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xsi8>) : !torch.vtensor<[1280,1280],si8> loc(#loc0)
%350 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%351 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1xf16>) : !torch.vtensor<[1280,80,1],f16> loc(#loc0)
%352 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xsi8>) : !torch.vtensor<[1280,1280],si8> loc(#loc0)
%353 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%354 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1xf16>) : !torch.vtensor<[1280,80,1],f16> loc(#loc0)
%355 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xsi8>) : !torch.vtensor<[1280,1280],si8> loc(#loc0)
%356 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%357 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1xf16>) : !torch.vtensor<[1280,80,1],f16> loc(#loc0)
%358 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xsi8>) : !torch.vtensor<[1280,1280],si8> loc(#loc0)
%359 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%360 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x48x1xf16>) : !torch.vtensor<[1280,48,1],f16> loc(#loc0)
%361 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x768xsi8>) : !torch.vtensor<[1280,768],si8> loc(#loc0)
%362 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x48x1xf16>) : !torch.vtensor<[1280,48,1],f16> loc(#loc0)
%363 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x768xsi8>) : !torch.vtensor<[1280,768],si8> loc(#loc0)
%364 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1xf16>) : !torch.vtensor<[1280,80,1],f16> loc(#loc0)
%365 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xsi8>) : !torch.vtensor<[1280,1280],si8> loc(#loc0)
%366 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%367 = torch.vtensor.literal(dense_resource<__elided__> : tensor<10240x80x1xf16>) : !torch.vtensor<[10240,80,1],f16> loc(#loc0)
%368 = torch.vtensor.literal(dense_resource<__elided__> : tensor<10240x1280xsi8>) : !torch.vtensor<[10240,1280],si8> loc(#loc0)
%369 = torch.vtensor.literal(dense_resource<__elided__> : tensor<10240xf16>) : !torch.vtensor<[10240],f16> loc(#loc0)
%370 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x320x1xf16>) : !torch.vtensor<[1280,320,1],f16> loc(#loc0)
%371 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x5120xsi8>) : !torch.vtensor<[1280,5120],si8> loc(#loc0)
%372 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%373 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1x1x1xf16>) : !torch.vtensor<[1280,80,1,1,1],f16> loc(#loc0)
%374 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x1x1xsi8>) : !torch.vtensor<[1280,1280,1,1],si8> loc(#loc0)
%375 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%376 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%377 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%378 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1x3x3xf16>) : !torch.vtensor<[1280,80,1,3,3],f16> loc(#loc0)
%379 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x3x3xsi8>) : !torch.vtensor<[1280,1280,3,3],si8> loc(#loc0)
%380 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%381 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16> loc(#loc0)
%382 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%383 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%384 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%385 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1x3x3xf16>) : !torch.vtensor<[1280,80,1,3,3],f16> loc(#loc0)
%386 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x3x3xsi8>) : !torch.vtensor<[1280,1280,3,3],si8> loc(#loc0)
%387 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
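// The 2560-channel norm parameters and [1280,2560,3,3] convolutions below are
// consistent with UNet up-blocks consuming concatenated skip connections
// (1280 + 1280 = 2560); inferred from shapes, not stated in the dump.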
%388 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560xf16>) : !torch.vtensor<[2560],f16> loc(#loc0)
%389 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560xf16>) : !torch.vtensor<[2560],f16> loc(#loc0)
%390 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x160x1x3x3xf16>) : !torch.vtensor<[1280,160,1,3,3],f16> loc(#loc0)
%391 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x2560x3x3xsi8>) : !torch.vtensor<[1280,2560,3,3],si8> loc(#loc0)
%392 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%393 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16> loc(#loc0)
%394 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%395 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%396 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%397 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1x3x3xf16>) : !torch.vtensor<[1280,80,1,3,3],f16> loc(#loc0)
%398 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x3x3xsi8>) : !torch.vtensor<[1280,1280,3,3],si8> loc(#loc0)
%399 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%400 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x160x1x1x1xf16>) : !torch.vtensor<[1280,160,1,1,1],f16> loc(#loc0)
%401 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x2560x1x1xsi8>) : !torch.vtensor<[1280,2560,1,1],si8> loc(#loc0)
%402 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%403 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560xf16>) : !torch.vtensor<[2560],f16> loc(#loc0)
%404 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560xf16>) : !torch.vtensor<[2560],f16> loc(#loc0)
%405 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x160x1x3x3xf16>) : !torch.vtensor<[1280,160,1,3,3],f16> loc(#loc0)
%406 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x2560x3x3xsi8>) : !torch.vtensor<[1280,2560,3,3],si8> loc(#loc0)
%407 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%408 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16> loc(#loc0)
%409 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%410 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%411 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%412 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1x3x3xf16>) : !torch.vtensor<[1280,80,1,3,3],f16> loc(#loc0)
%413 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x3x3xsi8>) : !torch.vtensor<[1280,1280,3,3],si8> loc(#loc0)
%414 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%415 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x160x1x1x1xf16>) : !torch.vtensor<[1280,160,1,1,1],f16> loc(#loc0)
%416 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x2560x1x1xsi8>) : !torch.vtensor<[1280,2560,1,1],si8> loc(#loc0)
%417 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%418 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560xf16>) : !torch.vtensor<[2560],f16> loc(#loc0)
%419 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560xf16>) : !torch.vtensor<[2560],f16> loc(#loc0)
%420 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x160x1x3x3xf16>) : !torch.vtensor<[1280,160,1,3,3],f16> loc(#loc0)
%421 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x2560x3x3xsi8>) : !torch.vtensor<[1280,2560,3,3],si8> loc(#loc0)
%422 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%423 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16> loc(#loc0)
%424 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%425 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%426 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%427 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1x3x3xf16>) : !torch.vtensor<[1280,80,1,3,3],f16> loc(#loc0)
%428 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x3x3xsi8>) : !torch.vtensor<[1280,1280,3,3],si8> loc(#loc0)
%429 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%430 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x160x1x1x1xf16>) : !torch.vtensor<[1280,160,1,1,1],f16> loc(#loc0)
%431 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x2560x1x1xsi8>) : !torch.vtensor<[1280,2560,1,1],si8> loc(#loc0)
%432 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%433 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1x3x3xf16>) : !torch.vtensor<[1280,80,1,3,3],f16> loc(#loc0)
%434 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x3x3xsi8>) : !torch.vtensor<[1280,1280,3,3],si8> loc(#loc0)
%435 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%436 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560xf16>) : !torch.vtensor<[2560],f16> loc(#loc0)
%437 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560xf16>) : !torch.vtensor<[2560],f16> loc(#loc0)
%438 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x160x1x3x3xf16>) : !torch.vtensor<[1280,160,1,3,3],f16> loc(#loc0)
%439 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x2560x3x3xsi8>) : !torch.vtensor<[1280,2560,3,3],si8> loc(#loc0)
%440 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%441 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16> loc(#loc0)
%442 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%443 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%444 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%445 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1x3x3xf16>) : !torch.vtensor<[1280,80,1,3,3],f16> loc(#loc0)
%446 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x3x3xsi8>) : !torch.vtensor<[1280,1280,3,3],si8> loc(#loc0)
%447 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%448 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x160x1x1x1xf16>) : !torch.vtensor<[1280,160,1,1,1],f16> loc(#loc0)
%449 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x2560x1x1xsi8>) : !torch.vtensor<[1280,2560,1,1],si8> loc(#loc0)
%450 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%451 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1x1x1xf16>) : !torch.vtensor<[1280,80,1,1,1],f16> loc(#loc0)
%452 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x1x1xsi8>) : !torch.vtensor<[1280,1280,1,1],si8> loc(#loc0)
%453 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%454 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1xf16>) : !torch.vtensor<[1280,80,1],f16> loc(#loc0)
%455 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xsi8>) : !torch.vtensor<[1280,1280],si8> loc(#loc0)
%456 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%457 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1xf16>) : !torch.vtensor<[1280,80,1],f16> loc(#loc0)
%458 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xsi8>) : !torch.vtensor<[1280,1280],si8> loc(#loc0)
%459 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%460 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1xf16>) : !torch.vtensor<[1280,80,1],f16> loc(#loc0)
%461 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xsi8>) : !torch.vtensor<[1280,1280],si8> loc(#loc0)
%462 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%463 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1xf16>) : !torch.vtensor<[1280,80,1],f16> loc(#loc0)
%464 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xsi8>) : !torch.vtensor<[1280,1280],si8> loc(#loc0)
%465 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%466 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1xf16>) : !torch.vtensor<[1280,80,1],f16> loc(#loc0)
%467 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xsi8>) : !torch.vtensor<[1280,1280],si8> loc(#loc0)
%468 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%469 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x48x1xf16>) : !torch.vtensor<[1280,48,1],f16> loc(#loc0)
%470 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x768xsi8>) : !torch.vtensor<[1280,768],si8> loc(#loc0)
%471 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x48x1xf16>) : !torch.vtensor<[1280,48,1],f16> loc(#loc0)
%472 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x768xsi8>) : !torch.vtensor<[1280,768],si8> loc(#loc0)
%473 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1xf16>) : !torch.vtensor<[1280,80,1],f16> loc(#loc0)
%474 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xsi8>) : !torch.vtensor<[1280,1280],si8> loc(#loc0)
%475 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%476 = torch.vtensor.literal(dense_resource<__elided__> : tensor<10240x80x1xf16>) : !torch.vtensor<[10240,80,1],f16> loc(#loc0)
%477 = torch.vtensor.literal(dense_resource<__elided__> : tensor<10240x1280xsi8>) : !torch.vtensor<[10240,1280],si8> loc(#loc0)
%478 = torch.vtensor.literal(dense_resource<__elided__> : tensor<10240xf16>) : !torch.vtensor<[10240],f16> loc(#loc0)
%479 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x320x1xf16>) : !torch.vtensor<[1280,320,1],f16> loc(#loc0)
%480 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x5120xsi8>) : !torch.vtensor<[1280,5120],si8> loc(#loc0)
%481 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%482 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1x1x1xf16>) : !torch.vtensor<[1280,80,1,1,1],f16> loc(#loc0)
%483 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x1x1xsi8>) : !torch.vtensor<[1280,1280,1,1],si8> loc(#loc0)
%484 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%485 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560xf16>) : !torch.vtensor<[2560],f16> loc(#loc0)
%486 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560xf16>) : !torch.vtensor<[2560],f16> loc(#loc0)
%487 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x160x1x3x3xf16>) : !torch.vtensor<[1280,160,1,3,3],f16> loc(#loc0)
%488 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x2560x3x3xsi8>) : !torch.vtensor<[1280,2560,3,3],si8> loc(#loc0)
%489 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%490 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16> loc(#loc0)
%491 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%492 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%493 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%494 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1x3x3xf16>) : !torch.vtensor<[1280,80,1,3,3],f16> loc(#loc0)
%495 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x3x3xsi8>) : !torch.vtensor<[1280,1280,3,3],si8> loc(#loc0)
%496 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%497 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x160x1x1x1xf16>) : !torch.vtensor<[1280,160,1,1,1],f16> loc(#loc0)
%498 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x2560x1x1xsi8>) : !torch.vtensor<[1280,2560,1,1],si8> loc(#loc0)
%499 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%500 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1x1x1xf16>) : !torch.vtensor<[1280,80,1,1,1],f16> loc(#loc0)
%501 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x1x1xsi8>) : !torch.vtensor<[1280,1280,1,1],si8> loc(#loc0)
%502 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%503 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1xf16>) : !torch.vtensor<[1280,80,1],f16> loc(#loc0)
%504 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xsi8>) : !torch.vtensor<[1280,1280],si8> loc(#loc0)
%505 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%506 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1xf16>) : !torch.vtensor<[1280,80,1],f16> loc(#loc0)
%507 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xsi8>) : !torch.vtensor<[1280,1280],si8> loc(#loc0)
%508 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%509 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1xf16>) : !torch.vtensor<[1280,80,1],f16> loc(#loc0)
%510 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xsi8>) : !torch.vtensor<[1280,1280],si8> loc(#loc0)
%511 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%512 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1xf16>) : !torch.vtensor<[1280,80,1],f16> loc(#loc0)
%513 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xsi8>) : !torch.vtensor<[1280,1280],si8> loc(#loc0)
%514 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%515 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1xf16>) : !torch.vtensor<[1280,80,1],f16> loc(#loc0)
%516 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xsi8>) : !torch.vtensor<[1280,1280],si8> loc(#loc0)
%517 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%518 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x48x1xf16>) : !torch.vtensor<[1280,48,1],f16> loc(#loc0)
%519 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x768xsi8>) : !torch.vtensor<[1280,768],si8> loc(#loc0)
%520 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x48x1xf16>) : !torch.vtensor<[1280,48,1],f16> loc(#loc0)
%521 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x768xsi8>) : !torch.vtensor<[1280,768],si8> loc(#loc0)
%522 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1xf16>) : !torch.vtensor<[1280,80,1],f16> loc(#loc0)
%523 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xsi8>) : !torch.vtensor<[1280,1280],si8> loc(#loc0)
%524 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%525 = torch.vtensor.literal(dense_resource<__elided__> : tensor<10240x80x1xf16>) : !torch.vtensor<[10240,80,1],f16> loc(#loc0)
%526 = torch.vtensor.literal(dense_resource<__elided__> : tensor<10240x1280xsi8>) : !torch.vtensor<[10240,1280],si8> loc(#loc0)
%527 = torch.vtensor.literal(dense_resource<__elided__> : tensor<10240xf16>) : !torch.vtensor<[10240],f16> loc(#loc0)
%528 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x320x1xf16>) : !torch.vtensor<[1280,320,1],f16> loc(#loc0)
%529 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x5120xsi8>) : !torch.vtensor<[1280,5120],si8> loc(#loc0)
%530 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%531 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1x1x1xf16>) : !torch.vtensor<[1280,80,1,1,1],f16> loc(#loc0)
%532 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x1x1xsi8>) : !torch.vtensor<[1280,1280,1,1],si8> loc(#loc0)
%533 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%534 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1920xf16>) : !torch.vtensor<[1920],f16> loc(#loc0)
%535 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1920xf16>) : !torch.vtensor<[1920],f16> loc(#loc0)
%536 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x120x1x3x3xf16>) : !torch.vtensor<[1280,120,1,3,3],f16> loc(#loc0)
%537 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1920x3x3xsi8>) : !torch.vtensor<[1280,1920,3,3],si8> loc(#loc0)
%538 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%539 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16> loc(#loc0)
%540 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%541 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%542 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%543 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1x3x3xf16>) : !torch.vtensor<[1280,80,1,3,3],f16> loc(#loc0)
%544 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x3x3xsi8>) : !torch.vtensor<[1280,1280,3,3],si8> loc(#loc0)
%545 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%546 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x120x1x1x1xf16>) : !torch.vtensor<[1280,120,1,1,1],f16> loc(#loc0)
%547 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1920x1x1xsi8>) : !torch.vtensor<[1280,1920,1,1],si8> loc(#loc0)
%548 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%549 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1x1x1xf16>) : !torch.vtensor<[1280,80,1,1,1],f16> loc(#loc0)
%550 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x1x1xsi8>) : !torch.vtensor<[1280,1280,1,1],si8> loc(#loc0)
%551 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%552 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1xf16>) : !torch.vtensor<[1280,80,1],f16> loc(#loc0)
%553 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xsi8>) : !torch.vtensor<[1280,1280],si8> loc(#loc0)
%554 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%555 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1xf16>) : !torch.vtensor<[1280,80,1],f16> loc(#loc0)
%556 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xsi8>) : !torch.vtensor<[1280,1280],si8> loc(#loc0)
%557 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%558 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1xf16>) : !torch.vtensor<[1280,80,1],f16> loc(#loc0)
%559 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xsi8>) : !torch.vtensor<[1280,1280],si8> loc(#loc0)
%560 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%561 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1xf16>) : !torch.vtensor<[1280,80,1],f16> loc(#loc0)
%562 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xsi8>) : !torch.vtensor<[1280,1280],si8> loc(#loc0)
%563 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%564 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1xf16>) : !torch.vtensor<[1280,80,1],f16> loc(#loc0)
%565 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xsi8>) : !torch.vtensor<[1280,1280],si8> loc(#loc0)
%566 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%567 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x48x1xf16>) : !torch.vtensor<[1280,48,1],f16> loc(#loc0)
%568 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x768xsi8>) : !torch.vtensor<[1280,768],si8> loc(#loc0)
%569 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x48x1xf16>) : !torch.vtensor<[1280,48,1],f16> loc(#loc0)
%570 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x768xsi8>) : !torch.vtensor<[1280,768],si8> loc(#loc0)
%571 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1xf16>) : !torch.vtensor<[1280,80,1],f16> loc(#loc0)
%572 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xsi8>) : !torch.vtensor<[1280,1280],si8> loc(#loc0)
%573 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%574 = torch.vtensor.literal(dense_resource<__elided__> : tensor<10240x80x1xf16>) : !torch.vtensor<[10240,80,1],f16> loc(#loc0)
%575 = torch.vtensor.literal(dense_resource<__elided__> : tensor<10240x1280xsi8>) : !torch.vtensor<[10240,1280],si8> loc(#loc0)
%576 = torch.vtensor.literal(dense_resource<__elided__> : tensor<10240xf16>) : !torch.vtensor<[10240],f16> loc(#loc0)
%577 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x320x1xf16>) : !torch.vtensor<[1280,320,1],f16> loc(#loc0)
%578 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x5120xsi8>) : !torch.vtensor<[1280,5120],si8> loc(#loc0)
%579 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%580 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1x1x1xf16>) : !torch.vtensor<[1280,80,1,1,1],f16> loc(#loc0)
%581 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x1x1xsi8>) : !torch.vtensor<[1280,1280,1,1],si8> loc(#loc0)
%582 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%583 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1x3x3xf16>) : !torch.vtensor<[1280,80,1,3,3],f16> loc(#loc0)
%584 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x3x3xsi8>) : !torch.vtensor<[1280,1280,3,3],si8> loc(#loc0)
%585 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
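// Output width drops from 1280 to 640 below. The 1920-element norm parameters
// (1920 = 1280 + 640) are consistent with a skip-connection concat at a UNet
// up-block boundary; this reading is inferred from shapes only.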
%586 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1920xf16>) : !torch.vtensor<[1920],f16> loc(#loc0)
%587 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1920xf16>) : !torch.vtensor<[1920],f16> loc(#loc0)
%588 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x120x1x3x3xf16>) : !torch.vtensor<[640,120,1,3,3],f16> loc(#loc0)
%589 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x1920x3x3xsi8>) : !torch.vtensor<[640,1920,3,3],si8> loc(#loc0)
%590 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%591 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x1280xf16>) : !torch.vtensor<[640,1280],f16> loc(#loc0)
%592 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%593 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%594 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%595 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1x3x3xf16>) : !torch.vtensor<[640,40,1,3,3],f16> loc(#loc0)
%596 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640x3x3xsi8>) : !torch.vtensor<[640,640,3,3],si8> loc(#loc0)
%597 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%598 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x120x1x1x1xf16>) : !torch.vtensor<[640,120,1,1,1],f16> loc(#loc0)
%599 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x1920x1x1xsi8>) : !torch.vtensor<[640,1920,1,1],si8> loc(#loc0)
%600 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%601 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1x1x1xf16>) : !torch.vtensor<[640,40,1,1,1],f16> loc(#loc0)
%602 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640x1x1xsi8>) : !torch.vtensor<[640,640,1,1],si8> loc(#loc0)
%603 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%604 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1xf16>) : !torch.vtensor<[640,40,1],f16> loc(#loc0)
%605 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xsi8>) : !torch.vtensor<[640,640],si8> loc(#loc0)
%606 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%607 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1xf16>) : !torch.vtensor<[640,40,1],f16> loc(#loc0)
%608 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xsi8>) : !torch.vtensor<[640,640],si8> loc(#loc0)
%609 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%610 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1xf16>) : !torch.vtensor<[640,40,1],f16> loc(#loc0)
%611 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xsi8>) : !torch.vtensor<[640,640],si8> loc(#loc0)
%612 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%613 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1xf16>) : !torch.vtensor<[640,40,1],f16> loc(#loc0)
%614 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xsi8>) : !torch.vtensor<[640,640],si8> loc(#loc0)
%615 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%616 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1xf16>) : !torch.vtensor<[640,40,1],f16> loc(#loc0)
%617 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xsi8>) : !torch.vtensor<[640,640],si8> loc(#loc0)
%618 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%619 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x48x1xf16>) : !torch.vtensor<[640,48,1],f16> loc(#loc0)
%620 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x768xsi8>) : !torch.vtensor<[640,768],si8> loc(#loc0)
%621 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x48x1xf16>) : !torch.vtensor<[640,48,1],f16> loc(#loc0)
%622 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x768xsi8>) : !torch.vtensor<[640,768],si8> loc(#loc0)
%623 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1xf16>) : !torch.vtensor<[640,40,1],f16> loc(#loc0)
%624 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xsi8>) : !torch.vtensor<[640,640],si8> loc(#loc0)
%625 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%626 = torch.vtensor.literal(dense_resource<__elided__> : tensor<5120x40x1xf16>) : !torch.vtensor<[5120,40,1],f16> loc(#loc0)
%627 = torch.vtensor.literal(dense_resource<__elided__> : tensor<5120x640xsi8>) : !torch.vtensor<[5120,640],si8> loc(#loc0)
%628 = torch.vtensor.literal(dense_resource<__elided__> : tensor<5120xf16>) : !torch.vtensor<[5120],f16> loc(#loc0)
%629 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x160x1xf16>) : !torch.vtensor<[640,160,1],f16> loc(#loc0)
%630 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x2560xsi8>) : !torch.vtensor<[640,2560],si8> loc(#loc0)
%631 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%632 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1x1x1xf16>) : !torch.vtensor<[640,40,1,1,1],f16> loc(#loc0)
%633 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640x1x1xsi8>) : !torch.vtensor<[640,640,1,1],si8> loc(#loc0)
%634 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%635 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%636 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> loc(#loc0)
%637 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x80x1x3x3xf16>) : !torch.vtensor<[640,80,1,3,3],f16> loc(#loc0)
%638 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x1280x3x3xsi8>) : !torch.vtensor<[640,1280,3,3],si8> loc(#loc0)
%639 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%640 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x1280xf16>) : !torch.vtensor<[640,1280],f16> loc(#loc0)
%641 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%642 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%643 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%644 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1x3x3xf16>) : !torch.vtensor<[640,40,1,3,3],f16> loc(#loc0)
%645 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640x3x3xsi8>) : !torch.vtensor<[640,640,3,3],si8> loc(#loc0)
%646 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%647 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x80x1x1x1xf16>) : !torch.vtensor<[640,80,1,1,1],f16> loc(#loc0)
%648 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x1280x1x1xsi8>) : !torch.vtensor<[640,1280,1,1],si8> loc(#loc0)
%649 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%650 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1x1x1xf16>) : !torch.vtensor<[640,40,1,1,1],f16> loc(#loc0)
%651 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640x1x1xsi8>) : !torch.vtensor<[640,640,1,1],si8> loc(#loc0)
%652 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%653 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1xf16>) : !torch.vtensor<[640,40,1],f16> loc(#loc0)
%654 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xsi8>) : !torch.vtensor<[640,640],si8> loc(#loc0)
%655 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%656 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1xf16>) : !torch.vtensor<[640,40,1],f16> loc(#loc0)
%657 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xsi8>) : !torch.vtensor<[640,640],si8> loc(#loc0)
%658 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%659 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1xf16>) : !torch.vtensor<[640,40,1],f16> loc(#loc0)
%660 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xsi8>) : !torch.vtensor<[640,640],si8> loc(#loc0)
%661 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%662 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1xf16>) : !torch.vtensor<[640,40,1],f16> loc(#loc0)
%663 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xsi8>) : !torch.vtensor<[640,640],si8> loc(#loc0)
%664 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%665 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1xf16>) : !torch.vtensor<[640,40,1],f16> loc(#loc0)
%666 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xsi8>) : !torch.vtensor<[640,640],si8> loc(#loc0)
%667 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%668 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x48x1xf16>) : !torch.vtensor<[640,48,1],f16> loc(#loc0)
%669 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x768xsi8>) : !torch.vtensor<[640,768],si8> loc(#loc0)
%670 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x48x1xf16>) : !torch.vtensor<[640,48,1],f16> loc(#loc0)
%671 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x768xsi8>) : !torch.vtensor<[640,768],si8> loc(#loc0)
%672 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1xf16>) : !torch.vtensor<[640,40,1],f16> loc(#loc0)
%673 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xsi8>) : !torch.vtensor<[640,640],si8> loc(#loc0)
%674 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%675 = torch.vtensor.literal(dense_resource<__elided__> : tensor<5120x40x1xf16>) : !torch.vtensor<[5120,40,1],f16> loc(#loc0)
%676 = torch.vtensor.literal(dense_resource<__elided__> : tensor<5120x640xsi8>) : !torch.vtensor<[5120,640],si8> loc(#loc0)
%677 = torch.vtensor.literal(dense_resource<__elided__> : tensor<5120xf16>) : !torch.vtensor<[5120],f16> loc(#loc0)
%678 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x160x1xf16>) : !torch.vtensor<[640,160,1],f16> loc(#loc0)
%679 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x2560xsi8>) : !torch.vtensor<[640,2560],si8> loc(#loc0)
%680 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%681 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1x1x1xf16>) : !torch.vtensor<[640,40,1,1,1],f16> loc(#loc0)
%682 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640x1x1xsi8>) : !torch.vtensor<[640,640,1,1],si8> loc(#loc0)
%683 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%684 = torch.vtensor.literal(dense_resource<__elided__> : tensor<960xf16>) : !torch.vtensor<[960],f16> loc(#loc0)
%685 = torch.vtensor.literal(dense_resource<__elided__> : tensor<960xf16>) : !torch.vtensor<[960],f16> loc(#loc0)
%686 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x60x1x3x3xf16>) : !torch.vtensor<[640,60,1,3,3],f16> loc(#loc0)
%687 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x960x3x3xsi8>) : !torch.vtensor<[640,960,3,3],si8> loc(#loc0)
%688 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%689 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x1280xf16>) : !torch.vtensor<[640,1280],f16> loc(#loc0)
%690 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%691 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%692 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%693 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1x3x3xf16>) : !torch.vtensor<[640,40,1,3,3],f16> loc(#loc0)
%694 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640x3x3xsi8>) : !torch.vtensor<[640,640,3,3],si8> loc(#loc0)
%695 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%696 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x60x1x1x1xf16>) : !torch.vtensor<[640,60,1,1,1],f16> loc(#loc0)
%697 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x960x1x1xsi8>) : !torch.vtensor<[640,960,1,1],si8> loc(#loc0)
%698 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%699 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1x1x1xf16>) : !torch.vtensor<[640,40,1,1,1],f16> loc(#loc0)
%700 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640x1x1xsi8>) : !torch.vtensor<[640,640,1,1],si8> loc(#loc0)
%701 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%702 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1xf16>) : !torch.vtensor<[640,40,1],f16> loc(#loc0)
%703 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xsi8>) : !torch.vtensor<[640,640],si8> loc(#loc0)
%704 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%705 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1xf16>) : !torch.vtensor<[640,40,1],f16> loc(#loc0)
%706 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xsi8>) : !torch.vtensor<[640,640],si8> loc(#loc0)
%707 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%708 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1xf16>) : !torch.vtensor<[640,40,1],f16> loc(#loc0)
%709 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xsi8>) : !torch.vtensor<[640,640],si8> loc(#loc0)
%710 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%711 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1xf16>) : !torch.vtensor<[640,40,1],f16> loc(#loc0)
%712 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xsi8>) : !torch.vtensor<[640,640],si8> loc(#loc0)
%713 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%714 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1xf16>) : !torch.vtensor<[640,40,1],f16> loc(#loc0)
%715 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xsi8>) : !torch.vtensor<[640,640],si8> loc(#loc0)
%716 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%717 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x48x1xf16>) : !torch.vtensor<[640,48,1],f16> loc(#loc0)
%718 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x768xsi8>) : !torch.vtensor<[640,768],si8> loc(#loc0)
%719 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x48x1xf16>) : !torch.vtensor<[640,48,1],f16> loc(#loc0)
%720 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x768xsi8>) : !torch.vtensor<[640,768],si8> loc(#loc0)
%721 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1xf16>) : !torch.vtensor<[640,40,1],f16> loc(#loc0)
%722 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xsi8>) : !torch.vtensor<[640,640],si8> loc(#loc0)
%723 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%724 = torch.vtensor.literal(dense_resource<__elided__> : tensor<5120x40x1xf16>) : !torch.vtensor<[5120,40,1],f16> loc(#loc0)
%725 = torch.vtensor.literal(dense_resource<__elided__> : tensor<5120x640xsi8>) : !torch.vtensor<[5120,640],si8> loc(#loc0)
%726 = torch.vtensor.literal(dense_resource<__elided__> : tensor<5120xf16>) : !torch.vtensor<[5120],f16> loc(#loc0)
%727 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x160x1xf16>) : !torch.vtensor<[640,160,1],f16> loc(#loc0)
%728 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x2560xsi8>) : !torch.vtensor<[640,2560],si8> loc(#loc0)
%729 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%730 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1x1x1xf16>) : !torch.vtensor<[640,40,1,1,1],f16> loc(#loc0)
%731 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640x1x1xsi8>) : !torch.vtensor<[640,640,1,1],si8> loc(#loc0)
%732 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%733 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1x3x3xf16>) : !torch.vtensor<[640,40,1,3,3],f16> loc(#loc0)
%734 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640x3x3xsi8>) : !torch.vtensor<[640,640,3,3],si8> loc(#loc0)
%735 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
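// Output width drops again, from 640 to 320. The 960-element norm parameters
// (960 = 640 + 320) likewise suggest a skip-connection concat at the next
// up-block boundary (again inferred from shapes only).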
%736 = torch.vtensor.literal(dense_resource<__elided__> : tensor<960xf16>) : !torch.vtensor<[960],f16> loc(#loc0)
%737 = torch.vtensor.literal(dense_resource<__elided__> : tensor<960xf16>) : !torch.vtensor<[960],f16> loc(#loc0)
%738 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x60x1x3x3xf16>) : !torch.vtensor<[320,60,1,3,3],f16> loc(#loc0)
%739 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x960x3x3xsi8>) : !torch.vtensor<[320,960,3,3],si8> loc(#loc0)
%740 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%741 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x1280xf16>) : !torch.vtensor<[320,1280],f16> loc(#loc0)
%742 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%743 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%744 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%745 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1x3x3xf16>) : !torch.vtensor<[320,20,1,3,3],f16> loc(#loc0)
%746 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320x3x3xsi8>) : !torch.vtensor<[320,320,3,3],si8> loc(#loc0)
%747 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%748 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x60x1x1x1xf16>) : !torch.vtensor<[320,60,1,1,1],f16> loc(#loc0)
%749 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x960x1x1xsi8>) : !torch.vtensor<[320,960,1,1],si8> loc(#loc0)
%750 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%751 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1x1x1xf16>) : !torch.vtensor<[320,20,1,1,1],f16> loc(#loc0)
%752 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320x1x1xsi8>) : !torch.vtensor<[320,320,1,1],si8> loc(#loc0)
%753 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%754 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1xf16>) : !torch.vtensor<[320,20,1],f16> loc(#loc0)
%755 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xsi8>) : !torch.vtensor<[320,320],si8> loc(#loc0)
%756 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%757 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1xf16>) : !torch.vtensor<[320,20,1],f16> loc(#loc0)
%758 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xsi8>) : !torch.vtensor<[320,320],si8> loc(#loc0)
%759 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%760 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1xf16>) : !torch.vtensor<[320,20,1],f16> loc(#loc0)
%761 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xsi8>) : !torch.vtensor<[320,320],si8> loc(#loc0)
%762 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%763 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1xf16>) : !torch.vtensor<[320,20,1],f16> loc(#loc0)
%764 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xsi8>) : !torch.vtensor<[320,320],si8> loc(#loc0)
%765 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%766 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1xf16>) : !torch.vtensor<[320,20,1],f16> loc(#loc0)
%767 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xsi8>) : !torch.vtensor<[320,320],si8> loc(#loc0)
%768 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%769 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x48x1xf16>) : !torch.vtensor<[320,48,1],f16> loc(#loc0)
%770 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x768xsi8>) : !torch.vtensor<[320,768],si8> loc(#loc0)
%771 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x48x1xf16>) : !torch.vtensor<[320,48,1],f16> loc(#loc0)
%772 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x768xsi8>) : !torch.vtensor<[320,768],si8> loc(#loc0)
%773 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1xf16>) : !torch.vtensor<[320,20,1],f16> loc(#loc0)
%774 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xsi8>) : !torch.vtensor<[320,320],si8> loc(#loc0)
%775 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%776 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560x20x1xf16>) : !torch.vtensor<[2560,20,1],f16> loc(#loc0)
%777 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560x320xsi8>) : !torch.vtensor<[2560,320],si8> loc(#loc0)
%778 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560xf16>) : !torch.vtensor<[2560],f16> loc(#loc0)
%779 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x80x1xf16>) : !torch.vtensor<[320,80,1],f16> loc(#loc0)
%780 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x1280xsi8>) : !torch.vtensor<[320,1280],si8> loc(#loc0)
%781 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%782 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1x1x1xf16>) : !torch.vtensor<[320,20,1,1,1],f16> loc(#loc0)
%783 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320x1x1xsi8>) : !torch.vtensor<[320,320,1,1],si8> loc(#loc0)
%784 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%785 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%786 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%787 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x40x1x3x3xf16>) : !torch.vtensor<[320,40,1,3,3],f16> loc(#loc0)
%788 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x640x3x3xsi8>) : !torch.vtensor<[320,640,3,3],si8> loc(#loc0)
%789 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%790 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x1280xf16>) : !torch.vtensor<[320,1280],f16> loc(#loc0)
%791 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%792 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%793 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%794 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1x3x3xf16>) : !torch.vtensor<[320,20,1,3,3],f16> loc(#loc0)
%795 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320x3x3xsi8>) : !torch.vtensor<[320,320,3,3],si8> loc(#loc0)
%796 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%797 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x40x1x1x1xf16>) : !torch.vtensor<[320,40,1,1,1],f16> loc(#loc0)
%798 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x640x1x1xsi8>) : !torch.vtensor<[320,640,1,1],si8> loc(#loc0)
%799 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%800 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1x1x1xf16>) : !torch.vtensor<[320,20,1,1,1],f16> loc(#loc0)
%801 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320x1x1xsi8>) : !torch.vtensor<[320,320,1,1],si8> loc(#loc0)
%802 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%803 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1xf16>) : !torch.vtensor<[320,20,1],f16> loc(#loc0)
%804 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xsi8>) : !torch.vtensor<[320,320],si8> loc(#loc0)
%805 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%806 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1xf16>) : !torch.vtensor<[320,20,1],f16> loc(#loc0)
%807 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xsi8>) : !torch.vtensor<[320,320],si8> loc(#loc0)
%808 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%809 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1xf16>) : !torch.vtensor<[320,20,1],f16> loc(#loc0)
%810 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xsi8>) : !torch.vtensor<[320,320],si8> loc(#loc0)
%811 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%812 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1xf16>) : !torch.vtensor<[320,20,1],f16> loc(#loc0)
%813 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xsi8>) : !torch.vtensor<[320,320],si8> loc(#loc0)
%814 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%815 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1xf16>) : !torch.vtensor<[320,20,1],f16> loc(#loc0)
%816 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xsi8>) : !torch.vtensor<[320,320],si8> loc(#loc0)
%817 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%818 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x48x1xf16>) : !torch.vtensor<[320,48,1],f16> loc(#loc0)
%819 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x768xsi8>) : !torch.vtensor<[320,768],si8> loc(#loc0)
%820 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x48x1xf16>) : !torch.vtensor<[320,48,1],f16> loc(#loc0)
%821 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x768xsi8>) : !torch.vtensor<[320,768],si8> loc(#loc0)
%822 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1xf16>) : !torch.vtensor<[320,20,1],f16> loc(#loc0)
%823 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xsi8>) : !torch.vtensor<[320,320],si8> loc(#loc0)
%824 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%825 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560x20x1xf16>) : !torch.vtensor<[2560,20,1],f16> loc(#loc0)
%826 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560x320xsi8>) : !torch.vtensor<[2560,320],si8> loc(#loc0)
%827 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560xf16>) : !torch.vtensor<[2560],f16> loc(#loc0)
%828 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x80x1xf16>) : !torch.vtensor<[320,80,1],f16> loc(#loc0)
%829 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x1280xsi8>) : !torch.vtensor<[320,1280],si8> loc(#loc0)
%830 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%831 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1x1x1xf16>) : !torch.vtensor<[320,20,1,1,1],f16> loc(#loc0)
%832 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320x1x1xsi8>) : !torch.vtensor<[320,320,1,1],si8> loc(#loc0)
%833 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%834 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%835 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> loc(#loc0)
%836 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x40x1x3x3xf16>) : !torch.vtensor<[320,40,1,3,3],f16> loc(#loc0)
%837 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x640x3x3xsi8>) : !torch.vtensor<[320,640,3,3],si8> loc(#loc0)
%838 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%839 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x1280xf16>) : !torch.vtensor<[320,1280],f16> loc(#loc0)
%840 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%841 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%842 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%843 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1x3x3xf16>) : !torch.vtensor<[320,20,1,3,3],f16> loc(#loc0)
%844 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320x3x3xsi8>) : !torch.vtensor<[320,320,3,3],si8> loc(#loc0)
%845 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%846 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x40x1x1x1xf16>) : !torch.vtensor<[320,40,1,1,1],f16> loc(#loc0)
%847 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x640x1x1xsi8>) : !torch.vtensor<[320,640,1,1],si8> loc(#loc0)
%848 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%849 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1x1x1xf16>) : !torch.vtensor<[320,20,1,1,1],f16> loc(#loc0)
%850 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320x1x1xsi8>) : !torch.vtensor<[320,320,1,1],si8> loc(#loc0)
%851 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%852 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1xf16>) : !torch.vtensor<[320,20,1],f16> loc(#loc0)
%853 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xsi8>) : !torch.vtensor<[320,320],si8> loc(#loc0)
%854 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%855 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1xf16>) : !torch.vtensor<[320,20,1],f16> loc(#loc0)
%856 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xsi8>) : !torch.vtensor<[320,320],si8> loc(#loc0)
%857 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%858 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1xf16>) : !torch.vtensor<[320,20,1],f16> loc(#loc0)
%859 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xsi8>) : !torch.vtensor<[320,320],si8> loc(#loc0)
%860 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%861 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1xf16>) : !torch.vtensor<[320,20,1],f16> loc(#loc0)
%862 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xsi8>) : !torch.vtensor<[320,320],si8> loc(#loc0)
%863 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%864 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1xf16>) : !torch.vtensor<[320,20,1],f16> loc(#loc0)
%865 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xsi8>) : !torch.vtensor<[320,320],si8> loc(#loc0)
%866 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%867 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x48x1xf16>) : !torch.vtensor<[320,48,1],f16> loc(#loc0)
%868 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x768xsi8>) : !torch.vtensor<[320,768],si8> loc(#loc0)
%869 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x48x1xf16>) : !torch.vtensor<[320,48,1],f16> loc(#loc0)
%870 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x768xsi8>) : !torch.vtensor<[320,768],si8> loc(#loc0)
%871 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1xf16>) : !torch.vtensor<[320,20,1],f16> loc(#loc0)
%872 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xsi8>) : !torch.vtensor<[320,320],si8> loc(#loc0)
%873 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%874 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560x20x1xf16>) : !torch.vtensor<[2560,20,1],f16> loc(#loc0)
%875 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560x320xsi8>) : !torch.vtensor<[2560,320],si8> loc(#loc0)
%876 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560xf16>) : !torch.vtensor<[2560],f16> loc(#loc0)
%877 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x80x1xf16>) : !torch.vtensor<[320,80,1],f16> loc(#loc0)
%878 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x1280xsi8>) : !torch.vtensor<[320,1280],si8> loc(#loc0)
%879 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%880 = torch.vtensor.literal(dense<8.000000e+00> : tensor<f32>) : !torch.vtensor<[],f32> loc(#loc0)
%881 = torch.vtensor.literal(dense<0.000000e+00> : tensor<f32>) : !torch.vtensor<[],f32> loc(#loc0)
%882 = torch.vtensor.literal(dense<2.000000e+00> : tensor<f32>) : !torch.vtensor<[],f32> loc(#loc0)
%883 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1x1x1xf16>) : !torch.vtensor<[320,20,1,1,1],f16> loc(#loc0)
%884 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320x1x1xsi8>) : !torch.vtensor<[320,320,1,1],si8> loc(#loc0)
%885 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%886 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%887 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> loc(#loc0)
%888 = torch.vtensor.literal(dense_resource<__elided__> : tensor<4x320x3x3xf16>) : !torch.vtensor<[4,320,3,3],f16> loc(#loc0)
%889 = torch.vtensor.literal(dense<[-1.393320e-03, -1.588820e-03, -2.624990e-04, -2.531050e-03]> : tensor<4xf16>) : !torch.vtensor<[4],f16> loc(#loc0)
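// End of parameter literals. Note the recurring (f16, si8) pairs above: each
// si8 tensor is paired with an f16 tensor whose input-channel dim is 16x
// smaller with a broadcast axis inserted (e.g. 1280x2560x3x3 si8 with
// 1280x160x1x3x3 f16; 1280x1280 si8 with 1280x80x1 f16). This is consistent
// with int8 weights plus per-group (group size 16) dequantization scales --
// an interpretation from the shapes, not stated in the IR. The 4x320x3x3
// weight and its explicit 4-element bias (%888/%889) match a UNet output
// conv producing 4 latent channels. Scalar constants for the forward body
// follow.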
%int0 = torch.constant.int 0 loc(#loc1)
%false = torch.constant.bool false loc(#loc1)
%int6 = torch.constant.int 6 loc(#loc1)
%none = torch.constant.none loc(#loc0)
%int9223372036854775807 = torch.constant.int 9223372036854775807 loc(#loc1)
%int-1 = torch.constant.int -1 loc(#loc1)
%int5 = torch.constant.int 5 loc(#loc1)
%true = torch.constant.bool true loc(#loc1)
%int3 = torch.constant.int 3 loc(#loc1)
%float1.000000e-05 = torch.constant.float 1.000000e-05 loc(#loc1)
%int8192 = torch.constant.int 8192 loc(#loc1)
%int-2 = torch.constant.int -2 loc(#loc1)
%int77 = torch.constant.int 77 loc(#loc1)
%int48 = torch.constant.int 48 loc(#loc1)
%int768 = torch.constant.int 768 loc(#loc1)
%int154 = torch.constant.int 154 loc(#loc1)
%str = torch.constant.str "none" loc(#loc1)
%int2048 = torch.constant.int 2048 loc(#loc1)
%int512 = torch.constant.int 512 loc(#loc1)
%int128 = torch.constant.int 128 loc(#loc1)
%float2.000000e00 = torch.constant.float 2.000000e+00 loc(#loc1)
%int120 = torch.constant.int 120 loc(#loc1)
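// ---- Forward body. The scalar timestep %arg1 is unsqueezed and broadcast
// to the batch dimension (size 2).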
%890 = torch.aten.unsqueeze %arg1, %int0 : !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[1],si64> loc(#loc1)
%891 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int> loc(#loc2)
%892 = torch.aten.broadcast_to %890, %891 : !torch.vtensor<[1],si64>, !torch.list<int> -> !torch.vtensor<[2],si64> loc(#loc1)
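// Sinusoidal embedding frequencies: %8 = -ln(10000) and %7 = 160, so
// %896 = exp(arange(0, 160) * -ln(10000) / 160) -- 160 frequencies for a
// 320-wide timestep embedding.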
%cuda3A0 = torch.constant.device "cuda:0" loc(#loc1)
%893 = torch.aten.arange.start_step %int0, %int160, %int1, %int6, %none, %cuda3A0, %false : !torch.int, !torch.int, !torch.int, !torch.int, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[160],f32> loc(#loc1)
%894 = torch.aten.mul.Tensor %893, %8 : !torch.vtensor<[160],f32>, !torch.vtensor<[],f64> -> !torch.vtensor<[160],f32> loc(#loc1)
%895 = torch.aten.div.Tensor %894, %7 : !torch.vtensor<[160],f32>, !torch.vtensor<[],si64> -> !torch.vtensor<[160],f32> loc(#loc1)
%896 = torch.aten.exp %895 : !torch.vtensor<[160],f32> -> !torch.vtensor<[160],f32> loc(#loc1)
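// The si64 timesteps are copied into an f32 buffer (%903), which performs
// the int-to-float cast, then multiplied against the [1,160] frequency row
// to give a [2,160] grid. The extra multiply by %6 (constant 1) is a no-op
// scale factor.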
%897 = torch.aten.slice.Tensor %892, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2],si64> loc(#loc1)
%898 = torch.aten.unsqueeze %897, %int1 : !torch.vtensor<[2],si64>, !torch.int -> !torch.vtensor<[2,1],si64> loc(#loc1)
%899 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%900 = torch.aten.to.dtype %899, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%901 = torch.prim.ListConstruct %int2, %int1 : (!torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%902 = torch.aten.broadcast_to %900, %901 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,1],f32> loc(#loc1)
%903 = torch.valsem.aten.copy %902, %898, %false : !torch.vtensor<[2,1],f32>, !torch.vtensor<[2,1],si64>, !torch.bool -> !torch.vtensor<[2,1],f32> loc(#loc1)
%904 = torch.aten.unsqueeze %896, %int0 : !torch.vtensor<[160],f32>, !torch.int -> !torch.vtensor<[1,160],f32> loc(#loc1)
%905 = torch.aten.slice.Tensor %904, %int1, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[1,160],f32>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,160],f32> loc(#loc1)
%906 = torch.aten.mul.Tensor %903, %905 : !torch.vtensor<[2,1],f32>, !torch.vtensor<[1,160],f32> -> !torch.vtensor<[2,160],f32> loc(#loc1)
%907 = torch.aten.mul.Tensor %906, %6 : !torch.vtensor<[2,160],f32>, !torch.vtensor<[],si64> -> !torch.vtensor<[2,160],f32> loc(#loc1)
%908 = torch.aten.sin %907 : !torch.vtensor<[2,160],f32> -> !torch.vtensor<[2,160],f32> loc(#loc1)
%909 = torch.aten.cos %907 : !torch.vtensor<[2,160],f32> -> !torch.vtensor<[2,160],f32> loc(#loc1)
%910 = torch.prim.ListConstruct %908, %909 : (!torch.vtensor<[2,160],f32>, !torch.vtensor<[2,160],f32>) -> !torch.list<vtensor> loc(#loc1)
%911 = torch.aten.cat %910, %int-1 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[2,320],f32> loc(#loc1)
%912 = torch.aten.slice.Tensor %911, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,320],f32>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,320],f32> loc(#loc1)
%913 = torch.aten.slice.Tensor %912, %int1, %int160, %int9223372036854775807, %int1 : !torch.vtensor<[2,320],f32>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,160],f32> loc(#loc1)
%914 = torch.aten.slice.Tensor %911, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,320],f32>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,320],f32> loc(#loc1)
%915 = torch.aten.slice.Tensor %914, %int1, %int0, %int160, %int1 : !torch.vtensor<[2,320],f32>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,160],f32> loc(#loc1)
%916 = torch.prim.ListConstruct %913, %915 : (!torch.vtensor<[2,160],f32>, !torch.vtensor<[2,160],f32>) -> !torch.list<vtensor> loc(#loc1)
%917 = torch.aten.cat %916, %int-1 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[2,320],f32> loc(#loc1)
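// NOTE (annotation): %918-%922 materialize a zero f16 [2,320] tensor and copy
// the f32 embedding into it - i.e. a cast to half precision before the MLP.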
%918 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%919 = torch.aten.to.dtype %918, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%920 = torch.prim.ListConstruct %int2, %int320 : (!torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%921 = torch.aten.broadcast_to %919, %920 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,320],f16> loc(#loc1)
%922 = torch.valsem.aten.copy %921, %917, %false : !torch.vtensor<[2,320],f16>, !torch.vtensor<[2,320],f32>, !torch.bool -> !torch.vtensor<[2,320],f16> loc(#loc1)
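// NOTE (annotation): %923-%932 are the time-embedding MLP:
// Linear(320 -> 1280) with weight %9 and bias %10 (added via the
// scale-by-1 mul/add idiom), SiLU (sigmoid(x) * x), then
// Linear(1280 -> 1280) with weight %11 and bias %12.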
%923 = torch.aten.transpose.int %9, %int0, %int1 : !torch.vtensor<[1280,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,1280],f16> loc(#loc1)
%924 = torch.aten.mm %922, %923 : !torch.vtensor<[2,320],f16>, !torch.vtensor<[320,1280],f16> -> !torch.vtensor<[2,1280],f16> loc(#loc1)
%925 = torch.aten.mul.Scalar %10, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16> loc(#loc1)
%926 = torch.aten.add.Tensor %925, %924, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[2,1280],f16>, !torch.int -> !torch.vtensor<[2,1280],f16> loc(#loc1)
%927 = torch.aten.sigmoid %926 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16> loc(#loc1)
%928 = torch.aten.mul.Tensor %927, %926 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16> loc(#loc1)
%929 = torch.aten.transpose.int %11, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16> loc(#loc1)
%930 = torch.aten.mm %928, %929 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[2,1280],f16> loc(#loc1)
%931 = torch.aten.mul.Scalar %12, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16> loc(#loc1)
%932 = torch.aten.add.Tensor %931, %930, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[2,1280],f16>, !torch.int -> !torch.vtensor<[2,1280],f16> loc(#loc1)
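// NOTE (annotation): %933/%934 are the reusable [1,1] and [0,0] int lists for
// conv stride/padding/dilation; %935 is the input convolution, a 3x3,
// stride-1, pad-1 conv taking the 4-channel latent %arg0 to 320 channels
// (weight %13 and bias %14 are defined earlier in the file).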
%933 = torch.prim.ListConstruct %int1, %int1 : (!torch.int, !torch.int) -> !torch.list<int> loc(#loc0)
%934 = torch.prim.ListConstruct %int0, %int0 : (!torch.int, !torch.int) -> !torch.list<int> loc(#loc0)
%935 = torch.aten.convolution %arg0, %13, %14, %933, %933, %933, %false, %934, %int1 : !torch.vtensor<[2,4,64,64],f16>, !torch.vtensor<[320,4,3,3],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
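// NOTE (annotation): %936-%972 implement GroupNorm(32 groups, eps %5 = 1e-5)
// expanded inline: view [2,320,64,64] as [2,32,10,4096], take the mean and
// variance over dims {2,3} (40960 elements per group, accumulated in f64),
// normalize with rsqrt(var + eps), view back, apply the per-channel affine
// weight %15 and bias %16, cast back to f16, and finish with a SiLU.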
%936 = torch.prim.ListConstruct %int2, %int32, %int10, %int4096 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc3)
%937 = torch.aten.view %935, %936 : !torch.vtensor<[2,320,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,32,10,4096],f16> loc(#loc1)
%938 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%939 = torch.aten.to.dtype %938, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%940 = torch.prim.ListConstruct %int2, %int32, %int10, %int4096 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%941 = torch.aten.broadcast_to %939, %940 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,10,4096],f32> loc(#loc1)
%942 = torch.valsem.aten.copy %941, %937, %false : !torch.vtensor<[2,32,10,4096],f32>, !torch.vtensor<[2,32,10,4096],f16>, !torch.bool -> !torch.vtensor<[2,32,10,4096],f32> loc(#loc1)
%943 = torch.prim.ListConstruct %int2, %int3 : (!torch.int, !torch.int) -> !torch.list<int> loc(#loc0)
%944 = torch.aten.to.dtype %942, %int7, %false, %false, %none : !torch.vtensor<[2,32,10,4096],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,10,4096],f64> loc(#loc1)
%945 = torch.aten.sum.dim_IntList %944, %943, %true, %none : !torch.vtensor<[2,32,10,4096],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%946 = torch.aten.div.Scalar %945, %int40960 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%947 = torch.aten.sub.Tensor %944, %946, %float1.000000e00 : !torch.vtensor<[2,32,10,4096],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,10,4096],f64> loc(#loc1)
%948 = torch.aten.mul.Tensor %947, %947 : !torch.vtensor<[2,32,10,4096],f64>, !torch.vtensor<[2,32,10,4096],f64> -> !torch.vtensor<[2,32,10,4096],f64> loc(#loc1)
%949 = torch.aten.sum.dim_IntList %948, %943, %true, %none : !torch.vtensor<[2,32,10,4096],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%950 = torch.aten.div.Scalar %949, %int40960 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%951 = torch.aten.to.dtype %950, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%952 = torch.aten.sum.dim_IntList %942, %943, %true, %none : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%953 = torch.aten.div.Scalar %952, %int40960 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%954 = torch.aten.add.Tensor %951, %5, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%955 = torch.aten.rsqrt %954 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%956 = torch.aten.sub.Tensor %937, %953, %int1 : !torch.vtensor<[2,32,10,4096],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,10,4096],f32> loc(#loc1)
%957 = torch.aten.mul.Tensor %956, %955 : !torch.vtensor<[2,32,10,4096],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,10,4096],f32> loc(#loc1)
%958 = torch.prim.ListConstruct %int2, %int320, %int64, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc3)
%959 = torch.aten.view %957, %958 : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f32> loc(#loc1)
%960 = torch.aten.unsqueeze %15, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16> loc(#loc1)
%961 = torch.aten.unsqueeze %960, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16> loc(#loc1)
%962 = torch.aten.mul.Tensor %959, %961 : !torch.vtensor<[2,320,64,64],f32>, !torch.vtensor<[320,1,1],f16> -> !torch.vtensor<[2,320,64,64],f32> loc(#loc1)
%963 = torch.aten.unsqueeze %16, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16> loc(#loc1)
%964 = torch.aten.unsqueeze %963, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16> loc(#loc1)
%965 = torch.aten.add.Tensor %962, %964, %int1 : !torch.vtensor<[2,320,64,64],f32>, !torch.vtensor<[320,1,1],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f32> loc(#loc1)
%966 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%967 = torch.aten.to.dtype %966, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%968 = torch.prim.ListConstruct %int2, %int320, %int64, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%969 = torch.aten.broadcast_to %967, %968 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%970 = torch.valsem.aten.copy %969, %965, %false : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f32>, !torch.bool -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%971 = torch.aten.sigmoid %970 : !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%972 = torch.aten.mul.Tensor %971, %970 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
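// NOTE (annotation): %973-%1017 look like per-group activation
// fake-quantization (quantize/dequantize): view as [2,20,16,64,64]
// (20 groups of 16 channels), take the per-group max of |x|, derive the
// scale maxabs / %882^(%880 - 1), shift by %881, clamp to
// [-%882^(%880-1), %882^(%880-1) - 1] via the gt/lt + where pairs, round,
// then undo the shift and scale. %880/%881/%882 are defined earlier in the
// truncated file; from the arithmetic they behave as the bit-width, the
// zero-point, and the base 2, respectively.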
%973 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%974 = torch.aten.detach %973 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%975 = torch.prim.ListConstruct %int2, %int20, %int16, %int64, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc3)
%976 = torch.aten.view %972, %975 : !torch.vtensor<[2,320,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,16,64,64],f16> loc(#loc1)
%977 = torch.aten.abs %976 : !torch.vtensor<[2,20,16,64,64],f16> -> !torch.vtensor<[2,20,16,64,64],f16> loc(#loc1)
%values, %indices = torch.aten.max.dim %977, %int2, %true : !torch.vtensor<[2,20,16,64,64],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,20,1,64,64],f16>, !torch.vtensor<[2,20,1,64,64],si64> loc(#loc1)
%978 = torch.prim.ListConstruct %int2, %int20, %int1, %int64, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc3)
%979 = torch.aten.view %values, %978 : !torch.vtensor<[2,20,1,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,1,64,64],f16> loc(#loc1)
%980 = torch.aten.broadcast_to %979, %975 : !torch.vtensor<[2,20,1,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,16,64,64],f16> loc(#loc1)
%981 = torch.aten.clone %980, %int0 : !torch.vtensor<[2,20,16,64,64],f16>, !torch.int -> !torch.vtensor<[2,20,16,64,64],f16> loc(#loc1)
%982 = torch.aten.view %981, %958 : !torch.vtensor<[2,20,16,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%983 = torch.aten.sub.Tensor %974, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%984 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%985 = torch.aten.pow.Tensor_Tensor %984, %983 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%986 = torch.aten.neg %985 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%987 = torch.aten.neg %986 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%988 = torch.aten.div.Tensor %982, %987 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%989 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%990 = torch.aten.detach %989 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%991 = torch.aten.div.Tensor %972, %988 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%992 = torch.aten.add.Tensor %991, %990, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%993 = torch.aten.sub.Tensor %974, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%994 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%995 = torch.aten.pow.Tensor_Tensor %994, %993 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%996 = torch.aten.neg %995 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%997 = torch.aten.sub.Tensor %974, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%998 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%999 = torch.aten.pow.Tensor_Tensor %998, %997 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1000 = torch.aten.sub.Tensor %999, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%1001 = torch.aten.gt.Tensor %992, %1000 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,320,64,64],i1> loc(#loc1)
%1002 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%1003 = torch.aten.to.dtype %1002, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%1004 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%1005 = torch.aten.broadcast_to %1003, %1004 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%1006 = torch.valsem.aten.copy %1005, %1000, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%1007 = torch.aten.where.self %1001, %1006, %992 : !torch.vtensor<[2,320,64,64],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%1008 = torch.aten.lt.Tensor %1007, %996 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,320,64,64],i1> loc(#loc1)
%1009 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%1010 = torch.aten.to.dtype %1009, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%1011 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%1012 = torch.aten.broadcast_to %1010, %1011 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%1013 = torch.valsem.aten.copy %1012, %996, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%1014 = torch.aten.where.self %1008, %1013, %1007 : !torch.vtensor<[2,320,64,64],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%1015 = torch.aten.round %1014 : !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%1016 = torch.aten.sub.Tensor %1015, %990, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%1017 = torch.aten.mul.Tensor %1016, %988 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
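// NOTE (annotation): %1018-%1024 reconstruct the f16 weight of the first 3x3
// conv from its quantized form: the per-group scales %17 ([320,20,1,3,3]) are
// broadcast over the 16 channels of each group, reshaped to [320,320,3,3],
// and multiplied with the int8 codes %18 before the convolution (bias %19).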
%1018 = torch.prim.ListConstruct %int320, %int20, %int16, %int3, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc2)
%1019 = torch.aten.broadcast_to %17, %1018 : !torch.vtensor<[320,20,1,3,3],f16>, !torch.list<int> -> !torch.vtensor<[320,20,16,3,3],f16> loc(#loc1)
%1020 = torch.aten.clone %1019, %int0 : !torch.vtensor<[320,20,16,3,3],f16>, !torch.int -> !torch.vtensor<[320,20,16,3,3],f16> loc(#loc1)
%1021 = torch.prim.ListConstruct %int320, %int320, %int3, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc4)
%1022 = torch.aten.view %1020, %1021 : !torch.vtensor<[320,20,16,3,3],f16>, !torch.list<int> -> !torch.vtensor<[320,320,3,3],f16> loc(#loc1)
%1023 = torch.aten.mul.Tensor %18, %1022 : !torch.vtensor<[320,320,3,3],si8>, !torch.vtensor<[320,320,3,3],f16> -> !torch.vtensor<[320,320,3,3],f16> loc(#loc1)
%1024 = torch.aten.convolution %1017, %1023, %19, %933, %933, %933, %false, %934, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[320,320,3,3],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
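// NOTE (annotation): %1025-%1035 project the time embedding into this block:
// SiLU on %932, Linear(1280 -> 320) with weight %20 and bias %21, unsqueeze
// to [2,320,1,1], and add it channel-wise onto the conv output.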
%1025 = torch.aten.sigmoid %932 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16> loc(#loc1)
%1026 = torch.aten.mul.Tensor %1025, %932 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16> loc(#loc1)
%1027 = torch.aten.transpose.int %20, %int0, %int1 : !torch.vtensor<[320,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,320],f16> loc(#loc1)
%1028 = torch.aten.mm %1026, %1027 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,320],f16> -> !torch.vtensor<[2,320],f16> loc(#loc1)
%1029 = torch.aten.mul.Scalar %21, %int1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320],f16> loc(#loc1)
%1030 = torch.aten.add.Tensor %1029, %1028, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[2,320],f16>, !torch.int -> !torch.vtensor<[2,320],f16> loc(#loc1)
%1031 = torch.aten.slice.Tensor %1030, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,320],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,320],f16> loc(#loc1)
%1032 = torch.aten.slice.Tensor %1031, %int1, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,320],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,320],f16> loc(#loc1)
%1033 = torch.aten.unsqueeze %1032, %int2 : !torch.vtensor<[2,320],f16>, !torch.int -> !torch.vtensor<[2,320,1],f16> loc(#loc1)
%1034 = torch.aten.unsqueeze %1033, %int3 : !torch.vtensor<[2,320,1],f16>, !torch.int -> !torch.vtensor<[2,320,1,1],f16> loc(#loc1)
%1035 = torch.aten.add.Tensor %1024, %1034, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,1,1],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
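// NOTE (annotation): %1036-%1069 repeat the inline GroupNorm(32, eps 1e-5)
// plus SiLU pattern from above on the time-conditioned activations, with
// affine weight %22 and bias %23.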
%1036 = torch.aten.view %1035, %936 : !torch.vtensor<[2,320,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,32,10,4096],f16> loc(#loc1)
%1037 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%1038 = torch.aten.to.dtype %1037, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%1039 = torch.prim.ListConstruct %int2, %int32, %int10, %int4096 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%1040 = torch.aten.broadcast_to %1038, %1039 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,10,4096],f32> loc(#loc1)
%1041 = torch.valsem.aten.copy %1040, %1036, %false : !torch.vtensor<[2,32,10,4096],f32>, !torch.vtensor<[2,32,10,4096],f16>, !torch.bool -> !torch.vtensor<[2,32,10,4096],f32> loc(#loc1)
%1042 = torch.aten.to.dtype %1041, %int7, %false, %false, %none : !torch.vtensor<[2,32,10,4096],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,10,4096],f64> loc(#loc1)
%1043 = torch.aten.sum.dim_IntList %1042, %943, %true, %none : !torch.vtensor<[2,32,10,4096],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%1044 = torch.aten.div.Scalar %1043, %int40960 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%1045 = torch.aten.sub.Tensor %1042, %1044, %float1.000000e00 : !torch.vtensor<[2,32,10,4096],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,10,4096],f64> loc(#loc1)
%1046 = torch.aten.mul.Tensor %1045, %1045 : !torch.vtensor<[2,32,10,4096],f64>, !torch.vtensor<[2,32,10,4096],f64> -> !torch.vtensor<[2,32,10,4096],f64> loc(#loc1)
%1047 = torch.aten.sum.dim_IntList %1046, %943, %true, %none : !torch.vtensor<[2,32,10,4096],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%1048 = torch.aten.div.Scalar %1047, %int40960 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%1049 = torch.aten.to.dtype %1048, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%1050 = torch.aten.sum.dim_IntList %1041, %943, %true, %none : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%1051 = torch.aten.div.Scalar %1050, %int40960 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%1052 = torch.aten.add.Tensor %1049, %5, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%1053 = torch.aten.rsqrt %1052 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%1054 = torch.aten.sub.Tensor %1036, %1051, %int1 : !torch.vtensor<[2,32,10,4096],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,10,4096],f32> loc(#loc1)
%1055 = torch.aten.mul.Tensor %1054, %1053 : !torch.vtensor<[2,32,10,4096],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,10,4096],f32> loc(#loc1)
%1056 = torch.aten.view %1055, %958 : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f32> loc(#loc1)
%1057 = torch.aten.unsqueeze %22, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16> loc(#loc1)
%1058 = torch.aten.unsqueeze %1057, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16> loc(#loc1)
%1059 = torch.aten.mul.Tensor %1056, %1058 : !torch.vtensor<[2,320,64,64],f32>, !torch.vtensor<[320,1,1],f16> -> !torch.vtensor<[2,320,64,64],f32> loc(#loc1)
%1060 = torch.aten.unsqueeze %23, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16> loc(#loc1)
%1061 = torch.aten.unsqueeze %1060, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16> loc(#loc1)
%1062 = torch.aten.add.Tensor %1059, %1061, %int1 : !torch.vtensor<[2,320,64,64],f32>, !torch.vtensor<[320,1,1],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f32> loc(#loc1)
%1063 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%1064 = torch.aten.to.dtype %1063, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%1065 = torch.prim.ListConstruct %int2, %int320, %int64, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%1066 = torch.aten.broadcast_to %1064, %1065 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%1067 = torch.valsem.aten.copy %1066, %1062, %false : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f32>, !torch.bool -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%1068 = torch.aten.sigmoid %1067 : !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%1069 = torch.aten.mul.Tensor %1068, %1067 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
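// NOTE (annotation): %1070-%1117 repeat the activation fake-quantization and
// weight dequantization (scales %24, int8 codes %25, bias %26) for the second
// 3x3 conv; %1118 then adds the block input %935 as the residual connection,
// and %1119 divides by the output scale factor %4, which is 1.0 here.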
%1070 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1071 = torch.aten.detach %1070 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1072 = torch.aten.view %1069, %975 : !torch.vtensor<[2,320,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,16,64,64],f16> loc(#loc1)
%1073 = torch.aten.abs %1072 : !torch.vtensor<[2,20,16,64,64],f16> -> !torch.vtensor<[2,20,16,64,64],f16> loc(#loc1)
%values_0, %indices_1 = torch.aten.max.dim %1073, %int2, %true : !torch.vtensor<[2,20,16,64,64],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,20,1,64,64],f16>, !torch.vtensor<[2,20,1,64,64],si64> loc(#loc1)
%1074 = torch.aten.view %values_0, %978 : !torch.vtensor<[2,20,1,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,1,64,64],f16> loc(#loc1)
%1075 = torch.aten.broadcast_to %1074, %975 : !torch.vtensor<[2,20,1,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,16,64,64],f16> loc(#loc1)
%1076 = torch.aten.clone %1075, %int0 : !torch.vtensor<[2,20,16,64,64],f16>, !torch.int -> !torch.vtensor<[2,20,16,64,64],f16> loc(#loc1)
%1077 = torch.aten.view %1076, %958 : !torch.vtensor<[2,20,16,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%1078 = torch.aten.sub.Tensor %1071, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%1079 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%1080 = torch.aten.pow.Tensor_Tensor %1079, %1078 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1081 = torch.aten.neg %1080 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1082 = torch.aten.neg %1081 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1083 = torch.aten.div.Tensor %1077, %1082 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%1084 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1085 = torch.aten.detach %1084 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1086 = torch.aten.div.Tensor %1069, %1083 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%1087 = torch.aten.add.Tensor %1086, %1085, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%1088 = torch.aten.sub.Tensor %1071, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%1089 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%1090 = torch.aten.pow.Tensor_Tensor %1089, %1088 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1091 = torch.aten.neg %1090 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1092 = torch.aten.sub.Tensor %1071, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%1093 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%1094 = torch.aten.pow.Tensor_Tensor %1093, %1092 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1095 = torch.aten.sub.Tensor %1094, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%1096 = torch.aten.gt.Tensor %1087, %1095 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,320,64,64],i1> loc(#loc1)
%1097 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%1098 = torch.aten.to.dtype %1097, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%1099 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%1100 = torch.aten.broadcast_to %1098, %1099 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%1101 = torch.valsem.aten.copy %1100, %1095, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%1102 = torch.aten.where.self %1096, %1101, %1087 : !torch.vtensor<[2,320,64,64],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%1103 = torch.aten.lt.Tensor %1102, %1091 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,320,64,64],i1> loc(#loc1)
%1104 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%1105 = torch.aten.to.dtype %1104, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%1106 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%1107 = torch.aten.broadcast_to %1105, %1106 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%1108 = torch.valsem.aten.copy %1107, %1091, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%1109 = torch.aten.where.self %1103, %1108, %1102 : !torch.vtensor<[2,320,64,64],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%1110 = torch.aten.round %1109 : !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%1111 = torch.aten.sub.Tensor %1110, %1085, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%1112 = torch.aten.mul.Tensor %1111, %1083 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%1113 = torch.aten.broadcast_to %24, %1018 : !torch.vtensor<[320,20,1,3,3],f16>, !torch.list<int> -> !torch.vtensor<[320,20,16,3,3],f16> loc(#loc1)
%1114 = torch.aten.clone %1113, %int0 : !torch.vtensor<[320,20,16,3,3],f16>, !torch.int -> !torch.vtensor<[320,20,16,3,3],f16> loc(#loc1)
%1115 = torch.aten.view %1114, %1021 : !torch.vtensor<[320,20,16,3,3],f16>, !torch.list<int> -> !torch.vtensor<[320,320,3,3],f16> loc(#loc1)
%1116 = torch.aten.mul.Tensor %25, %1115 : !torch.vtensor<[320,320,3,3],si8>, !torch.vtensor<[320,320,3,3],f16> -> !torch.vtensor<[320,320,3,3],f16> loc(#loc1)
%1117 = torch.aten.convolution %1112, %1116, %26, %933, %933, %933, %false, %934, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[320,320,3,3],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%1118 = torch.aten.add.Tensor %935, %1117, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%1119 = torch.aten.div.Tensor %1118, %4 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
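// NOTE (annotation): %1120-%1145 are another inline GroupNorm over
// [2,32,10,4096], this time with eps %3 ~= 1e-6, normalizing the ResNet
// output ahead of the attention block; no per-channel affine scale/shift
// appears at this point in the dump before the cast back to f16.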
%1120 = torch.aten.view %1119, %936 : !torch.vtensor<[2,320,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,32,10,4096],f16> loc(#loc1)
%1121 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%1122 = torch.aten.to.dtype %1121, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%1123 = torch.prim.ListConstruct %int2, %int32, %int10, %int4096 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%1124 = torch.aten.broadcast_to %1122, %1123 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,10,4096],f32> loc(#loc1)
%1125 = torch.valsem.aten.copy %1124, %1120, %false : !torch.vtensor<[2,32,10,4096],f32>, !torch.vtensor<[2,32,10,4096],f16>, !torch.bool -> !torch.vtensor<[2,32,10,4096],f32> loc(#loc1)
%1126 = torch.aten.to.dtype %1125, %int7, %false, %false, %none : !torch.vtensor<[2,32,10,4096],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,10,4096],f64> loc(#loc1)
%1127 = torch.aten.sum.dim_IntList %1126, %943, %true, %none : !torch.vtensor<[2,32,10,4096],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%1128 = torch.aten.div.Scalar %1127, %int40960 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%1129 = torch.aten.sub.Tensor %1126, %1128, %float1.000000e00 : !torch.vtensor<[2,32,10,4096],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,10,4096],f64> loc(#loc1)
%1130 = torch.aten.mul.Tensor %1129, %1129 : !torch.vtensor<[2,32,10,4096],f64>, !torch.vtensor<[2,32,10,4096],f64> -> !torch.vtensor<[2,32,10,4096],f64> loc(#loc1)
%1131 = torch.aten.sum.dim_IntList %1130, %943, %true, %none : !torch.vtensor<[2,32,10,4096],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%1132 = torch.aten.div.Scalar %1131, %int40960 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%1133 = torch.aten.to.dtype %1132, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%1134 = torch.aten.sum.dim_IntList %1125, %943, %true, %none : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%1135 = torch.aten.div.Scalar %1134, %int40960 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%1136 = torch.aten.add.Tensor %1133, %3, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%1137 = torch.aten.rsqrt %1136 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%1138 = torch.aten.sub.Tensor %1120, %1135, %int1 : !torch.vtensor<[2,32,10,4096],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,10,4096],f32> loc(#loc1)
%1139 = torch.aten.mul.Tensor %1138, %1137 : !torch.vtensor<[2,32,10,4096],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,10,4096],f32> loc(#loc1)
%1140 = torch.aten.view %1139, %958 : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f32> loc(#loc1)
%1141 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%1142 = torch.aten.to.dtype %1141, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%1143 = torch.prim.ListConstruct %int2, %int320, %int64, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%1144 = torch.aten.broadcast_to %1142, %1143 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%1145 = torch.valsem.aten.copy %1144, %1140, %false : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f32>, !torch.bool -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
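// NOTE (annotation): %1146-%1188 apply the activation fake-quantization once
// more, and %1189-%1195 dequantize a 1x1 weight (scales %27, int8 codes %28,
// bias %29) for a stride-1, pad-0 pointwise convolution - consistent with a
// transformer-block input projection.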
%1146 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1147 = torch.aten.detach %1146 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1148 = torch.aten.view %1145, %975 : !torch.vtensor<[2,320,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,16,64,64],f16> loc(#loc1)
%1149 = torch.aten.abs %1148 : !torch.vtensor<[2,20,16,64,64],f16> -> !torch.vtensor<[2,20,16,64,64],f16> loc(#loc1)
%values_2, %indices_3 = torch.aten.max.dim %1149, %int2, %true : !torch.vtensor<[2,20,16,64,64],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,20,1,64,64],f16>, !torch.vtensor<[2,20,1,64,64],si64> loc(#loc1)
%1150 = torch.aten.view %values_2, %978 : !torch.vtensor<[2,20,1,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,1,64,64],f16> loc(#loc1)
%1151 = torch.aten.broadcast_to %1150, %975 : !torch.vtensor<[2,20,1,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,16,64,64],f16> loc(#loc1)
%1152 = torch.aten.clone %1151, %int0 : !torch.vtensor<[2,20,16,64,64],f16>, !torch.int -> !torch.vtensor<[2,20,16,64,64],f16> loc(#loc1)
%1153 = torch.aten.view %1152, %958 : !torch.vtensor<[2,20,16,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%1154 = torch.aten.sub.Tensor %1147, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%1155 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%1156 = torch.aten.pow.Tensor_Tensor %1155, %1154 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1157 = torch.aten.neg %1156 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1158 = torch.aten.neg %1157 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1159 = torch.aten.div.Tensor %1153, %1158 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%1160 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1161 = torch.aten.detach %1160 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1162 = torch.aten.div.Tensor %1145, %1159 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%1163 = torch.aten.add.Tensor %1162, %1161, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%1164 = torch.aten.sub.Tensor %1147, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%1165 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%1166 = torch.aten.pow.Tensor_Tensor %1165, %1164 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1167 = torch.aten.neg %1166 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1168 = torch.aten.sub.Tensor %1147, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%1169 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%1170 = torch.aten.pow.Tensor_Tensor %1169, %1168 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1171 = torch.aten.sub.Tensor %1170, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%1172 = torch.aten.gt.Tensor %1163, %1171 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,320,64,64],i1> loc(#loc1)
%1173 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%1174 = torch.aten.to.dtype %1173, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%1175 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%1176 = torch.aten.broadcast_to %1174, %1175 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%1177 = torch.valsem.aten.copy %1176, %1171, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%1178 = torch.aten.where.self %1172, %1177, %1163 : !torch.vtensor<[2,320,64,64],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%1179 = torch.aten.lt.Tensor %1178, %1167 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,320,64,64],i1> loc(#loc1)
%1180 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%1181 = torch.aten.to.dtype %1180, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%1182 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%1183 = torch.aten.broadcast_to %1181, %1182 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%1184 = torch.valsem.aten.copy %1183, %1167, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%1185 = torch.aten.where.self %1179, %1184, %1178 : !torch.vtensor<[2,320,64,64],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%1186 = torch.aten.round %1185 : !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%1187 = torch.aten.sub.Tensor %1186, %1161, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%1188 = torch.aten.mul.Tensor %1187, %1159 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%1189 = torch.prim.ListConstruct %int320, %int20, %int16, %int1, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc2)
%1190 = torch.aten.broadcast_to %27, %1189 : !torch.vtensor<[320,20,1,1,1],f16>, !torch.list<int> -> !torch.vtensor<[320,20,16,1,1],f16> loc(#loc1)
%1191 = torch.aten.clone %1190, %int0 : !torch.vtensor<[320,20,16,1,1],f16>, !torch.int -> !torch.vtensor<[320,20,16,1,1],f16> loc(#loc1)
%1192 = torch.prim.ListConstruct %int320, %int320, %int1, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc4)
%1193 = torch.aten.view %1191, %1192 : !torch.vtensor<[320,20,16,1,1],f16>, !torch.list<int> -> !torch.vtensor<[320,320,1,1],f16> loc(#loc1)
%1194 = torch.aten.mul.Tensor %28, %1193 : !torch.vtensor<[320,320,1,1],si8>, !torch.vtensor<[320,320,1,1],f16> -> !torch.vtensor<[320,320,1,1],f16> loc(#loc1)
%1195 = torch.aten.convolution %1188, %1194, %29, %933, %934, %933, %false, %934, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[320,320,1,1],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
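// NOTE (annotation): %1196-%1199 permute NCHW -> NHWC and flatten the feature
// map to [2,4096,320] tokens; %1200-%1213 then normalize them with an inline
// LayerNorm over the 320-dim axis (eps 1e-5, mean and variance via sum/320,
// rsqrt); as above, no affine parameters are applied here in this dump.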
%1196 = torch.prim.ListConstruct %int0, %int2, %int3, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc0)
%1197 = torch.aten.permute %1195, %1196 : !torch.vtensor<[2,320,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,64,64,320],f16> loc(#loc1)
%1198 = torch.prim.ListConstruct %int2, %int4096, %int320 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc5)
%1199 = torch.aten.view %1197, %1198 : !torch.vtensor<[2,64,64,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%1200 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int> loc(#loc1)
%1201 = torch.aten.sum.dim_IntList %1199, %1200, %true, %none : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,4096,1],f16> loc(#loc1)
%1202 = torch.aten.div.Scalar %1201, %int320 : !torch.vtensor<[2,4096,1],f16>, !torch.int -> !torch.vtensor<[2,4096,1],f16> loc(#loc1)
%1203 = torch.prim.ListConstruct %int2, %int4096, %int320 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%1204 = torch.aten.broadcast_to %1202, %1203 : !torch.vtensor<[2,4096,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%1205 = torch.aten.sub.Tensor %1199, %1204, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%1206 = torch.aten.mul.Tensor %1205, %1205 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%1207 = torch.aten.sum.dim_IntList %1206, %1200, %true, %none : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,4096,1],f16> loc(#loc1)
%1208 = torch.aten.div.Scalar %1207, %int320 : !torch.vtensor<[2,4096,1],f16>, !torch.int -> !torch.vtensor<[2,4096,1],f16> loc(#loc1)
%1209 = torch.aten.add.Scalar %1208, %float1.000000e-05, %int1 : !torch.vtensor<[2,4096,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,4096,1],f16> loc(#loc1)
%1210 = torch.aten.rsqrt %1209 : !torch.vtensor<[2,4096,1],f16> -> !torch.vtensor<[2,4096,1],f16> loc(#loc1)
%1211 = torch.prim.ListConstruct %int2, %int4096, %int320 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%1212 = torch.aten.broadcast_to %1210, %1211 : !torch.vtensor<[2,4096,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%1213 = torch.aten.mul.Tensor %1205, %1212 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
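// NOTE (annotation): %1214-%1258 fake-quantize the normalized tokens, this
// time grouped along the channel axis (view [2,4096,20,16], per-group max of
// |x| over the last dim), reusing the same %880/%881/%882 parameters.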
%1214 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1215 = torch.aten.detach %1214 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1216 = torch.prim.ListConstruct %int2, %int4096, %int20, %int16 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc3)
%1217 = torch.aten.view %1213, %1216 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,20,16],f16> loc(#loc1)
%1218 = torch.aten.abs %1217 : !torch.vtensor<[2,4096,20,16],f16> -> !torch.vtensor<[2,4096,20,16],f16> loc(#loc1)
%values_4, %indices_5 = torch.aten.max.dim %1218, %int3, %true : !torch.vtensor<[2,4096,20,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,4096,20,1],f16>, !torch.vtensor<[2,4096,20,1],si64> loc(#loc1)
%1219 = torch.prim.ListConstruct %int2, %int4096, %int20, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc3)
%1220 = torch.aten.view %values_4, %1219 : !torch.vtensor<[2,4096,20,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,20,1],f16> loc(#loc1)
%1221 = torch.aten.broadcast_to %1220, %1216 : !torch.vtensor<[2,4096,20,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,20,16],f16> loc(#loc1)
%1222 = torch.aten.clone %1221, %int0 : !torch.vtensor<[2,4096,20,16],f16>, !torch.int -> !torch.vtensor<[2,4096,20,16],f16> loc(#loc1)
%1223 = torch.aten.view %1222, %1198 : !torch.vtensor<[2,4096,20,16],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%1224 = torch.aten.sub.Tensor %1215, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%1225 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%1226 = torch.aten.pow.Tensor_Tensor %1225, %1224 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1227 = torch.aten.neg %1226 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1228 = torch.aten.neg %1227 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1229 = torch.aten.div.Tensor %1223, %1228 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%1230 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1231 = torch.aten.detach %1230 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1232 = torch.aten.div.Tensor %1213, %1229 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%1233 = torch.aten.add.Tensor %1232, %1231, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%1234 = torch.aten.sub.Tensor %1215, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%1235 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%1236 = torch.aten.pow.Tensor_Tensor %1235, %1234 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1237 = torch.aten.neg %1236 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1238 = torch.aten.sub.Tensor %1215, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%1239 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%1240 = torch.aten.pow.Tensor_Tensor %1239, %1238 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1241 = torch.aten.sub.Tensor %1240, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%1242 = torch.aten.gt.Tensor %1233, %1241 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,4096,320],i1> loc(#loc1)
%1243 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%1244 = torch.aten.to.dtype %1243, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%1245 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%1246 = torch.aten.broadcast_to %1244, %1245 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%1247 = torch.valsem.aten.copy %1246, %1241, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%1248 = torch.aten.where.self %1242, %1247, %1233 : !torch.vtensor<[2,4096,320],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%1249 = torch.aten.lt.Tensor %1248, %1237 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,4096,320],i1> loc(#loc1)
%1250 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%1251 = torch.aten.to.dtype %1250, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%1252 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%1253 = torch.aten.broadcast_to %1251, %1252 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%1254 = torch.valsem.aten.copy %1253, %1237, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%1255 = torch.aten.where.self %1249, %1254, %1248 : !torch.vtensor<[2,4096,320],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%1256 = torch.aten.round %1255 : !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%1257 = torch.aten.sub.Tensor %1256, %1231, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%1258 = torch.aten.mul.Tensor %1257, %1229 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
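// NOTE (annotation): %1259-%1271 dequantize a [320,320] projection weight
// (per-group scales %30 times int8 codes %31), flatten the tokens to
// [8192,320], and apply a matmul plus bias %32 - the shape and position are
// consistent with an attention query projection.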
%1259 = torch.prim.ListConstruct %int320, %int20, %int16 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc2)
%1260 = torch.aten.broadcast_to %30, %1259 : !torch.vtensor<[320,20,1],f16>, !torch.list<int> -> !torch.vtensor<[320,20,16],f16> loc(#loc1)
%1261 = torch.aten.clone %1260, %int0 : !torch.vtensor<[320,20,16],f16>, !torch.int -> !torch.vtensor<[320,20,16],f16> loc(#loc1)
%1262 = torch.prim.ListConstruct %int320, %int320 : (!torch.int, !torch.int) -> !torch.list<int> loc(#loc4)
%1263 = torch.aten.view %1261, %1262 : !torch.vtensor<[320,20,16],f16>, !torch.list<int> -> !torch.vtensor<[320,320],f16> loc(#loc1)
%1264 = torch.aten.mul.Tensor %31, %1263 : !torch.vtensor<[320,320],si8>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16> loc(#loc1)
%1265 = torch.aten.transpose.int %1264, %int0, %int1 : !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,320],f16> loc(#loc1)
%1266 = torch.prim.ListConstruct %int8192, %int320 : (!torch.int, !torch.int) -> !torch.list<int> loc(#loc3)
%1267 = torch.aten.view %1258, %1266 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16> loc(#loc1)
%1268 = torch.aten.mm %1267, %1265 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16> loc(#loc1)
%1269 = torch.aten.mul.Scalar %32, %int1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320],f16> loc(#loc1)
%1270 = torch.aten.add.Tensor %1269, %1268, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[8192,320],f16>, !torch.int -> !torch.vtensor<[8192,320],f16> loc(#loc1)
%1271 = torch.aten.view %1270, %1198 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
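// NOTE (annotation): from %1272 onward the identical quantize-and-project
// pattern repeats on the same normalized input %1213 with scales %33, int8
// codes %34, and bias %35, as expected for the matching key projection.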
%1272 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1273 = torch.aten.detach %1272 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1274 = torch.aten.view %1213, %1216 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,20,16],f16> loc(#loc1)
%1275 = torch.aten.abs %1274 : !torch.vtensor<[2,4096,20,16],f16> -> !torch.vtensor<[2,4096,20,16],f16> loc(#loc1)
%values_6, %indices_7 = torch.aten.max.dim %1275, %int3, %true : !torch.vtensor<[2,4096,20,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,4096,20,1],f16>, !torch.vtensor<[2,4096,20,1],si64> loc(#loc1)
%1276 = torch.aten.view %values_6, %1219 : !torch.vtensor<[2,4096,20,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,20,1],f16> loc(#loc1)
%1277 = torch.aten.broadcast_to %1276, %1216 : !torch.vtensor<[2,4096,20,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,20,16],f16> loc(#loc1)
%1278 = torch.aten.clone %1277, %int0 : !torch.vtensor<[2,4096,20,16],f16>, !torch.int -> !torch.vtensor<[2,4096,20,16],f16> loc(#loc1)
%1279 = torch.aten.view %1278, %1198 : !torch.vtensor<[2,4096,20,16],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%1280 = torch.aten.sub.Tensor %1273, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%1281 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%1282 = torch.aten.pow.Tensor_Tensor %1281, %1280 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1283 = torch.aten.neg %1282 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1284 = torch.aten.neg %1283 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1285 = torch.aten.div.Tensor %1279, %1284 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%1286 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1287 = torch.aten.detach %1286 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1288 = torch.aten.div.Tensor %1213, %1285 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%1289 = torch.aten.add.Tensor %1288, %1287, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%1290 = torch.aten.sub.Tensor %1273, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%1291 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%1292 = torch.aten.pow.Tensor_Tensor %1291, %1290 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1293 = torch.aten.neg %1292 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1294 = torch.aten.sub.Tensor %1273, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%1295 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%1296 = torch.aten.pow.Tensor_Tensor %1295, %1294 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1297 = torch.aten.sub.Tensor %1296, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%1298 = torch.aten.gt.Tensor %1289, %1297 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,4096,320],i1> loc(#loc1)
%1299 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%1300 = torch.aten.to.dtype %1299, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%1301 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%1302 = torch.aten.broadcast_to %1300, %1301 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%1303 = torch.valsem.aten.copy %1302, %1297, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%1304 = torch.aten.where.self %1298, %1303, %1289 : !torch.vtensor<[2,4096,320],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%1305 = torch.aten.lt.Tensor %1304, %1293 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,4096,320],i1> loc(#loc1)
%1306 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%1307 = torch.aten.to.dtype %1306, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%1308 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%1309 = torch.aten.broadcast_to %1307, %1308 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%1310 = torch.valsem.aten.copy %1309, %1293, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%1311 = torch.aten.where.self %1305, %1310, %1304 : !torch.vtensor<[2,4096,320],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%1312 = torch.aten.round %1311 : !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%1313 = torch.aten.sub.Tensor %1312, %1287, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%1314 = torch.aten.mul.Tensor %1313, %1285 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
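// Per-group weight dequantization for the next 320x320 projection: f16 scales %33
// (one per group of 16 input elements) are broadcast over the si8 weight matrix %34 and
// multiplied in, then the f16 result is transposed for the matmul; bias %35 is added.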
%1315 = torch.aten.broadcast_to %33, %1259 : !torch.vtensor<[320,20,1],f16>, !torch.list<int> -> !torch.vtensor<[320,20,16],f16> loc(#loc1)
%1316 = torch.aten.clone %1315, %int0 : !torch.vtensor<[320,20,16],f16>, !torch.int -> !torch.vtensor<[320,20,16],f16> loc(#loc1)
%1317 = torch.aten.view %1316, %1262 : !torch.vtensor<[320,20,16],f16>, !torch.list<int> -> !torch.vtensor<[320,320],f16> loc(#loc1)
%1318 = torch.aten.mul.Tensor %34, %1317 : !torch.vtensor<[320,320],si8>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16> loc(#loc1)
%1319 = torch.aten.transpose.int %1318, %int0, %int1 : !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,320],f16> loc(#loc1)
%1320 = torch.aten.view %1314, %1266 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16> loc(#loc1)
%1321 = torch.aten.mm %1320, %1319 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16> loc(#loc1)
%1322 = torch.aten.mul.Scalar %35, %int1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320],f16> loc(#loc1)
%1323 = torch.aten.add.Tensor %1322, %1321, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[8192,320],f16>, !torch.int -> !torch.vtensor<[8192,320],f16> loc(#loc1)
%1324 = torch.aten.view %1323, %1198 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
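// Same fake-quantization pattern on %1213 again; %1324 above and %1377 below are
// consistent with the key and value projections of a self-attention layer
// (%1271 being the query).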
%1325 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1326 = torch.aten.detach %1325 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1327 = torch.aten.view %1213, %1216 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,20,16],f16> loc(#loc1)
%1328 = torch.aten.abs %1327 : !torch.vtensor<[2,4096,20,16],f16> -> !torch.vtensor<[2,4096,20,16],f16> loc(#loc1)
%values_8, %indices_9 = torch.aten.max.dim %1328, %int3, %true : !torch.vtensor<[2,4096,20,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,4096,20,1],f16>, !torch.vtensor<[2,4096,20,1],si64> loc(#loc1)
%1329 = torch.aten.view %values_8, %1219 : !torch.vtensor<[2,4096,20,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,20,1],f16> loc(#loc1)
%1330 = torch.aten.broadcast_to %1329, %1216 : !torch.vtensor<[2,4096,20,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,20,16],f16> loc(#loc1)
%1331 = torch.aten.clone %1330, %int0 : !torch.vtensor<[2,4096,20,16],f16>, !torch.int -> !torch.vtensor<[2,4096,20,16],f16> loc(#loc1)
%1332 = torch.aten.view %1331, %1198 : !torch.vtensor<[2,4096,20,16],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%1333 = torch.aten.sub.Tensor %1326, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%1334 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%1335 = torch.aten.pow.Tensor_Tensor %1334, %1333 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1336 = torch.aten.neg %1335 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1337 = torch.aten.neg %1336 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1338 = torch.aten.div.Tensor %1332, %1337 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%1339 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1340 = torch.aten.detach %1339 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1341 = torch.aten.div.Tensor %1213, %1338 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%1342 = torch.aten.add.Tensor %1341, %1340, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%1343 = torch.aten.sub.Tensor %1326, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%1344 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%1345 = torch.aten.pow.Tensor_Tensor %1344, %1343 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1346 = torch.aten.neg %1345 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1347 = torch.aten.sub.Tensor %1326, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%1348 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%1349 = torch.aten.pow.Tensor_Tensor %1348, %1347 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1350 = torch.aten.sub.Tensor %1349, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%1351 = torch.aten.gt.Tensor %1342, %1350 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,4096,320],i1> loc(#loc1)
%1352 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%1353 = torch.aten.to.dtype %1352, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%1354 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%1355 = torch.aten.broadcast_to %1353, %1354 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%1356 = torch.valsem.aten.copy %1355, %1350, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%1357 = torch.aten.where.self %1351, %1356, %1342 : !torch.vtensor<[2,4096,320],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%1358 = torch.aten.lt.Tensor %1357, %1346 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,4096,320],i1> loc(#loc1)
%1359 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%1360 = torch.aten.to.dtype %1359, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%1361 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%1362 = torch.aten.broadcast_to %1360, %1361 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%1363 = torch.valsem.aten.copy %1362, %1346, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%1364 = torch.aten.where.self %1358, %1363, %1357 : !torch.vtensor<[2,4096,320],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%1365 = torch.aten.round %1364 : !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%1366 = torch.aten.sub.Tensor %1365, %1340, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%1367 = torch.aten.mul.Tensor %1366, %1338 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
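// Dequantize the si8 weights %37 with scales %36 and project (bias %38), producing %1377.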
%1368 = torch.aten.broadcast_to %36, %1259 : !torch.vtensor<[320,20,1],f16>, !torch.list<int> -> !torch.vtensor<[320,20,16],f16> loc(#loc1)
%1369 = torch.aten.clone %1368, %int0 : !torch.vtensor<[320,20,16],f16>, !torch.int -> !torch.vtensor<[320,20,16],f16> loc(#loc1)
%1370 = torch.aten.view %1369, %1262 : !torch.vtensor<[320,20,16],f16>, !torch.list<int> -> !torch.vtensor<[320,320],f16> loc(#loc1)
%1371 = torch.aten.mul.Tensor %37, %1370 : !torch.vtensor<[320,320],si8>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16> loc(#loc1)
%1372 = torch.aten.transpose.int %1371, %int0, %int1 : !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,320],f16> loc(#loc1)
%1373 = torch.aten.view %1367, %1266 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16> loc(#loc1)
%1374 = torch.aten.mm %1373, %1372 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16> loc(#loc1)
%1375 = torch.aten.mul.Scalar %38, %int1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320],f16> loc(#loc1)
%1376 = torch.aten.add.Tensor %1375, %1374, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[8192,320],f16>, !torch.int -> !torch.vtensor<[8192,320],f16> loc(#loc1)
%1377 = torch.aten.view %1376, %1198 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
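// Split the three projections into 8 heads of 40 dims and fold batch with heads:
// [2,4096,320] -> [2,4096,8,40] -> [2,8,4096,40] -> [16,4096,40].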
%1378 = torch.prim.ListConstruct %int2, %int4096, %int8, %int40 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc5)
%1379 = torch.aten.view %1271, %1378 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16> loc(#loc1)
%1380 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc0)
%1381 = torch.aten.permute %1379, %1380 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16> loc(#loc1)
%1382 = torch.aten.clone %1381, %int0 : !torch.vtensor<[2,8,4096,40],f16>, !torch.int -> !torch.vtensor<[2,8,4096,40],f16> loc(#loc1)
%1383 = torch.prim.ListConstruct %int16, %int4096, %int40 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc2)
%1384 = torch.aten.view %1382, %1383 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16> loc(#loc1)
%1385 = torch.aten.view %1324, %1378 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16> loc(#loc1)
%1386 = torch.aten.permute %1385, %1380 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16> loc(#loc1)
%1387 = torch.aten.clone %1386, %int0 : !torch.vtensor<[2,8,4096,40],f16>, !torch.int -> !torch.vtensor<[2,8,4096,40],f16> loc(#loc1)
%1388 = torch.aten.view %1387, %1383 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16> loc(#loc1)
%1389 = torch.aten.view %1377, %1378 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16> loc(#loc1)
%1390 = torch.aten.permute %1389, %1380 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16> loc(#loc1)
%1391 = torch.aten.clone %1390, %int0 : !torch.vtensor<[2,8,4096,40],f16>, !torch.int -> !torch.vtensor<[2,8,4096,40],f16> loc(#loc1)
%1392 = torch.aten.view %1391, %1383 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16> loc(#loc1)
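// Attention scores: Q (%1384) times K^T (%1393) over the 4096 spatial tokens, then
// scaled by %2, which matches 1/sqrt(40) for the 40-dim heads.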
%1393 = torch.aten.transpose.int %1388, %int-1, %int-2 : !torch.vtensor<[16,4096,40],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,40,4096],f16> loc(#loc1)
%1394 = torch.aten.broadcast_to %1384, %1383 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16> loc(#loc1)
%1395 = torch.aten.view %1394, %1383 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16> loc(#loc1)
%1396 = torch.prim.ListConstruct %int16, %int40, %int4096 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc2)
%1397 = torch.aten.broadcast_to %1393, %1396 : !torch.vtensor<[16,40,4096],f16>, !torch.list<int> -> !torch.vtensor<[16,40,4096],f16> loc(#loc1)
%1398 = torch.aten.view %1397, %1396 : !torch.vtensor<[16,40,4096],f16>, !torch.list<int> -> !torch.vtensor<[16,40,4096],f16> loc(#loc1)
%1399 = torch.aten.bmm %1395, %1398 : !torch.vtensor<[16,4096,40],f16>, !torch.vtensor<[16,40,4096],f16> -> !torch.vtensor<[16,4096,4096],f16> loc(#loc1)
%1400 = torch.prim.ListConstruct %int16, %int4096, %int4096 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc4)
%1401 = torch.aten.view %1399, %1400 : !torch.vtensor<[16,4096,4096],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,4096],f16> loc(#loc1)
%1402 = torch.aten.mul.Tensor %1401, %2 : !torch.vtensor<[16,4096,4096],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,4096,4096],f16> loc(#loc1)
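// Numerically stable softmax over the last dim: subtract the row max, exponentiate,
// normalize by the row sum.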
%values_10, %indices_11 = torch.aten.max.dim %1402, %int-1, %true : !torch.vtensor<[16,4096,4096],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,4096,1],f16>, !torch.vtensor<[16,4096,1],si64> loc(#loc1)
%1403 = torch.aten.sub.Tensor %1402, %values_10, %float1.000000e00 : !torch.vtensor<[16,4096,4096],f16>, !torch.vtensor<[16,4096,1],f16>, !torch.float -> !torch.vtensor<[16,4096,4096],f16> loc(#loc1)
%1404 = torch.aten.exp %1403 : !torch.vtensor<[16,4096,4096],f16> -> !torch.vtensor<[16,4096,4096],f16> loc(#loc1)
%1405 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int> loc(#loc1)
%1406 = torch.aten.sum.dim_IntList %1404, %1405, %true, %none : !torch.vtensor<[16,4096,4096],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[16,4096,1],f16> loc(#loc1)
%1407 = torch.aten.div.Tensor %1404, %1406 : !torch.vtensor<[16,4096,4096],f16>, !torch.vtensor<[16,4096,1],f16> -> !torch.vtensor<[16,4096,4096],f16> loc(#loc1)
%1408 = torch.aten.broadcast_to %1407, %1400 : !torch.vtensor<[16,4096,4096],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,4096],f16> loc(#loc1)
%1409 = torch.aten.view %1408, %1400 : !torch.vtensor<[16,4096,4096],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,4096],f16> loc(#loc1)
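// Apply the attention probabilities to V (%1392) and merge the heads back:
// [16,4096,40] -> [2,8,4096,40] -> [2,4096,8,40] -> [2,4096,320].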
%1410 = torch.aten.broadcast_to %1392, %1383 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16> loc(#loc1)
%1411 = torch.aten.view %1410, %1383 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16> loc(#loc1)
%1412 = torch.aten.bmm %1409, %1411 : !torch.vtensor<[16,4096,4096],f16>, !torch.vtensor<[16,4096,40],f16> -> !torch.vtensor<[16,4096,40],f16> loc(#loc1)
%1413 = torch.aten.view %1412, %1383 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16> loc(#loc1)
%1414 = torch.prim.ListConstruct %int2, %int8, %int4096, %int40 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc5)
%1415 = torch.aten.view %1413, %1414 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16> loc(#loc1)
%1416 = torch.aten.permute %1415, %1380 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16> loc(#loc1)
%1417 = torch.aten.clone %1416, %int0 : !torch.vtensor<[2,4096,8,40],f16>, !torch.int -> !torch.vtensor<[2,4096,8,40],f16> loc(#loc1)
%1418 = torch.aten.view %1417, %1198 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
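// Fake-quantize the attention output %1418 before its output projection, using the same
// per-group absmax pattern as above.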
%1419 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1420 = torch.aten.detach %1419 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1421 = torch.aten.view %1418, %1216 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,20,16],f16> loc(#loc1)
%1422 = torch.aten.abs %1421 : !torch.vtensor<[2,4096,20,16],f16> -> !torch.vtensor<[2,4096,20,16],f16> loc(#loc1)
%values_12, %indices_13 = torch.aten.max.dim %1422, %int3, %true : !torch.vtensor<[2,4096,20,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,4096,20,1],f16>, !torch.vtensor<[2,4096,20,1],si64> loc(#loc1)
%1423 = torch.aten.view %values_12, %1219 : !torch.vtensor<[2,4096,20,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,20,1],f16> loc(#loc1)
%1424 = torch.aten.broadcast_to %1423, %1216 : !torch.vtensor<[2,4096,20,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,20,16],f16> loc(#loc1)
%1425 = torch.aten.clone %1424, %int0 : !torch.vtensor<[2,4096,20,16],f16>, !torch.int -> !torch.vtensor<[2,4096,20,16],f16> loc(#loc1)
%1426 = torch.aten.view %1425, %1198 : !torch.vtensor<[2,4096,20,16],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%1427 = torch.aten.sub.Tensor %1420, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%1428 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%1429 = torch.aten.pow.Tensor_Tensor %1428, %1427 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1430 = torch.aten.neg %1429 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1431 = torch.aten.neg %1430 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1432 = torch.aten.div.Tensor %1426, %1431 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%1433 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1434 = torch.aten.detach %1433 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1435 = torch.aten.div.Tensor %1418, %1432 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%1436 = torch.aten.add.Tensor %1435, %1434, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%1437 = torch.aten.sub.Tensor %1420, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%1438 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%1439 = torch.aten.pow.Tensor_Tensor %1438, %1437 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1440 = torch.aten.neg %1439 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1441 = torch.aten.sub.Tensor %1420, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%1442 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%1443 = torch.aten.pow.Tensor_Tensor %1442, %1441 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1444 = torch.aten.sub.Tensor %1443, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%1445 = torch.aten.gt.Tensor %1436, %1444 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,4096,320],i1> loc(#loc1)
%1446 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%1447 = torch.aten.to.dtype %1446, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%1448 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%1449 = torch.aten.broadcast_to %1447, %1448 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%1450 = torch.valsem.aten.copy %1449, %1444, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%1451 = torch.aten.where.self %1445, %1450, %1436 : !torch.vtensor<[2,4096,320],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%1452 = torch.aten.lt.Tensor %1451, %1440 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,4096,320],i1> loc(#loc1)
%1453 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%1454 = torch.aten.to.dtype %1453, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%1455 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%1456 = torch.aten.broadcast_to %1454, %1455 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%1457 = torch.valsem.aten.copy %1456, %1440, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%1458 = torch.aten.where.self %1452, %1457, %1451 : !torch.vtensor<[2,4096,320],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%1459 = torch.aten.round %1458 : !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%1460 = torch.aten.sub.Tensor %1459, %1434, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%1461 = torch.aten.mul.Tensor %1460, %1432 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
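// Output projection of the attention block (scales %39, si8 weights %40, bias %41),
// followed by a residual add with the block input %1199.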
%1462 = torch.aten.broadcast_to %39, %1259 : !torch.vtensor<[320,20,1],f16>, !torch.list<int> -> !torch.vtensor<[320,20,16],f16> loc(#loc1)
%1463 = torch.aten.clone %1462, %int0 : !torch.vtensor<[320,20,16],f16>, !torch.int -> !torch.vtensor<[320,20,16],f16> loc(#loc1)
%1464 = torch.aten.view %1463, %1262 : !torch.vtensor<[320,20,16],f16>, !torch.list<int> -> !torch.vtensor<[320,320],f16> loc(#loc1)
%1465 = torch.aten.mul.Tensor %40, %1464 : !torch.vtensor<[320,320],si8>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16> loc(#loc1)
%1466 = torch.aten.transpose.int %1465, %int0, %int1 : !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,320],f16> loc(#loc1)
%1467 = torch.aten.view %1461, %1266 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16> loc(#loc1)
%1468 = torch.aten.mm %1467, %1466 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16> loc(#loc1)
%1469 = torch.aten.mul.Scalar %41, %int1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320],f16> loc(#loc1)
%1470 = torch.aten.add.Tensor %1469, %1468, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[8192,320],f16>, !torch.int -> !torch.vtensor<[8192,320],f16> loc(#loc1)
%1471 = torch.aten.view %1470, %1198 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%1472 = torch.aten.add.Tensor %1471, %1199, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
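// LayerNorm over the 320-channel dim written out long-hand: mean over dim 2, centered
// variance, rsqrt(var + 1e-5), then normalize.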
%1473 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int> loc(#loc1)
%1474 = torch.aten.sum.dim_IntList %1472, %1473, %true, %none : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,4096,1],f16> loc(#loc1)
%1475 = torch.aten.div.Scalar %1474, %int320 : !torch.vtensor<[2,4096,1],f16>, !torch.int -> !torch.vtensor<[2,4096,1],f16> loc(#loc1)
%1476 = torch.prim.ListConstruct %int2, %int4096, %int320 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%1477 = torch.aten.broadcast_to %1475, %1476 : !torch.vtensor<[2,4096,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%1478 = torch.aten.sub.Tensor %1472, %1477, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%1479 = torch.aten.mul.Tensor %1478, %1478 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%1480 = torch.aten.sum.dim_IntList %1479, %1473, %true, %none : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,4096,1],f16> loc(#loc1)
%1481 = torch.aten.div.Scalar %1480, %int320 : !torch.vtensor<[2,4096,1],f16>, !torch.int -> !torch.vtensor<[2,4096,1],f16> loc(#loc1)
%1482 = torch.aten.add.Scalar %1481, %float1.000000e-05, %int1 : !torch.vtensor<[2,4096,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,4096,1],f16> loc(#loc1)
%1483 = torch.aten.rsqrt %1482 : !torch.vtensor<[2,4096,1],f16> -> !torch.vtensor<[2,4096,1],f16> loc(#loc1)
%1484 = torch.prim.ListConstruct %int2, %int4096, %int320 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%1485 = torch.aten.broadcast_to %1483, %1484 : !torch.vtensor<[2,4096,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%1486 = torch.aten.mul.Tensor %1478, %1485 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
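// Fake-quantize the normalized activations; the projection built from %42/%43 below
// produces %1539, which is later reshaped into the cross-attention query.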
%1487 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1488 = torch.aten.detach %1487 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1489 = torch.aten.view %1486, %1216 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,20,16],f16> loc(#loc1)
%1490 = torch.aten.abs %1489 : !torch.vtensor<[2,4096,20,16],f16> -> !torch.vtensor<[2,4096,20,16],f16> loc(#loc1)
%values_14, %indices_15 = torch.aten.max.dim %1490, %int3, %true : !torch.vtensor<[2,4096,20,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,4096,20,1],f16>, !torch.vtensor<[2,4096,20,1],si64> loc(#loc1)
%1491 = torch.aten.view %values_14, %1219 : !torch.vtensor<[2,4096,20,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,20,1],f16> loc(#loc1)
%1492 = torch.aten.broadcast_to %1491, %1216 : !torch.vtensor<[2,4096,20,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,20,16],f16> loc(#loc1)
%1493 = torch.aten.clone %1492, %int0 : !torch.vtensor<[2,4096,20,16],f16>, !torch.int -> !torch.vtensor<[2,4096,20,16],f16> loc(#loc1)
%1494 = torch.aten.view %1493, %1198 : !torch.vtensor<[2,4096,20,16],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%1495 = torch.aten.sub.Tensor %1488, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%1496 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%1497 = torch.aten.pow.Tensor_Tensor %1496, %1495 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1498 = torch.aten.neg %1497 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1499 = torch.aten.neg %1498 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1500 = torch.aten.div.Tensor %1494, %1499 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%1501 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1502 = torch.aten.detach %1501 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1503 = torch.aten.div.Tensor %1486, %1500 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%1504 = torch.aten.add.Tensor %1503, %1502, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%1505 = torch.aten.sub.Tensor %1488, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%1506 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%1507 = torch.aten.pow.Tensor_Tensor %1506, %1505 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1508 = torch.aten.neg %1507 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1509 = torch.aten.sub.Tensor %1488, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%1510 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%1511 = torch.aten.pow.Tensor_Tensor %1510, %1509 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1512 = torch.aten.sub.Tensor %1511, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%1513 = torch.aten.gt.Tensor %1504, %1512 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,4096,320],i1> loc(#loc1)
%1514 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%1515 = torch.aten.to.dtype %1514, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%1516 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%1517 = torch.aten.broadcast_to %1515, %1516 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%1518 = torch.valsem.aten.copy %1517, %1512, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%1519 = torch.aten.where.self %1513, %1518, %1504 : !torch.vtensor<[2,4096,320],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%1520 = torch.aten.lt.Tensor %1519, %1508 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,4096,320],i1> loc(#loc1)
%1521 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%1522 = torch.aten.to.dtype %1521, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%1523 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%1524 = torch.aten.broadcast_to %1522, %1523 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%1525 = torch.valsem.aten.copy %1524, %1508, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%1526 = torch.aten.where.self %1520, %1525, %1519 : !torch.vtensor<[2,4096,320],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%1527 = torch.aten.round %1526 : !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%1528 = torch.aten.sub.Tensor %1527, %1502, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%1529 = torch.aten.mul.Tensor %1528, %1500 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
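// Query projection for cross-attention (scales %42, si8 weights %43, bias %44).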
%1530 = torch.aten.broadcast_to %42, %1259 : !torch.vtensor<[320,20,1],f16>, !torch.list<int> -> !torch.vtensor<[320,20,16],f16> loc(#loc1)
%1531 = torch.aten.clone %1530, %int0 : !torch.vtensor<[320,20,16],f16>, !torch.int -> !torch.vtensor<[320,20,16],f16> loc(#loc1)
%1532 = torch.aten.view %1531, %1262 : !torch.vtensor<[320,20,16],f16>, !torch.list<int> -> !torch.vtensor<[320,320],f16> loc(#loc1)
%1533 = torch.aten.mul.Tensor %43, %1532 : !torch.vtensor<[320,320],si8>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16> loc(#loc1)
%1534 = torch.aten.transpose.int %1533, %int0, %int1 : !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,320],f16> loc(#loc1)
%1535 = torch.aten.view %1529, %1266 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16> loc(#loc1)
%1536 = torch.aten.mm %1535, %1534 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16> loc(#loc1)
%1537 = torch.aten.mul.Scalar %44, %int1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320],f16> loc(#loc1)
%1538 = torch.aten.add.Tensor %1537, %1536, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[8192,320],f16>, !torch.int -> !torch.vtensor<[8192,320],f16> loc(#loc1)
%1539 = torch.aten.view %1538, %1198 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
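// Fake-quantize the text-encoder hidden states %arg2 ([2,77,768]); here the grouping is
// 48 groups of 16 along the 768 channels.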
%1540 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1541 = torch.aten.detach %1540 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1542 = torch.prim.ListConstruct %int2, %int77, %int48, %int16 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc3)
%1543 = torch.aten.view %arg2, %1542 : !torch.vtensor<[2,77,768],f16>, !torch.list<int> -> !torch.vtensor<[2,77,48,16],f16> loc(#loc1)
%1544 = torch.aten.abs %1543 : !torch.vtensor<[2,77,48,16],f16> -> !torch.vtensor<[2,77,48,16],f16> loc(#loc1)
%values_16, %indices_17 = torch.aten.max.dim %1544, %int3, %true : !torch.vtensor<[2,77,48,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,77,48,1],f16>, !torch.vtensor<[2,77,48,1],si64> loc(#loc1)
%1545 = torch.prim.ListConstruct %int2, %int77, %int48, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc3)
%1546 = torch.aten.view %values_16, %1545 : !torch.vtensor<[2,77,48,1],f16>, !torch.list<int> -> !torch.vtensor<[2,77,48,1],f16> loc(#loc1)
%1547 = torch.aten.broadcast_to %1546, %1542 : !torch.vtensor<[2,77,48,1],f16>, !torch.list<int> -> !torch.vtensor<[2,77,48,16],f16> loc(#loc1)
%1548 = torch.aten.clone %1547, %int0 : !torch.vtensor<[2,77,48,16],f16>, !torch.int -> !torch.vtensor<[2,77,48,16],f16> loc(#loc1)
%1549 = torch.prim.ListConstruct %int2, %int77, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc6)
%1550 = torch.aten.view %1548, %1549 : !torch.vtensor<[2,77,48,16],f16>, !torch.list<int> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%1551 = torch.aten.sub.Tensor %1541, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%1552 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%1553 = torch.aten.pow.Tensor_Tensor %1552, %1551 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1554 = torch.aten.neg %1553 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1555 = torch.aten.neg %1554 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1556 = torch.aten.div.Tensor %1550, %1555 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%1557 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1558 = torch.aten.detach %1557 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1559 = torch.aten.div.Tensor %arg2, %1556 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[2,77,768],f16> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%1560 = torch.aten.add.Tensor %1559, %1558, %int1 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%1561 = torch.aten.sub.Tensor %1541, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%1562 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%1563 = torch.aten.pow.Tensor_Tensor %1562, %1561 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1564 = torch.aten.neg %1563 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1565 = torch.aten.sub.Tensor %1541, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%1566 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%1567 = torch.aten.pow.Tensor_Tensor %1566, %1565 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1568 = torch.aten.sub.Tensor %1567, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%1569 = torch.aten.gt.Tensor %1560, %1568 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,77,768],i1> loc(#loc1)
%1570 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%1571 = torch.aten.to.dtype %1570, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%1572 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%1573 = torch.aten.broadcast_to %1571, %1572 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%1574 = torch.valsem.aten.copy %1573, %1568, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%1575 = torch.aten.where.self %1569, %1574, %1560 : !torch.vtensor<[2,77,768],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,77,768],f16> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%1576 = torch.aten.lt.Tensor %1575, %1564 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,77,768],i1> loc(#loc1)
%1577 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%1578 = torch.aten.to.dtype %1577, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%1579 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%1580 = torch.aten.broadcast_to %1578, %1579 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%1581 = torch.valsem.aten.copy %1580, %1564, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%1582 = torch.aten.where.self %1576, %1581, %1575 : !torch.vtensor<[2,77,768],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,77,768],f16> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%1583 = torch.aten.round %1582 : !torch.vtensor<[2,77,768],f16> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%1584 = torch.aten.sub.Tensor %1583, %1558, %int1 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%1585 = torch.aten.mul.Tensor %1584, %1556 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[2,77,768],f16> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
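// Cross-attention key projection, 768 -> 320 (scales %45, si8 weights %46), applied to
// the quantized text embeddings flattened to [154,768]; note there is no bias add here.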
%1586 = torch.prim.ListConstruct %int320, %int48, %int16 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc2)
%1587 = torch.aten.broadcast_to %45, %1586 : !torch.vtensor<[320,48,1],f16>, !torch.list<int> -> !torch.vtensor<[320,48,16],f16> loc(#loc1)
%1588 = torch.aten.clone %1587, %int0 : !torch.vtensor<[320,48,16],f16>, !torch.int -> !torch.vtensor<[320,48,16],f16> loc(#loc1)
%1589 = torch.prim.ListConstruct %int320, %int768 : (!torch.int, !torch.int) -> !torch.list<int> loc(#loc4)
%1590 = torch.aten.view %1588, %1589 : !torch.vtensor<[320,48,16],f16>, !torch.list<int> -> !torch.vtensor<[320,768],f16> loc(#loc1)
%1591 = torch.aten.mul.Tensor %46, %1590 : !torch.vtensor<[320,768],si8>, !torch.vtensor<[320,768],f16> -> !torch.vtensor<[320,768],f16> loc(#loc1)
%1592 = torch.aten.transpose.int %1591, %int0, %int1 : !torch.vtensor<[320,768],f16>, !torch.int, !torch.int -> !torch.vtensor<[768,320],f16> loc(#loc1)
%1593 = torch.prim.ListConstruct %int154, %int768 : (!torch.int, !torch.int) -> !torch.list<int> loc(#loc5)
%1594 = torch.aten.view %1585, %1593 : !torch.vtensor<[2,77,768],f16>, !torch.list<int> -> !torch.vtensor<[154,768],f16> loc(#loc1)
%1595 = torch.aten.mm %1594, %1592 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,320],f16> -> !torch.vtensor<[154,320],f16> loc(#loc1)
%1596 = torch.prim.ListConstruct %int2, %int77, %int320 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc0)
%1597 = torch.aten.view %1595, %1596 : !torch.vtensor<[154,320],f16>, !torch.list<int> -> !torch.vtensor<[2,77,320],f16> loc(#loc1)
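// Same quantize-then-project pattern for the value projection of the text embeddings
// (scales %47, si8 weights %48).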
%1598 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1599 = torch.aten.detach %1598 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1600 = torch.aten.view %arg2, %1542 : !torch.vtensor<[2,77,768],f16>, !torch.list<int> -> !torch.vtensor<[2,77,48,16],f16> loc(#loc1)
%1601 = torch.aten.abs %1600 : !torch.vtensor<[2,77,48,16],f16> -> !torch.vtensor<[2,77,48,16],f16> loc(#loc1)
%values_18, %indices_19 = torch.aten.max.dim %1601, %int3, %true : !torch.vtensor<[2,77,48,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,77,48,1],f16>, !torch.vtensor<[2,77,48,1],si64> loc(#loc1)
%1602 = torch.aten.view %values_18, %1545 : !torch.vtensor<[2,77,48,1],f16>, !torch.list<int> -> !torch.vtensor<[2,77,48,1],f16> loc(#loc1)
%1603 = torch.aten.broadcast_to %1602, %1542 : !torch.vtensor<[2,77,48,1],f16>, !torch.list<int> -> !torch.vtensor<[2,77,48,16],f16> loc(#loc1)
%1604 = torch.aten.clone %1603, %int0 : !torch.vtensor<[2,77,48,16],f16>, !torch.int -> !torch.vtensor<[2,77,48,16],f16> loc(#loc1)
%1605 = torch.aten.view %1604, %1549 : !torch.vtensor<[2,77,48,16],f16>, !torch.list<int> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%1606 = torch.aten.sub.Tensor %1599, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%1607 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%1608 = torch.aten.pow.Tensor_Tensor %1607, %1606 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1609 = torch.aten.neg %1608 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1610 = torch.aten.neg %1609 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1611 = torch.aten.div.Tensor %1605, %1610 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%1612 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1613 = torch.aten.detach %1612 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1614 = torch.aten.div.Tensor %arg2, %1611 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[2,77,768],f16> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%1615 = torch.aten.add.Tensor %1614, %1613, %int1 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%1616 = torch.aten.sub.Tensor %1599, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%1617 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%1618 = torch.aten.pow.Tensor_Tensor %1617, %1616 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1619 = torch.aten.neg %1618 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1620 = torch.aten.sub.Tensor %1599, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%1621 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%1622 = torch.aten.pow.Tensor_Tensor %1621, %1620 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1623 = torch.aten.sub.Tensor %1622, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%1624 = torch.aten.gt.Tensor %1615, %1623 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,77,768],i1> loc(#loc1)
%1625 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%1626 = torch.aten.to.dtype %1625, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%1627 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%1628 = torch.aten.broadcast_to %1626, %1627 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%1629 = torch.valsem.aten.copy %1628, %1623, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%1630 = torch.aten.where.self %1624, %1629, %1615 : !torch.vtensor<[2,77,768],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,77,768],f16> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%1631 = torch.aten.lt.Tensor %1630, %1619 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,77,768],i1> loc(#loc1)
%1632 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%1633 = torch.aten.to.dtype %1632, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%1634 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%1635 = torch.aten.broadcast_to %1633, %1634 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%1636 = torch.valsem.aten.copy %1635, %1619, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%1637 = torch.aten.where.self %1631, %1636, %1630 : !torch.vtensor<[2,77,768],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,77,768],f16> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%1638 = torch.aten.round %1637 : !torch.vtensor<[2,77,768],f16> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%1639 = torch.aten.sub.Tensor %1638, %1613, %int1 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%1640 = torch.aten.mul.Tensor %1639, %1611 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[2,77,768],f16> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%1641 = torch.aten.broadcast_to %47, %1586 : !torch.vtensor<[320,48,1],f16>, !torch.list<int> -> !torch.vtensor<[320,48,16],f16> loc(#loc1)
%1642 = torch.aten.clone %1641, %int0 : !torch.vtensor<[320,48,16],f16>, !torch.int -> !torch.vtensor<[320,48,16],f16> loc(#loc1)
%1643 = torch.aten.view %1642, %1589 : !torch.vtensor<[320,48,16],f16>, !torch.list<int> -> !torch.vtensor<[320,768],f16> loc(#loc1)
%1644 = torch.aten.mul.Tensor %48, %1643 : !torch.vtensor<[320,768],si8>, !torch.vtensor<[320,768],f16> -> !torch.vtensor<[320,768],f16> loc(#loc1)
%1645 = torch.aten.transpose.int %1644, %int0, %int1 : !torch.vtensor<[320,768],f16>, !torch.int, !torch.int -> !torch.vtensor<[768,320],f16> loc(#loc1)
%1646 = torch.aten.view %1640, %1593 : !torch.vtensor<[2,77,768],f16>, !torch.list<int> -> !torch.vtensor<[154,768],f16> loc(#loc1)
%1647 = torch.aten.mm %1646, %1645 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,320],f16> -> !torch.vtensor<[154,320],f16> loc(#loc1)
%1648 = torch.aten.view %1647, %1596 : !torch.vtensor<[154,320],f16>, !torch.list<int> -> !torch.vtensor<[2,77,320],f16> loc(#loc1)
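// Reshape Q (4096 spatial tokens) and K/V (77 text tokens) into 8 heads of 40 dims:
// [16,4096,40] for Q and [16,77,40] for K and V.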
%1649 = torch.aten.view %1539, %1378 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16> loc(#loc1)
%1650 = torch.aten.permute %1649, %1380 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16> loc(#loc1)
%1651 = torch.aten.clone %1650, %int0 : !torch.vtensor<[2,8,4096,40],f16>, !torch.int -> !torch.vtensor<[2,8,4096,40],f16> loc(#loc1)
%1652 = torch.aten.view %1651, %1383 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16> loc(#loc1)
%1653 = torch.prim.ListConstruct %int2, %int77, %int8, %int40 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc5)
%1654 = torch.aten.view %1597, %1653 : !torch.vtensor<[2,77,320],f16>, !torch.list<int> -> !torch.vtensor<[2,77,8,40],f16> loc(#loc1)
%1655 = torch.aten.permute %1654, %1380 : !torch.vtensor<[2,77,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,40],f16> loc(#loc1)
%1656 = torch.aten.clone %1655, %int0 : !torch.vtensor<[2,8,77,40],f16>, !torch.int -> !torch.vtensor<[2,8,77,40],f16> loc(#loc1)
%1657 = torch.prim.ListConstruct %int16, %int77, %int40 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc2)
%1658 = torch.aten.view %1656, %1657 : !torch.vtensor<[2,8,77,40],f16>, !torch.list<int> -> !torch.vtensor<[16,77,40],f16> loc(#loc1)
%1659 = torch.aten.view %1648, %1653 : !torch.vtensor<[2,77,320],f16>, !torch.list<int> -> !torch.vtensor<[2,77,8,40],f16> loc(#loc1)
%1660 = torch.aten.permute %1659, %1380 : !torch.vtensor<[2,77,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,40],f16> loc(#loc1)
%1661 = torch.aten.clone %1660, %int0 : !torch.vtensor<[2,8,77,40],f16>, !torch.int -> !torch.vtensor<[2,8,77,40],f16> loc(#loc1)
%1662 = torch.aten.view %1661, %1657 : !torch.vtensor<[2,8,77,40],f16>, !torch.list<int> -> !torch.vtensor<[16,77,40],f16> loc(#loc1)
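// Cross-attention: Q·K^T -> [16,4096,77] scaled by %2, stable softmax over the text
// tokens, multiply with V, and merge heads back to [2,4096,320].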
%1663 = torch.aten.transpose.int %1658, %int-1, %int-2 : !torch.vtensor<[16,77,40],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,40,77],f16> loc(#loc1)
%1664 = torch.aten.broadcast_to %1652, %1383 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16> loc(#loc1)
%1665 = torch.aten.view %1664, %1383 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16> loc(#loc1)
%1666 = torch.prim.ListConstruct %int16, %int40, %int77 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc2)
%1667 = torch.aten.broadcast_to %1663, %1666 : !torch.vtensor<[16,40,77],f16>, !torch.list<int> -> !torch.vtensor<[16,40,77],f16> loc(#loc1)
%1668 = torch.aten.view %1667, %1666 : !torch.vtensor<[16,40,77],f16>, !torch.list<int> -> !torch.vtensor<[16,40,77],f16> loc(#loc1)
%1669 = torch.aten.bmm %1665, %1668 : !torch.vtensor<[16,4096,40],f16>, !torch.vtensor<[16,40,77],f16> -> !torch.vtensor<[16,4096,77],f16> loc(#loc1)
%1670 = torch.prim.ListConstruct %int16, %int4096, %int77 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc4)
%1671 = torch.aten.view %1669, %1670 : !torch.vtensor<[16,4096,77],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,77],f16> loc(#loc1)
%1672 = torch.aten.mul.Tensor %1671, %2 : !torch.vtensor<[16,4096,77],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,4096,77],f16> loc(#loc1)
%values_20, %indices_21 = torch.aten.max.dim %1672, %int-1, %true : !torch.vtensor<[16,4096,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,4096,1],f16>, !torch.vtensor<[16,4096,1],si64> loc(#loc1)
%1673 = torch.aten.sub.Tensor %1672, %values_20, %float1.000000e00 : !torch.vtensor<[16,4096,77],f16>, !torch.vtensor<[16,4096,1],f16>, !torch.float -> !torch.vtensor<[16,4096,77],f16> loc(#loc1)
%1674 = torch.aten.exp %1673 : !torch.vtensor<[16,4096,77],f16> -> !torch.vtensor<[16,4096,77],f16> loc(#loc1)
%1675 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int> loc(#loc1)
%1676 = torch.aten.sum.dim_IntList %1674, %1675, %true, %none : !torch.vtensor<[16,4096,77],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[16,4096,1],f16> loc(#loc1)
%1677 = torch.aten.div.Tensor %1674, %1676 : !torch.vtensor<[16,4096,77],f16>, !torch.vtensor<[16,4096,1],f16> -> !torch.vtensor<[16,4096,77],f16> loc(#loc1)
%1678 = torch.aten.broadcast_to %1677, %1670 : !torch.vtensor<[16,4096,77],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,77],f16> loc(#loc1)
%1679 = torch.aten.view %1678, %1670 : !torch.vtensor<[16,4096,77],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,77],f16> loc(#loc1)
%1680 = torch.aten.broadcast_to %1662, %1657 : !torch.vtensor<[16,77,40],f16>, !torch.list<int> -> !torch.vtensor<[16,77,40],f16> loc(#loc1)
%1681 = torch.aten.view %1680, %1657 : !torch.vtensor<[16,77,40],f16>, !torch.list<int> -> !torch.vtensor<[16,77,40],f16> loc(#loc1)
%1682 = torch.aten.bmm %1679, %1681 : !torch.vtensor<[16,4096,77],f16>, !torch.vtensor<[16,77,40],f16> -> !torch.vtensor<[16,4096,40],f16> loc(#loc1)
%1683 = torch.aten.view %1682, %1383 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16> loc(#loc1)
%1684 = torch.aten.view %1683, %1414 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16> loc(#loc1)
%1685 = torch.aten.permute %1684, %1380 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16> loc(#loc1)
%1686 = torch.aten.clone %1685, %int0 : !torch.vtensor<[2,4096,8,40],f16>, !torch.int -> !torch.vtensor<[2,4096,8,40],f16> loc(#loc1)
%1687 = torch.aten.view %1686, %1198 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
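// Fake-quantize the cross-attention output ahead of its output projection.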
%1688 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1689 = torch.aten.detach %1688 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1690 = torch.aten.view %1687, %1216 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,20,16],f16> loc(#loc1)
%1691 = torch.aten.abs %1690 : !torch.vtensor<[2,4096,20,16],f16> -> !torch.vtensor<[2,4096,20,16],f16> loc(#loc1)
%values_22, %indices_23 = torch.aten.max.dim %1691, %int3, %true : !torch.vtensor<[2,4096,20,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,4096,20,1],f16>, !torch.vtensor<[2,4096,20,1],si64> loc(#loc1)
%1692 = torch.aten.view %values_22, %1219 : !torch.vtensor<[2,4096,20,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,20,1],f16> loc(#loc1)
%1693 = torch.aten.broadcast_to %1692, %1216 : !torch.vtensor<[2,4096,20,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,20,16],f16> loc(#loc1)
%1694 = torch.aten.clone %1693, %int0 : !torch.vtensor<[2,4096,20,16],f16>, !torch.int -> !torch.vtensor<[2,4096,20,16],f16> loc(#loc1)
%1695 = torch.aten.view %1694, %1198 : !torch.vtensor<[2,4096,20,16],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%1696 = torch.aten.sub.Tensor %1689, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%1697 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%1698 = torch.aten.pow.Tensor_Tensor %1697, %1696 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1699 = torch.aten.neg %1698 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1700 = torch.aten.neg %1699 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1701 = torch.aten.div.Tensor %1695, %1700 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%1702 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1703 = torch.aten.detach %1702 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1704 = torch.aten.div.Tensor %1687, %1701 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%1705 = torch.aten.add.Tensor %1704, %1703, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%1706 = torch.aten.sub.Tensor %1689, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%1707 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%1708 = torch.aten.pow.Tensor_Tensor %1707, %1706 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1709 = torch.aten.neg %1708 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1710 = torch.aten.sub.Tensor %1689, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%1711 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%1712 = torch.aten.pow.Tensor_Tensor %1711, %1710 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1713 = torch.aten.sub.Tensor %1712, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%1714 = torch.aten.gt.Tensor %1705, %1713 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,4096,320],i1> loc(#loc1)
%1715 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%1716 = torch.aten.to.dtype %1715, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%1717 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%1718 = torch.aten.broadcast_to %1716, %1717 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%1719 = torch.valsem.aten.copy %1718, %1713, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%1720 = torch.aten.where.self %1714, %1719, %1705 : !torch.vtensor<[2,4096,320],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%1721 = torch.aten.lt.Tensor %1720, %1709 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,4096,320],i1> loc(#loc1)
%1722 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%1723 = torch.aten.to.dtype %1722, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%1724 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%1725 = torch.aten.broadcast_to %1723, %1724 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%1726 = torch.valsem.aten.copy %1725, %1709, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%1727 = torch.aten.where.self %1721, %1726, %1720 : !torch.vtensor<[2,4096,320],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%1728 = torch.aten.round %1727 : !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%1729 = torch.aten.sub.Tensor %1728, %1703, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%1730 = torch.aten.mul.Tensor %1729, %1701 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
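// Attention output projection with an on-the-fly dequantized weight: the
// per-group scales %49 ([320,20,1]) are broadcast across 16-element groups,
// reshaped to [320,320], and multiplied into the si8 weight %50; the f16
// result is transposed and applied as x @ W^T plus bias %51.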
%1731 = torch.aten.broadcast_to %49, %1259 : !torch.vtensor<[320,20,1],f16>, !torch.list<int> -> !torch.vtensor<[320,20,16],f16> loc(#loc1)
%1732 = torch.aten.clone %1731, %int0 : !torch.vtensor<[320,20,16],f16>, !torch.int -> !torch.vtensor<[320,20,16],f16> loc(#loc1)
%1733 = torch.aten.view %1732, %1262 : !torch.vtensor<[320,20,16],f16>, !torch.list<int> -> !torch.vtensor<[320,320],f16> loc(#loc1)
%1734 = torch.aten.mul.Tensor %50, %1733 : !torch.vtensor<[320,320],si8>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16> loc(#loc1)
%1735 = torch.aten.transpose.int %1734, %int0, %int1 : !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,320],f16> loc(#loc1)
%1736 = torch.aten.view %1730, %1266 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16> loc(#loc1)
%1737 = torch.aten.mm %1736, %1735 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16> loc(#loc1)
%1738 = torch.aten.mul.Scalar %51, %int1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320],f16> loc(#loc1)
%1739 = torch.aten.add.Tensor %1738, %1737, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[8192,320],f16>, !torch.int -> !torch.vtensor<[8192,320],f16> loc(#loc1)
%1740 = torch.aten.view %1739, %1198 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%1741 = torch.aten.add.Tensor %1740, %1472, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
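// Inline LayerNorm over the last (320-wide) dimension after the residual add:
// mean and biased variance via sum/div, then (x - mean) * rsqrt(var + 1e-5).
// No affine scale/shift ops appear in this expansion.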
%1742 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int> loc(#loc1)
%1743 = torch.aten.sum.dim_IntList %1741, %1742, %true, %none : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,4096,1],f16> loc(#loc1)
%1744 = torch.aten.div.Scalar %1743, %int320 : !torch.vtensor<[2,4096,1],f16>, !torch.int -> !torch.vtensor<[2,4096,1],f16> loc(#loc1)
%1745 = torch.prim.ListConstruct %int2, %int4096, %int320 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%1746 = torch.aten.broadcast_to %1744, %1745 : !torch.vtensor<[2,4096,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%1747 = torch.aten.sub.Tensor %1741, %1746, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%1748 = torch.aten.mul.Tensor %1747, %1747 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%1749 = torch.aten.sum.dim_IntList %1748, %1742, %true, %none : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,4096,1],f16> loc(#loc1)
%1750 = torch.aten.div.Scalar %1749, %int320 : !torch.vtensor<[2,4096,1],f16>, !torch.int -> !torch.vtensor<[2,4096,1],f16> loc(#loc1)
%1751 = torch.aten.add.Scalar %1750, %float1.000000e-05, %int1 : !torch.vtensor<[2,4096,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,4096,1],f16> loc(#loc1)
%1752 = torch.aten.rsqrt %1751 : !torch.vtensor<[2,4096,1],f16> -> !torch.vtensor<[2,4096,1],f16> loc(#loc1)
%1753 = torch.prim.ListConstruct %int2, %int4096, %int320 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%1754 = torch.aten.broadcast_to %1752, %1753 : !torch.vtensor<[2,4096,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%1755 = torch.aten.mul.Tensor %1747, %1754 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
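// Feed-forward entry in GEGLU form: a dequantized 320 -> 2560 projection
// (scales %52, si8 weight %53, bias %54), after which the [2,4096,2560]
// result is split into two 1280-wide halves and the GELU of the second half
// gates the first elementwise.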
%1756 = torch.prim.ListConstruct %int2560, %int20, %int16 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc2)
%1757 = torch.aten.broadcast_to %52, %1756 : !torch.vtensor<[2560,20,1],f16>, !torch.list<int> -> !torch.vtensor<[2560,20,16],f16> loc(#loc1)
%1758 = torch.aten.clone %1757, %int0 : !torch.vtensor<[2560,20,16],f16>, !torch.int -> !torch.vtensor<[2560,20,16],f16> loc(#loc1)
%1759 = torch.prim.ListConstruct %int2560, %int320 : (!torch.int, !torch.int) -> !torch.list<int> loc(#loc4)
%1760 = torch.aten.view %1758, %1759 : !torch.vtensor<[2560,20,16],f16>, !torch.list<int> -> !torch.vtensor<[2560,320],f16> loc(#loc1)
%1761 = torch.aten.mul.Tensor %53, %1760 : !torch.vtensor<[2560,320],si8>, !torch.vtensor<[2560,320],f16> -> !torch.vtensor<[2560,320],f16> loc(#loc1)
%1762 = torch.aten.transpose.int %1761, %int0, %int1 : !torch.vtensor<[2560,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,2560],f16> loc(#loc1)
%1763 = torch.aten.view %1755, %1266 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16> loc(#loc1)
%1764 = torch.aten.mm %1763, %1762 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,2560],f16> -> !torch.vtensor<[8192,2560],f16> loc(#loc1)
%1765 = torch.aten.mul.Scalar %54, %int1 : !torch.vtensor<[2560],f16>, !torch.int -> !torch.vtensor<[2560],f16> loc(#loc1)
%1766 = torch.aten.add.Tensor %1765, %1764, %int1 : !torch.vtensor<[2560],f16>, !torch.vtensor<[8192,2560],f16>, !torch.int -> !torch.vtensor<[8192,2560],f16> loc(#loc1)
%1767 = torch.prim.ListConstruct %int2, %int4096, %int2560 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc3)
%1768 = torch.aten.view %1766, %1767 : !torch.vtensor<[8192,2560],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,2560],f16> loc(#loc1)
%1769 = torch.aten.slice.Tensor %1768, %int-1, %int0, %int1280, %int1 : !torch.vtensor<[2,4096,2560],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,4096,1280],f16> loc(#loc1)
%1770 = torch.aten.slice.Tensor %1768, %int-1, %int1280, %int2560, %int1 : !torch.vtensor<[2,4096,2560],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,4096,1280],f16> loc(#loc1)
%1771 = torch.aten.gelu %1770, %str : !torch.vtensor<[2,4096,1280],f16>, !torch.str -> !torch.vtensor<[2,4096,1280],f16> loc(#loc1)
%1772 = torch.aten.mul.Tensor %1769, %1771 : !torch.vtensor<[2,4096,1280],f16>, !torch.vtensor<[2,4096,1280],f16> -> !torch.vtensor<[2,4096,1280],f16> loc(#loc1)
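// Feed-forward output projection with the same dequantization pattern
// (scales %55, si8 weight %56, bias %57), mapping 1280 -> 320, followed by
// the residual add onto %1741.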
%1773 = torch.prim.ListConstruct %int320, %int80, %int16 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc2)
%1774 = torch.aten.broadcast_to %55, %1773 : !torch.vtensor<[320,80,1],f16>, !torch.list<int> -> !torch.vtensor<[320,80,16],f16> loc(#loc1)
%1775 = torch.aten.clone %1774, %int0 : !torch.vtensor<[320,80,16],f16>, !torch.int -> !torch.vtensor<[320,80,16],f16> loc(#loc1)
%1776 = torch.prim.ListConstruct %int320, %int1280 : (!torch.int, !torch.int) -> !torch.list<int> loc(#loc4)
%1777 = torch.aten.view %1775, %1776 : !torch.vtensor<[320,80,16],f16>, !torch.list<int> -> !torch.vtensor<[320,1280],f16> loc(#loc1)
%1778 = torch.aten.mul.Tensor %56, %1777 : !torch.vtensor<[320,1280],si8>, !torch.vtensor<[320,1280],f16> -> !torch.vtensor<[320,1280],f16> loc(#loc1)
%1779 = torch.aten.transpose.int %1778, %int0, %int1 : !torch.vtensor<[320,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,320],f16> loc(#loc1)
%1780 = torch.prim.ListConstruct %int8192, %int1280 : (!torch.int, !torch.int) -> !torch.list<int> loc(#loc3)
%1781 = torch.aten.view %1772, %1780 : !torch.vtensor<[2,4096,1280],f16>, !torch.list<int> -> !torch.vtensor<[8192,1280],f16> loc(#loc1)
%1782 = torch.aten.mm %1781, %1779 : !torch.vtensor<[8192,1280],f16>, !torch.vtensor<[1280,320],f16> -> !torch.vtensor<[8192,320],f16> loc(#loc1)
%1783 = torch.aten.mul.Scalar %57, %int1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320],f16> loc(#loc1)
%1784 = torch.aten.add.Tensor %1783, %1782, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[8192,320],f16>, !torch.int -> !torch.vtensor<[8192,320],f16> loc(#loc1)
%1785 = torch.aten.view %1784, %1198 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%1786 = torch.aten.add.Tensor %1785, %1741, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
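// The [2,4096,320] token sequence is reshaped to a [2,64,64,320] feature map
// and permuted back to NCHW [2,320,64,64] for the convolutional path.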
%1787 = torch.prim.ListConstruct %int2, %int64, %int64, %int320 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc5)
%1788 = torch.aten.view %1786, %1787 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[2,64,64,320],f16> loc(#loc1)
%1789 = torch.prim.ListConstruct %int0, %int3, %int1, %int2 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc0)
%1790 = torch.aten.permute %1788, %1789 : !torch.vtensor<[2,64,64,320],f16>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
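// Same simulated quantize-dequantize pattern as above, now per channel group
// on the feature map: [2,320,64,64] viewed as [2,20,16,64,64] with the
// abs-max reduced over the 16-channel axis (dim 2).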
%1791 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1792 = torch.aten.detach %1791 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1793 = torch.aten.view %1790, %975 : !torch.vtensor<[2,320,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,16,64,64],f16> loc(#loc1)
%1794 = torch.aten.abs %1793 : !torch.vtensor<[2,20,16,64,64],f16> -> !torch.vtensor<[2,20,16,64,64],f16> loc(#loc1)
%values_24, %indices_25 = torch.aten.max.dim %1794, %int2, %true : !torch.vtensor<[2,20,16,64,64],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,20,1,64,64],f16>, !torch.vtensor<[2,20,1,64,64],si64> loc(#loc1)
%1795 = torch.aten.view %values_24, %978 : !torch.vtensor<[2,20,1,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,1,64,64],f16> loc(#loc1)
%1796 = torch.aten.broadcast_to %1795, %975 : !torch.vtensor<[2,20,1,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,16,64,64],f16> loc(#loc1)
%1797 = torch.aten.clone %1796, %int0 : !torch.vtensor<[2,20,16,64,64],f16>, !torch.int -> !torch.vtensor<[2,20,16,64,64],f16> loc(#loc1)
%1798 = torch.aten.view %1797, %958 : !torch.vtensor<[2,20,16,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%1799 = torch.aten.sub.Tensor %1792, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%1800 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%1801 = torch.aten.pow.Tensor_Tensor %1800, %1799 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1802 = torch.aten.neg %1801 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1803 = torch.aten.neg %1802 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1804 = torch.aten.div.Tensor %1798, %1803 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%1805 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1806 = torch.aten.detach %1805 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1807 = torch.aten.div.Tensor %1790, %1804 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%1808 = torch.aten.add.Tensor %1807, %1806, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%1809 = torch.aten.sub.Tensor %1792, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%1810 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%1811 = torch.aten.pow.Tensor_Tensor %1810, %1809 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1812 = torch.aten.neg %1811 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1813 = torch.aten.sub.Tensor %1792, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%1814 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%1815 = torch.aten.pow.Tensor_Tensor %1814, %1813 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1816 = torch.aten.sub.Tensor %1815, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%1817 = torch.aten.gt.Tensor %1808, %1816 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,320,64,64],i1> loc(#loc1)
%1818 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%1819 = torch.aten.to.dtype %1818, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%1820 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%1821 = torch.aten.broadcast_to %1819, %1820 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%1822 = torch.valsem.aten.copy %1821, %1816, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%1823 = torch.aten.where.self %1817, %1822, %1808 : !torch.vtensor<[2,320,64,64],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%1824 = torch.aten.lt.Tensor %1823, %1812 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,320,64,64],i1> loc(#loc1)
%1825 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%1826 = torch.aten.to.dtype %1825, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%1827 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%1828 = torch.aten.broadcast_to %1826, %1827 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%1829 = torch.valsem.aten.copy %1828, %1812, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%1830 = torch.aten.where.self %1824, %1829, %1823 : !torch.vtensor<[2,320,64,64],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%1831 = torch.aten.round %1830 : !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%1832 = torch.aten.sub.Tensor %1831, %1806, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%1833 = torch.aten.mul.Tensor %1832, %1804 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
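// Dequantized 1x1 convolution (scales %58, si8 weight %59, bias %60) closing
// the transformer block, then a residual add with %1119, presumably the
// feature map saved before the block.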
%1834 = torch.aten.broadcast_to %58, %1189 : !torch.vtensor<[320,20,1,1,1],f16>, !torch.list<int> -> !torch.vtensor<[320,20,16,1,1],f16> loc(#loc1)
%1835 = torch.aten.clone %1834, %int0 : !torch.vtensor<[320,20,16,1,1],f16>, !torch.int -> !torch.vtensor<[320,20,16,1,1],f16> loc(#loc1)
%1836 = torch.aten.view %1835, %1192 : !torch.vtensor<[320,20,16,1,1],f16>, !torch.list<int> -> !torch.vtensor<[320,320,1,1],f16> loc(#loc1)
%1837 = torch.aten.mul.Tensor %59, %1836 : !torch.vtensor<[320,320,1,1],si8>, !torch.vtensor<[320,320,1,1],f16> -> !torch.vtensor<[320,320,1,1],f16> loc(#loc1)
%1838 = torch.aten.convolution %1833, %1837, %60, %933, %934, %933, %false, %934, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[320,320,1,1],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%1839 = torch.aten.add.Tensor %1838, %1119, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
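// GroupNorm with 32 groups: the map is viewed as [2,32,10,4096] and per-group
// mean/variance over 10*4096 = 40960 elements are accumulated in f32/f64
// before rsqrt(var + eps) with eps %5 (~1e-5); per-channel affine %61/%62
// follows.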
%1840 = torch.aten.clone %1839, %int0 : !torch.vtensor<[2,320,64,64],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%1841 = torch.aten.view %1840, %936 : !torch.vtensor<[2,320,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,32,10,4096],f16> loc(#loc1)
%1842 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%1843 = torch.aten.to.dtype %1842, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%1844 = torch.prim.ListConstruct %int2, %int32, %int10, %int4096 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%1845 = torch.aten.broadcast_to %1843, %1844 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,10,4096],f32> loc(#loc1)
%1846 = torch.valsem.aten.copy %1845, %1841, %false : !torch.vtensor<[2,32,10,4096],f32>, !torch.vtensor<[2,32,10,4096],f16>, !torch.bool -> !torch.vtensor<[2,32,10,4096],f32> loc(#loc1)
%1847 = torch.aten.to.dtype %1846, %int7, %false, %false, %none : !torch.vtensor<[2,32,10,4096],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,10,4096],f64> loc(#loc1)
%1848 = torch.aten.sum.dim_IntList %1847, %943, %true, %none : !torch.vtensor<[2,32,10,4096],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%1849 = torch.aten.div.Scalar %1848, %int40960 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%1850 = torch.aten.sub.Tensor %1847, %1849, %float1.000000e00 : !torch.vtensor<[2,32,10,4096],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,10,4096],f64> loc(#loc1)
%1851 = torch.aten.mul.Tensor %1850, %1850 : !torch.vtensor<[2,32,10,4096],f64>, !torch.vtensor<[2,32,10,4096],f64> -> !torch.vtensor<[2,32,10,4096],f64> loc(#loc1)
%1852 = torch.aten.sum.dim_IntList %1851, %943, %true, %none : !torch.vtensor<[2,32,10,4096],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%1853 = torch.aten.div.Scalar %1852, %int40960 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%1854 = torch.aten.to.dtype %1853, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%1855 = torch.aten.sum.dim_IntList %1846, %943, %true, %none : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%1856 = torch.aten.div.Scalar %1855, %int40960 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%1857 = torch.aten.add.Tensor %1854, %5, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%1858 = torch.aten.rsqrt %1857 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%1859 = torch.aten.sub.Tensor %1841, %1856, %int1 : !torch.vtensor<[2,32,10,4096],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,10,4096],f32> loc(#loc1)
%1860 = torch.aten.mul.Tensor %1859, %1858 : !torch.vtensor<[2,32,10,4096],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,10,4096],f32> loc(#loc1)
%1861 = torch.aten.view %1860, %958 : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f32> loc(#loc1)
%1862 = torch.aten.unsqueeze %61, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16> loc(#loc1)
%1863 = torch.aten.unsqueeze %1862, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16> loc(#loc1)
%1864 = torch.aten.mul.Tensor %1861, %1863 : !torch.vtensor<[2,320,64,64],f32>, !torch.vtensor<[320,1,1],f16> -> !torch.vtensor<[2,320,64,64],f32> loc(#loc1)
%1865 = torch.aten.unsqueeze %62, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16> loc(#loc1)
%1866 = torch.aten.unsqueeze %1865, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16> loc(#loc1)
%1867 = torch.aten.add.Tensor %1864, %1866, %int1 : !torch.vtensor<[2,320,64,64],f32>, !torch.vtensor<[320,1,1],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f32> loc(#loc1)
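// Cast back to f16, then SiLU: x * sigmoid(x).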
%1868 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%1869 = torch.aten.to.dtype %1868, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%1870 = torch.prim.ListConstruct %int2, %int320, %int64, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%1871 = torch.aten.broadcast_to %1869, %1870 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%1872 = torch.valsem.aten.copy %1871, %1867, %false : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f32>, !torch.bool -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%1873 = torch.aten.sigmoid %1872 : !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%1874 = torch.aten.mul.Tensor %1873, %1872 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
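// Per-channel-group quantize-dequantize again, preparing the activation for
// the first 3x3 convolution of what looks like a ResNet block.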
%1875 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1876 = torch.aten.detach %1875 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1877 = torch.aten.view %1874, %975 : !torch.vtensor<[2,320,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,16,64,64],f16> loc(#loc1)
%1878 = torch.aten.abs %1877 : !torch.vtensor<[2,20,16,64,64],f16> -> !torch.vtensor<[2,20,16,64,64],f16> loc(#loc1)
%values_26, %indices_27 = torch.aten.max.dim %1878, %int2, %true : !torch.vtensor<[2,20,16,64,64],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,20,1,64,64],f16>, !torch.vtensor<[2,20,1,64,64],si64> loc(#loc1)
%1879 = torch.aten.view %values_26, %978 : !torch.vtensor<[2,20,1,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,1,64,64],f16> loc(#loc1)
%1880 = torch.aten.broadcast_to %1879, %975 : !torch.vtensor<[2,20,1,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,16,64,64],f16> loc(#loc1)
%1881 = torch.aten.clone %1880, %int0 : !torch.vtensor<[2,20,16,64,64],f16>, !torch.int -> !torch.vtensor<[2,20,16,64,64],f16> loc(#loc1)
%1882 = torch.aten.view %1881, %958 : !torch.vtensor<[2,20,16,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%1883 = torch.aten.sub.Tensor %1876, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%1884 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%1885 = torch.aten.pow.Tensor_Tensor %1884, %1883 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1886 = torch.aten.neg %1885 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1887 = torch.aten.neg %1886 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1888 = torch.aten.div.Tensor %1882, %1887 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%1889 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1890 = torch.aten.detach %1889 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1891 = torch.aten.div.Tensor %1874, %1888 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%1892 = torch.aten.add.Tensor %1891, %1890, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%1893 = torch.aten.sub.Tensor %1876, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%1894 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%1895 = torch.aten.pow.Tensor_Tensor %1894, %1893 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1896 = torch.aten.neg %1895 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1897 = torch.aten.sub.Tensor %1876, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%1898 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%1899 = torch.aten.pow.Tensor_Tensor %1898, %1897 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1900 = torch.aten.sub.Tensor %1899, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%1901 = torch.aten.gt.Tensor %1892, %1900 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,320,64,64],i1> loc(#loc1)
%1902 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%1903 = torch.aten.to.dtype %1902, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%1904 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%1905 = torch.aten.broadcast_to %1903, %1904 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%1906 = torch.valsem.aten.copy %1905, %1900, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%1907 = torch.aten.where.self %1901, %1906, %1892 : !torch.vtensor<[2,320,64,64],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%1908 = torch.aten.lt.Tensor %1907, %1896 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,320,64,64],i1> loc(#loc1)
%1909 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%1910 = torch.aten.to.dtype %1909, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%1911 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%1912 = torch.aten.broadcast_to %1910, %1911 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%1913 = torch.valsem.aten.copy %1912, %1896, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%1914 = torch.aten.where.self %1908, %1913, %1907 : !torch.vtensor<[2,320,64,64],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%1915 = torch.aten.round %1914 : !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%1916 = torch.aten.sub.Tensor %1915, %1890, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%1917 = torch.aten.mul.Tensor %1916, %1888 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
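// Dequantized 3x3 convolution (scales %63, si8 weight %64, bias %65).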
%1918 = torch.aten.broadcast_to %63, %1018 : !torch.vtensor<[320,20,1,3,3],f16>, !torch.list<int> -> !torch.vtensor<[320,20,16,3,3],f16> loc(#loc1)
%1919 = torch.aten.clone %1918, %int0 : !torch.vtensor<[320,20,16,3,3],f16>, !torch.int -> !torch.vtensor<[320,20,16,3,3],f16> loc(#loc1)
%1920 = torch.aten.view %1919, %1021 : !torch.vtensor<[320,20,16,3,3],f16>, !torch.list<int> -> !torch.vtensor<[320,320,3,3],f16> loc(#loc1)
%1921 = torch.aten.mul.Tensor %64, %1920 : !torch.vtensor<[320,320,3,3],si8>, !torch.vtensor<[320,320,3,3],f16> -> !torch.vtensor<[320,320,3,3],f16> loc(#loc1)
%1922 = torch.aten.convolution %1917, %1921, %65, %933, %933, %933, %false, %934, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[320,320,3,3],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
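// Timestep-embedding injection: SiLU over %932 (a [2,1280] embedding), a
// dense 1280 -> 320 layer (%66, %67), and the [2,320] result unsqueezed to
// [2,320,1,1] and broadcast-added onto the conv output.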
%1923 = torch.aten.sigmoid %932 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16> loc(#loc1)
%1924 = torch.aten.mul.Tensor %1923, %932 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16> loc(#loc1)
%1925 = torch.aten.transpose.int %66, %int0, %int1 : !torch.vtensor<[320,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,320],f16> loc(#loc1)
%1926 = torch.aten.mm %1924, %1925 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,320],f16> -> !torch.vtensor<[2,320],f16> loc(#loc1)
%1927 = torch.aten.mul.Scalar %67, %int1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320],f16> loc(#loc1)
%1928 = torch.aten.add.Tensor %1927, %1926, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[2,320],f16>, !torch.int -> !torch.vtensor<[2,320],f16> loc(#loc1)
%1929 = torch.aten.slice.Tensor %1928, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,320],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,320],f16> loc(#loc1)
%1930 = torch.aten.slice.Tensor %1929, %int1, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,320],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,320],f16> loc(#loc1)
%1931 = torch.aten.unsqueeze %1930, %int2 : !torch.vtensor<[2,320],f16>, !torch.int -> !torch.vtensor<[2,320,1],f16> loc(#loc1)
%1932 = torch.aten.unsqueeze %1931, %int3 : !torch.vtensor<[2,320,1],f16>, !torch.int -> !torch.vtensor<[2,320,1,1],f16> loc(#loc1)
%1933 = torch.aten.add.Tensor %1922, %1932, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,1,1],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
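// Second GroupNorm of the block (same 32-group statistics pattern, eps %5)
// with per-channel affine %68/%69, then the f16 cast and SiLU.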
%1934 = torch.aten.view %1933, %936 : !torch.vtensor<[2,320,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,32,10,4096],f16> loc(#loc1)
%1935 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%1936 = torch.aten.to.dtype %1935, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%1937 = torch.prim.ListConstruct %int2, %int32, %int10, %int4096 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%1938 = torch.aten.broadcast_to %1936, %1937 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,10,4096],f32> loc(#loc1)
%1939 = torch.valsem.aten.copy %1938, %1934, %false : !torch.vtensor<[2,32,10,4096],f32>, !torch.vtensor<[2,32,10,4096],f16>, !torch.bool -> !torch.vtensor<[2,32,10,4096],f32> loc(#loc1)
%1940 = torch.aten.to.dtype %1939, %int7, %false, %false, %none : !torch.vtensor<[2,32,10,4096],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,10,4096],f64> loc(#loc1)
%1941 = torch.aten.sum.dim_IntList %1940, %943, %true, %none : !torch.vtensor<[2,32,10,4096],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%1942 = torch.aten.div.Scalar %1941, %int40960 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%1943 = torch.aten.sub.Tensor %1940, %1942, %float1.000000e00 : !torch.vtensor<[2,32,10,4096],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,10,4096],f64> loc(#loc1)
%1944 = torch.aten.mul.Tensor %1943, %1943 : !torch.vtensor<[2,32,10,4096],f64>, !torch.vtensor<[2,32,10,4096],f64> -> !torch.vtensor<[2,32,10,4096],f64> loc(#loc1)
%1945 = torch.aten.sum.dim_IntList %1944, %943, %true, %none : !torch.vtensor<[2,32,10,4096],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%1946 = torch.aten.div.Scalar %1945, %int40960 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%1947 = torch.aten.to.dtype %1946, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%1948 = torch.aten.sum.dim_IntList %1939, %943, %true, %none : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%1949 = torch.aten.div.Scalar %1948, %int40960 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%1950 = torch.aten.add.Tensor %1947, %5, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%1951 = torch.aten.rsqrt %1950 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%1952 = torch.aten.sub.Tensor %1934, %1949, %int1 : !torch.vtensor<[2,32,10,4096],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,10,4096],f32> loc(#loc1)
%1953 = torch.aten.mul.Tensor %1952, %1951 : !torch.vtensor<[2,32,10,4096],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,10,4096],f32> loc(#loc1)
%1954 = torch.aten.view %1953, %958 : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f32> loc(#loc1)
%1955 = torch.aten.unsqueeze %68, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16> loc(#loc1)
%1956 = torch.aten.unsqueeze %1955, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16> loc(#loc1)
%1957 = torch.aten.mul.Tensor %1954, %1956 : !torch.vtensor<[2,320,64,64],f32>, !torch.vtensor<[320,1,1],f16> -> !torch.vtensor<[2,320,64,64],f32> loc(#loc1)
%1958 = torch.aten.unsqueeze %69, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16> loc(#loc1)
%1959 = torch.aten.unsqueeze %1958, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16> loc(#loc1)
%1960 = torch.aten.add.Tensor %1957, %1959, %int1 : !torch.vtensor<[2,320,64,64],f32>, !torch.vtensor<[320,1,1],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f32> loc(#loc1)
%1961 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%1962 = torch.aten.to.dtype %1961, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%1963 = torch.prim.ListConstruct %int2, %int320, %int64, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%1964 = torch.aten.broadcast_to %1962, %1963 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%1965 = torch.valsem.aten.copy %1964, %1960, %false : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f32>, !torch.bool -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%1966 = torch.aten.sigmoid %1965 : !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%1967 = torch.aten.mul.Tensor %1966, %1965 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
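// Quantize-dequantize once more, ahead of the second 3x3 convolution.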
%1968 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1969 = torch.aten.detach %1968 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1970 = torch.aten.view %1967, %975 : !torch.vtensor<[2,320,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,16,64,64],f16> loc(#loc1)
%1971 = torch.aten.abs %1970 : !torch.vtensor<[2,20,16,64,64],f16> -> !torch.vtensor<[2,20,16,64,64],f16> loc(#loc1)
%values_28, %indices_29 = torch.aten.max.dim %1971, %int2, %true : !torch.vtensor<[2,20,16,64,64],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,20,1,64,64],f16>, !torch.vtensor<[2,20,1,64,64],si64> loc(#loc1)
%1972 = torch.aten.view %values_28, %978 : !torch.vtensor<[2,20,1,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,1,64,64],f16> loc(#loc1)
%1973 = torch.aten.broadcast_to %1972, %975 : !torch.vtensor<[2,20,1,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,16,64,64],f16> loc(#loc1)
%1974 = torch.aten.clone %1973, %int0 : !torch.vtensor<[2,20,16,64,64],f16>, !torch.int -> !torch.vtensor<[2,20,16,64,64],f16> loc(#loc1)
%1975 = torch.aten.view %1974, %958 : !torch.vtensor<[2,20,16,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%1976 = torch.aten.sub.Tensor %1969, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%1977 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%1978 = torch.aten.pow.Tensor_Tensor %1977, %1976 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1979 = torch.aten.neg %1978 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1980 = torch.aten.neg %1979 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1981 = torch.aten.div.Tensor %1975, %1980 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%1982 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1983 = torch.aten.detach %1982 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1984 = torch.aten.div.Tensor %1967, %1981 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%1985 = torch.aten.add.Tensor %1984, %1983, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%1986 = torch.aten.sub.Tensor %1969, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%1987 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%1988 = torch.aten.pow.Tensor_Tensor %1987, %1986 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1989 = torch.aten.neg %1988 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1990 = torch.aten.sub.Tensor %1969, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%1991 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%1992 = torch.aten.pow.Tensor_Tensor %1991, %1990 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%1993 = torch.aten.sub.Tensor %1992, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%1994 = torch.aten.gt.Tensor %1985, %1993 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,320,64,64],i1> loc(#loc1)
%1995 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%1996 = torch.aten.to.dtype %1995, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%1997 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%1998 = torch.aten.broadcast_to %1996, %1997 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%1999 = torch.valsem.aten.copy %1998, %1993, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%2000 = torch.aten.where.self %1994, %1999, %1985 : !torch.vtensor<[2,320,64,64],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%2001 = torch.aten.lt.Tensor %2000, %1989 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,320,64,64],i1> loc(#loc1)
%2002 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%2003 = torch.aten.to.dtype %2002, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%2004 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%2005 = torch.aten.broadcast_to %2003, %2004 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%2006 = torch.valsem.aten.copy %2005, %1989, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%2007 = torch.aten.where.self %2001, %2006, %2000 : !torch.vtensor<[2,320,64,64],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%2008 = torch.aten.round %2007 : !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%2009 = torch.aten.sub.Tensor %2008, %1983, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%2010 = torch.aten.mul.Tensor %2009, %1981 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
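// Dequantized second 3x3 convolution (scales %70, si8 weight %71, bias %72),
// the skip connection adding %1839, and a division by %4 (the literal 1.0),
// presumably a no-op output scale factor.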
%2011 = torch.aten.broadcast_to %70, %1018 : !torch.vtensor<[320,20,1,3,3],f16>, !torch.list<int> -> !torch.vtensor<[320,20,16,3,3],f16> loc(#loc1)
%2012 = torch.aten.clone %2011, %int0 : !torch.vtensor<[320,20,16,3,3],f16>, !torch.int -> !torch.vtensor<[320,20,16,3,3],f16> loc(#loc1)
%2013 = torch.aten.view %2012, %1021 : !torch.vtensor<[320,20,16,3,3],f16>, !torch.list<int> -> !torch.vtensor<[320,320,3,3],f16> loc(#loc1)
%2014 = torch.aten.mul.Tensor %71, %2013 : !torch.vtensor<[320,320,3,3],si8>, !torch.vtensor<[320,320,3,3],f16> -> !torch.vtensor<[320,320,3,3],f16> loc(#loc1)
%2015 = torch.aten.convolution %2010, %2014, %72, %933, %933, %933, %false, %934, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[320,320,3,3],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%2016 = torch.aten.add.Tensor %1839, %2015, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%2017 = torch.aten.div.Tensor %2016, %4 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
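// GroupNorm at the entry of the next attention block: the same 32-group
// pattern but with eps %3 (~1e-6) and no affine ops visible before the cast
// back to f16.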
%2018 = torch.aten.clone %2017, %int0 : !torch.vtensor<[2,320,64,64],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%2019 = torch.aten.view %2018, %936 : !torch.vtensor<[2,320,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,32,10,4096],f16> loc(#loc1)
%2020 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%2021 = torch.aten.to.dtype %2020, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%2022 = torch.prim.ListConstruct %int2, %int32, %int10, %int4096 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%2023 = torch.aten.broadcast_to %2021, %2022 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,10,4096],f32> loc(#loc1)
%2024 = torch.valsem.aten.copy %2023, %2019, %false : !torch.vtensor<[2,32,10,4096],f32>, !torch.vtensor<[2,32,10,4096],f16>, !torch.bool -> !torch.vtensor<[2,32,10,4096],f32> loc(#loc1)
%2025 = torch.aten.to.dtype %2024, %int7, %false, %false, %none : !torch.vtensor<[2,32,10,4096],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,10,4096],f64> loc(#loc1)
%2026 = torch.aten.sum.dim_IntList %2025, %943, %true, %none : !torch.vtensor<[2,32,10,4096],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%2027 = torch.aten.div.Scalar %2026, %int40960 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%2028 = torch.aten.sub.Tensor %2025, %2027, %float1.000000e00 : !torch.vtensor<[2,32,10,4096],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,10,4096],f64> loc(#loc1)
%2029 = torch.aten.mul.Tensor %2028, %2028 : !torch.vtensor<[2,32,10,4096],f64>, !torch.vtensor<[2,32,10,4096],f64> -> !torch.vtensor<[2,32,10,4096],f64> loc(#loc1)
%2030 = torch.aten.sum.dim_IntList %2029, %943, %true, %none : !torch.vtensor<[2,32,10,4096],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%2031 = torch.aten.div.Scalar %2030, %int40960 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%2032 = torch.aten.to.dtype %2031, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%2033 = torch.aten.sum.dim_IntList %2024, %943, %true, %none : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%2034 = torch.aten.div.Scalar %2033, %int40960 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%2035 = torch.aten.add.Tensor %2032, %3, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%2036 = torch.aten.rsqrt %2035 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%2037 = torch.aten.sub.Tensor %2019, %2034, %int1 : !torch.vtensor<[2,32,10,4096],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,10,4096],f32> loc(#loc1)
%2038 = torch.aten.mul.Tensor %2037, %2036 : !torch.vtensor<[2,32,10,4096],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,10,4096],f32> loc(#loc1)
%2039 = torch.aten.view %2038, %958 : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f32> loc(#loc1)
%2040 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%2041 = torch.aten.to.dtype %2040, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%2042 = torch.prim.ListConstruct %int2, %int320, %int64, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%2043 = torch.aten.broadcast_to %2041, %2042 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%2044 = torch.valsem.aten.copy %2043, %2039, %false : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f32>, !torch.bool -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
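// Simulated quantization of the normalized map, feeding the 1x1 input
// projection below.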
%2045 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2046 = torch.aten.detach %2045 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2047 = torch.aten.view %2044, %975 : !torch.vtensor<[2,320,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,16,64,64],f16> loc(#loc1)
%2048 = torch.aten.abs %2047 : !torch.vtensor<[2,20,16,64,64],f16> -> !torch.vtensor<[2,20,16,64,64],f16> loc(#loc1)
%values_30, %indices_31 = torch.aten.max.dim %2048, %int2, %true : !torch.vtensor<[2,20,16,64,64],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,20,1,64,64],f16>, !torch.vtensor<[2,20,1,64,64],si64> loc(#loc1)
%2049 = torch.aten.view %values_30, %978 : !torch.vtensor<[2,20,1,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,1,64,64],f16> loc(#loc1)
%2050 = torch.aten.broadcast_to %2049, %975 : !torch.vtensor<[2,20,1,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,16,64,64],f16> loc(#loc1)
%2051 = torch.aten.clone %2050, %int0 : !torch.vtensor<[2,20,16,64,64],f16>, !torch.int -> !torch.vtensor<[2,20,16,64,64],f16> loc(#loc1)
%2052 = torch.aten.view %2051, %958 : !torch.vtensor<[2,20,16,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%2053 = torch.aten.sub.Tensor %2046, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%2054 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%2055 = torch.aten.pow.Tensor_Tensor %2054, %2053 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2056 = torch.aten.neg %2055 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2057 = torch.aten.neg %2056 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2058 = torch.aten.div.Tensor %2052, %2057 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%2059 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2060 = torch.aten.detach %2059 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2061 = torch.aten.div.Tensor %2044, %2058 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%2062 = torch.aten.add.Tensor %2061, %2060, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%2063 = torch.aten.sub.Tensor %2046, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%2064 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%2065 = torch.aten.pow.Tensor_Tensor %2064, %2063 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2066 = torch.aten.neg %2065 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2067 = torch.aten.sub.Tensor %2046, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%2068 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%2069 = torch.aten.pow.Tensor_Tensor %2068, %2067 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2070 = torch.aten.sub.Tensor %2069, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%2071 = torch.aten.gt.Tensor %2062, %2070 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,320,64,64],i1> loc(#loc1)
%2072 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%2073 = torch.aten.to.dtype %2072, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%2074 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%2075 = torch.aten.broadcast_to %2073, %2074 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%2076 = torch.valsem.aten.copy %2075, %2070, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%2077 = torch.aten.where.self %2071, %2076, %2062 : !torch.vtensor<[2,320,64,64],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%2078 = torch.aten.lt.Tensor %2077, %2066 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,320,64,64],i1> loc(#loc1)
%2079 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%2080 = torch.aten.to.dtype %2079, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%2081 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%2082 = torch.aten.broadcast_to %2080, %2081 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%2083 = torch.valsem.aten.copy %2082, %2066, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%2084 = torch.aten.where.self %2078, %2083, %2077 : !torch.vtensor<[2,320,64,64],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%2085 = torch.aten.round %2084 : !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%2086 = torch.aten.sub.Tensor %2085, %2060, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%2087 = torch.aten.mul.Tensor %2086, %2058 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
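// Dequantized 1x1 convolution (scales %73, si8 weight %74, bias %75), then
// an NCHW -> NHWC permute and a flatten to a [2,4096,320] token sequence.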
%2088 = torch.aten.broadcast_to %73, %1189 : !torch.vtensor<[320,20,1,1,1],f16>, !torch.list<int> -> !torch.vtensor<[320,20,16,1,1],f16> loc(#loc1)
%2089 = torch.aten.clone %2088, %int0 : !torch.vtensor<[320,20,16,1,1],f16>, !torch.int -> !torch.vtensor<[320,20,16,1,1],f16> loc(#loc1)
%2090 = torch.aten.view %2089, %1192 : !torch.vtensor<[320,20,16,1,1],f16>, !torch.list<int> -> !torch.vtensor<[320,320,1,1],f16> loc(#loc1)
%2091 = torch.aten.mul.Tensor %74, %2090 : !torch.vtensor<[320,320,1,1],si8>, !torch.vtensor<[320,320,1,1],f16> -> !torch.vtensor<[320,320,1,1],f16> loc(#loc1)
%2092 = torch.aten.convolution %2087, %2091, %75, %933, %934, %933, %false, %934, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[320,320,1,1],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%2093 = torch.aten.permute %2092, %1196 : !torch.vtensor<[2,320,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,64,64,320],f16> loc(#loc1)
%2094 = torch.aten.view %2093, %1198 : !torch.vtensor<[2,64,64,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
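// Inline LayerNorm over the 320-wide channel dimension (eps 1e-5), matching
// the pattern in the earlier transformer block.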
%2095 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int> loc(#loc1)
%2096 = torch.aten.sum.dim_IntList %2094, %2095, %true, %none : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,4096,1],f16> loc(#loc1)
%2097 = torch.aten.div.Scalar %2096, %int320 : !torch.vtensor<[2,4096,1],f16>, !torch.int -> !torch.vtensor<[2,4096,1],f16> loc(#loc1)
%2098 = torch.prim.ListConstruct %int2, %int4096, %int320 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%2099 = torch.aten.broadcast_to %2097, %2098 : !torch.vtensor<[2,4096,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2100 = torch.aten.sub.Tensor %2094, %2099, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2101 = torch.aten.mul.Tensor %2100, %2100 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2102 = torch.aten.sum.dim_IntList %2101, %2095, %true, %none : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,4096,1],f16> loc(#loc1)
%2103 = torch.aten.div.Scalar %2102, %int320 : !torch.vtensor<[2,4096,1],f16>, !torch.int -> !torch.vtensor<[2,4096,1],f16> loc(#loc1)
%2104 = torch.aten.add.Scalar %2103, %float1.000000e-05, %int1 : !torch.vtensor<[2,4096,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,4096,1],f16> loc(#loc1)
%2105 = torch.aten.rsqrt %2104 : !torch.vtensor<[2,4096,1],f16> -> !torch.vtensor<[2,4096,1],f16> loc(#loc1)
%2106 = torch.prim.ListConstruct %int2, %int4096, %int320 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%2107 = torch.aten.broadcast_to %2105, %2106 : !torch.vtensor<[2,4096,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2108 = torch.aten.mul.Tensor %2100, %2107 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
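    // Note: %2096-%2108 compute a LayerNorm by hand over the channel dim:
    // mean, centered difference, variance, rsqrt(var + 1e-5), normalize.
    // What follows looks like per-group activation fake-quantization: the 320 channels
    // are viewed as 20 groups of 16, abs + max per group gives the dynamic range, and
    // %880/%881/%882 (defined earlier, presumably the bit-width, zero point, and base 2)
    // turn that range into a per-group scale via 2^(b-1).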
%2109 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2110 = torch.aten.detach %2109 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2111 = torch.aten.view %2108, %1216 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,20,16],f16> loc(#loc1)
%2112 = torch.aten.abs %2111 : !torch.vtensor<[2,4096,20,16],f16> -> !torch.vtensor<[2,4096,20,16],f16> loc(#loc1)
%values_32, %indices_33 = torch.aten.max.dim %2112, %int3, %true : !torch.vtensor<[2,4096,20,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,4096,20,1],f16>, !torch.vtensor<[2,4096,20,1],si64> loc(#loc1)
%2113 = torch.aten.view %values_32, %1219 : !torch.vtensor<[2,4096,20,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,20,1],f16> loc(#loc1)
%2114 = torch.aten.broadcast_to %2113, %1216 : !torch.vtensor<[2,4096,20,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,20,16],f16> loc(#loc1)
%2115 = torch.aten.clone %2114, %int0 : !torch.vtensor<[2,4096,20,16],f16>, !torch.int -> !torch.vtensor<[2,4096,20,16],f16> loc(#loc1)
%2116 = torch.aten.view %2115, %1198 : !torch.vtensor<[2,4096,20,16],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2117 = torch.aten.sub.Tensor %2110, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%2118 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%2119 = torch.aten.pow.Tensor_Tensor %2118, %2117 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2120 = torch.aten.neg %2119 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2121 = torch.aten.neg %2120 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2122 = torch.aten.div.Tensor %2116, %2121 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2123 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2124 = torch.aten.detach %2123 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2125 = torch.aten.div.Tensor %2108, %2122 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2126 = torch.aten.add.Tensor %2125, %2124, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2127 = torch.aten.sub.Tensor %2110, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%2128 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%2129 = torch.aten.pow.Tensor_Tensor %2128, %2127 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2130 = torch.aten.neg %2129 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2131 = torch.aten.sub.Tensor %2110, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%2132 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%2133 = torch.aten.pow.Tensor_Tensor %2132, %2131 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2134 = torch.aten.sub.Tensor %2133, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%2135 = torch.aten.gt.Tensor %2126, %2134 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,4096,320],i1> loc(#loc1)
%2136 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%2137 = torch.aten.to.dtype %2136, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%2138 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%2139 = torch.aten.broadcast_to %2137, %2138 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%2140 = torch.valsem.aten.copy %2139, %2134, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%2141 = torch.aten.where.self %2135, %2140, %2126 : !torch.vtensor<[2,4096,320],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2142 = torch.aten.lt.Tensor %2141, %2130 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,4096,320],i1> loc(#loc1)
%2143 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%2144 = torch.aten.to.dtype %2143, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%2145 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%2146 = torch.aten.broadcast_to %2144, %2145 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%2147 = torch.valsem.aten.copy %2146, %2130, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%2148 = torch.aten.where.self %2142, %2147, %2141 : !torch.vtensor<[2,4096,320],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2149 = torch.aten.round %2148 : !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2150 = torch.aten.sub.Tensor %2149, %2124, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2151 = torch.aten.mul.Tensor %2150, %2122 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
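    // Note: %2149-%2151 dequantize again (round after the gt/lt + where clamping above,
    // subtract the zero point, multiply by the per-group scale %2122).
    // The ops below apply a quantized linear layer: per-group f16 scales (%76) are
    // broadcast over an si8 weight (%77), transposed, and fed to a matmul with the bias
    // (%78) added via mul.Scalar/add.Tensor, most likely the query projection of a
    // self-attention block.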
%2152 = torch.aten.broadcast_to %76, %1259 : !torch.vtensor<[320,20,1],f16>, !torch.list<int> -> !torch.vtensor<[320,20,16],f16> loc(#loc1)
%2153 = torch.aten.clone %2152, %int0 : !torch.vtensor<[320,20,16],f16>, !torch.int -> !torch.vtensor<[320,20,16],f16> loc(#loc1)
%2154 = torch.aten.view %2153, %1262 : !torch.vtensor<[320,20,16],f16>, !torch.list<int> -> !torch.vtensor<[320,320],f16> loc(#loc1)
%2155 = torch.aten.mul.Tensor %77, %2154 : !torch.vtensor<[320,320],si8>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16> loc(#loc1)
%2156 = torch.aten.transpose.int %2155, %int0, %int1 : !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,320],f16> loc(#loc1)
%2157 = torch.aten.view %2151, %1266 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16> loc(#loc1)
%2158 = torch.aten.mm %2157, %2156 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16> loc(#loc1)
%2159 = torch.aten.mul.Scalar %78, %int1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320],f16> loc(#loc1)
%2160 = torch.aten.add.Tensor %2159, %2158, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[8192,320],f16>, !torch.int -> !torch.vtensor<[8192,320],f16> loc(#loc1)
%2161 = torch.aten.view %2160, %1198 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
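    // Note: the same fake-quant + quantized-linear pattern now repeats on the identical
    // normalized input %2108, presumably producing the key projection.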
%2162 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2163 = torch.aten.detach %2162 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2164 = torch.aten.view %2108, %1216 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,20,16],f16> loc(#loc1)
%2165 = torch.aten.abs %2164 : !torch.vtensor<[2,4096,20,16],f16> -> !torch.vtensor<[2,4096,20,16],f16> loc(#loc1)
%values_34, %indices_35 = torch.aten.max.dim %2165, %int3, %true : !torch.vtensor<[2,4096,20,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,4096,20,1],f16>, !torch.vtensor<[2,4096,20,1],si64> loc(#loc1)
%2166 = torch.aten.view %values_34, %1219 : !torch.vtensor<[2,4096,20,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,20,1],f16> loc(#loc1)
%2167 = torch.aten.broadcast_to %2166, %1216 : !torch.vtensor<[2,4096,20,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,20,16],f16> loc(#loc1)
%2168 = torch.aten.clone %2167, %int0 : !torch.vtensor<[2,4096,20,16],f16>, !torch.int -> !torch.vtensor<[2,4096,20,16],f16> loc(#loc1)
%2169 = torch.aten.view %2168, %1198 : !torch.vtensor<[2,4096,20,16],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2170 = torch.aten.sub.Tensor %2163, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%2171 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%2172 = torch.aten.pow.Tensor_Tensor %2171, %2170 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2173 = torch.aten.neg %2172 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2174 = torch.aten.neg %2173 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2175 = torch.aten.div.Tensor %2169, %2174 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2176 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2177 = torch.aten.detach %2176 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2178 = torch.aten.div.Tensor %2108, %2175 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2179 = torch.aten.add.Tensor %2178, %2177, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2180 = torch.aten.sub.Tensor %2163, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%2181 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%2182 = torch.aten.pow.Tensor_Tensor %2181, %2180 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2183 = torch.aten.neg %2182 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2184 = torch.aten.sub.Tensor %2163, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%2185 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%2186 = torch.aten.pow.Tensor_Tensor %2185, %2184 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2187 = torch.aten.sub.Tensor %2186, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%2188 = torch.aten.gt.Tensor %2179, %2187 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,4096,320],i1> loc(#loc1)
%2189 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%2190 = torch.aten.to.dtype %2189, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%2191 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%2192 = torch.aten.broadcast_to %2190, %2191 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%2193 = torch.valsem.aten.copy %2192, %2187, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%2194 = torch.aten.where.self %2188, %2193, %2179 : !torch.vtensor<[2,4096,320],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2195 = torch.aten.lt.Tensor %2194, %2183 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,4096,320],i1> loc(#loc1)
%2196 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%2197 = torch.aten.to.dtype %2196, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%2198 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%2199 = torch.aten.broadcast_to %2197, %2198 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%2200 = torch.valsem.aten.copy %2199, %2183, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%2201 = torch.aten.where.self %2195, %2200, %2194 : !torch.vtensor<[2,4096,320],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2202 = torch.aten.round %2201 : !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2203 = torch.aten.sub.Tensor %2202, %2177, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2204 = torch.aten.mul.Tensor %2203, %2175 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2205 = torch.aten.broadcast_to %79, %1259 : !torch.vtensor<[320,20,1],f16>, !torch.list<int> -> !torch.vtensor<[320,20,16],f16> loc(#loc1)
%2206 = torch.aten.clone %2205, %int0 : !torch.vtensor<[320,20,16],f16>, !torch.int -> !torch.vtensor<[320,20,16],f16> loc(#loc1)
%2207 = torch.aten.view %2206, %1262 : !torch.vtensor<[320,20,16],f16>, !torch.list<int> -> !torch.vtensor<[320,320],f16> loc(#loc1)
%2208 = torch.aten.mul.Tensor %80, %2207 : !torch.vtensor<[320,320],si8>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16> loc(#loc1)
%2209 = torch.aten.transpose.int %2208, %int0, %int1 : !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,320],f16> loc(#loc1)
%2210 = torch.aten.view %2204, %1266 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16> loc(#loc1)
%2211 = torch.aten.mm %2210, %2209 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16> loc(#loc1)
%2212 = torch.aten.mul.Scalar %81, %int1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320],f16> loc(#loc1)
%2213 = torch.aten.add.Tensor %2212, %2211, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[8192,320],f16>, !torch.int -> !torch.vtensor<[8192,320],f16> loc(#loc1)
%2214 = torch.aten.view %2213, %1198 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
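    // Note: third repetition of the pattern on %2108, presumably the value projection.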
%2215 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2216 = torch.aten.detach %2215 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2217 = torch.aten.view %2108, %1216 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,20,16],f16> loc(#loc1)
%2218 = torch.aten.abs %2217 : !torch.vtensor<[2,4096,20,16],f16> -> !torch.vtensor<[2,4096,20,16],f16> loc(#loc1)
%values_36, %indices_37 = torch.aten.max.dim %2218, %int3, %true : !torch.vtensor<[2,4096,20,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,4096,20,1],f16>, !torch.vtensor<[2,4096,20,1],si64> loc(#loc1)
%2219 = torch.aten.view %values_36, %1219 : !torch.vtensor<[2,4096,20,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,20,1],f16> loc(#loc1)
%2220 = torch.aten.broadcast_to %2219, %1216 : !torch.vtensor<[2,4096,20,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,20,16],f16> loc(#loc1)
%2221 = torch.aten.clone %2220, %int0 : !torch.vtensor<[2,4096,20,16],f16>, !torch.int -> !torch.vtensor<[2,4096,20,16],f16> loc(#loc1)
%2222 = torch.aten.view %2221, %1198 : !torch.vtensor<[2,4096,20,16],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2223 = torch.aten.sub.Tensor %2216, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%2224 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%2225 = torch.aten.pow.Tensor_Tensor %2224, %2223 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2226 = torch.aten.neg %2225 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2227 = torch.aten.neg %2226 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2228 = torch.aten.div.Tensor %2222, %2227 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2229 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2230 = torch.aten.detach %2229 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2231 = torch.aten.div.Tensor %2108, %2228 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2232 = torch.aten.add.Tensor %2231, %2230, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2233 = torch.aten.sub.Tensor %2216, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%2234 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%2235 = torch.aten.pow.Tensor_Tensor %2234, %2233 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2236 = torch.aten.neg %2235 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2237 = torch.aten.sub.Tensor %2216, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%2238 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%2239 = torch.aten.pow.Tensor_Tensor %2238, %2237 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2240 = torch.aten.sub.Tensor %2239, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%2241 = torch.aten.gt.Tensor %2232, %2240 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,4096,320],i1> loc(#loc1)
%2242 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%2243 = torch.aten.to.dtype %2242, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%2244 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%2245 = torch.aten.broadcast_to %2243, %2244 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%2246 = torch.valsem.aten.copy %2245, %2240, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%2247 = torch.aten.where.self %2241, %2246, %2232 : !torch.vtensor<[2,4096,320],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2248 = torch.aten.lt.Tensor %2247, %2236 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,4096,320],i1> loc(#loc1)
%2249 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%2250 = torch.aten.to.dtype %2249, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%2251 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%2252 = torch.aten.broadcast_to %2250, %2251 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%2253 = torch.valsem.aten.copy %2252, %2236, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%2254 = torch.aten.where.self %2248, %2253, %2247 : !torch.vtensor<[2,4096,320],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2255 = torch.aten.round %2254 : !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2256 = torch.aten.sub.Tensor %2255, %2230, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2257 = torch.aten.mul.Tensor %2256, %2228 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2258 = torch.aten.broadcast_to %82, %1259 : !torch.vtensor<[320,20,1],f16>, !torch.list<int> -> !torch.vtensor<[320,20,16],f16> loc(#loc1)
%2259 = torch.aten.clone %2258, %int0 : !torch.vtensor<[320,20,16],f16>, !torch.int -> !torch.vtensor<[320,20,16],f16> loc(#loc1)
%2260 = torch.aten.view %2259, %1262 : !torch.vtensor<[320,20,16],f16>, !torch.list<int> -> !torch.vtensor<[320,320],f16> loc(#loc1)
%2261 = torch.aten.mul.Tensor %83, %2260 : !torch.vtensor<[320,320],si8>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16> loc(#loc1)
%2262 = torch.aten.transpose.int %2261, %int0, %int1 : !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,320],f16> loc(#loc1)
%2263 = torch.aten.view %2257, %1266 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16> loc(#loc1)
%2264 = torch.aten.mm %2263, %2262 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16> loc(#loc1)
%2265 = torch.aten.mul.Scalar %84, %int1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320],f16> loc(#loc1)
%2266 = torch.aten.add.Tensor %2265, %2264, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[8192,320],f16>, !torch.int -> !torch.vtensor<[8192,320],f16> loc(#loc1)
%2267 = torch.aten.view %2266, %1198 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
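    // Note: %2268 onward reshape the three projections from [2,4096,320] to
    // [16,4096,40], i.e. 8 heads of width 40 folded into the batch dimension.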
%2268 = torch.aten.view %2161, %1378 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16> loc(#loc1)
%2269 = torch.aten.permute %2268, %1380 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16> loc(#loc1)
%2270 = torch.aten.clone %2269, %int0 : !torch.vtensor<[2,8,4096,40],f16>, !torch.int -> !torch.vtensor<[2,8,4096,40],f16> loc(#loc1)
%2271 = torch.aten.view %2270, %1383 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16> loc(#loc1)
%2272 = torch.aten.view %2214, %1378 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16> loc(#loc1)
%2273 = torch.aten.permute %2272, %1380 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16> loc(#loc1)
%2274 = torch.aten.clone %2273, %int0 : !torch.vtensor<[2,8,4096,40],f16>, !torch.int -> !torch.vtensor<[2,8,4096,40],f16> loc(#loc1)
%2275 = torch.aten.view %2274, %1383 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16> loc(#loc1)
%2276 = torch.aten.view %2267, %1378 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16> loc(#loc1)
%2277 = torch.aten.permute %2276, %1380 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16> loc(#loc1)
%2278 = torch.aten.clone %2277, %int0 : !torch.vtensor<[2,8,4096,40],f16>, !torch.int -> !torch.vtensor<[2,8,4096,40],f16> loc(#loc1)
%2279 = torch.aten.view %2278, %1383 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16> loc(#loc1)
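    // Note: K is transposed to [16,40,4096] below; the broadcast_to/view pairs at
    // unchanged shapes look like no-op expands left over from the matmul decomposition.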
%2280 = torch.aten.transpose.int %2275, %int-1, %int-2 : !torch.vtensor<[16,4096,40],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,40,4096],f16> loc(#loc1)
%2281 = torch.aten.broadcast_to %2271, %1383 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16> loc(#loc1)
%2282 = torch.aten.view %2281, %1383 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16> loc(#loc1)
%2283 = torch.aten.broadcast_to %2280, %1396 : !torch.vtensor<[16,40,4096],f16>, !torch.list<int> -> !torch.vtensor<[16,40,4096],f16> loc(#loc1)
%2284 = torch.aten.view %2283, %1396 : !torch.vtensor<[16,40,4096],f16>, !torch.list<int> -> !torch.vtensor<[16,40,4096],f16> loc(#loc1)
%2285 = torch.aten.bmm %2282, %2284 : !torch.vtensor<[16,4096,40],f16>, !torch.vtensor<[16,40,4096],f16> -> !torch.vtensor<[16,4096,4096],f16> loc(#loc1)
%2286 = torch.aten.view %2285, %1400 : !torch.vtensor<[16,4096,4096],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,4096],f16> loc(#loc1)
%2287 = torch.aten.mul.Tensor %2286, %2 : !torch.vtensor<[16,4096,4096],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,4096,4096],f16> loc(#loc1)
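    // Note: %2280-%2287 form the attention scores: Q @ K^T via bmm, scaled by %2
    // (0.15811... = 1/sqrt(40), matching the head width).
    // The max/sub/exp/sum/div sequence below is a numerically stable softmax:
    // subtracting the per-row max before exp avoids f16 overflow.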
%values_38, %indices_39 = torch.aten.max.dim %2287, %int-1, %true : !torch.vtensor<[16,4096,4096],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,4096,1],f16>, !torch.vtensor<[16,4096,1],si64> loc(#loc1)
%2288 = torch.aten.sub.Tensor %2287, %values_38, %float1.000000e00 : !torch.vtensor<[16,4096,4096],f16>, !torch.vtensor<[16,4096,1],f16>, !torch.float -> !torch.vtensor<[16,4096,4096],f16> loc(#loc1)
%2289 = torch.aten.exp %2288 : !torch.vtensor<[16,4096,4096],f16> -> !torch.vtensor<[16,4096,4096],f16> loc(#loc1)
%2290 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int> loc(#loc1)
%2291 = torch.aten.sum.dim_IntList %2289, %2290, %true, %none : !torch.vtensor<[16,4096,4096],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[16,4096,1],f16> loc(#loc1)
%2292 = torch.aten.div.Tensor %2289, %2291 : !torch.vtensor<[16,4096,4096],f16>, !torch.vtensor<[16,4096,1],f16> -> !torch.vtensor<[16,4096,4096],f16> loc(#loc1)
%2293 = torch.aten.broadcast_to %2292, %1400 : !torch.vtensor<[16,4096,4096],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,4096],f16> loc(#loc1)
%2294 = torch.aten.view %2293, %1400 : !torch.vtensor<[16,4096,4096],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,4096],f16> loc(#loc1)
%2295 = torch.aten.broadcast_to %2279, %1383 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16> loc(#loc1)
%2296 = torch.aten.view %2295, %1383 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16> loc(#loc1)
%2297 = torch.aten.bmm %2294, %2296 : !torch.vtensor<[16,4096,4096],f16>, !torch.vtensor<[16,4096,40],f16> -> !torch.vtensor<[16,4096,40],f16> loc(#loc1)
%2298 = torch.aten.view %2297, %1383 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16> loc(#loc1)
%2299 = torch.aten.view %2298, %1414 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16> loc(#loc1)
%2300 = torch.aten.permute %2299, %1380 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16> loc(#loc1)
%2301 = torch.aten.clone %2300, %int0 : !torch.vtensor<[2,4096,8,40],f16>, !torch.int -> !torch.vtensor<[2,4096,8,40],f16> loc(#loc1)
%2302 = torch.aten.view %2301, %1198 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
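    // Note: %2297-%2302 multiply the softmax probabilities into V and restore the
    // [2,4096,320] layout. The block below fake-quantizes this attention output and
    // applies one more quantized linear (%85/%86/%87), presumably the attention's
    // output projection.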
%2303 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2304 = torch.aten.detach %2303 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2305 = torch.aten.view %2302, %1216 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,20,16],f16> loc(#loc1)
%2306 = torch.aten.abs %2305 : !torch.vtensor<[2,4096,20,16],f16> -> !torch.vtensor<[2,4096,20,16],f16> loc(#loc1)
%values_40, %indices_41 = torch.aten.max.dim %2306, %int3, %true : !torch.vtensor<[2,4096,20,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,4096,20,1],f16>, !torch.vtensor<[2,4096,20,1],si64> loc(#loc1)
%2307 = torch.aten.view %values_40, %1219 : !torch.vtensor<[2,4096,20,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,20,1],f16> loc(#loc1)
%2308 = torch.aten.broadcast_to %2307, %1216 : !torch.vtensor<[2,4096,20,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,20,16],f16> loc(#loc1)
%2309 = torch.aten.clone %2308, %int0 : !torch.vtensor<[2,4096,20,16],f16>, !torch.int -> !torch.vtensor<[2,4096,20,16],f16> loc(#loc1)
%2310 = torch.aten.view %2309, %1198 : !torch.vtensor<[2,4096,20,16],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2311 = torch.aten.sub.Tensor %2304, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%2312 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%2313 = torch.aten.pow.Tensor_Tensor %2312, %2311 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2314 = torch.aten.neg %2313 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2315 = torch.aten.neg %2314 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2316 = torch.aten.div.Tensor %2310, %2315 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2317 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2318 = torch.aten.detach %2317 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2319 = torch.aten.div.Tensor %2302, %2316 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2320 = torch.aten.add.Tensor %2319, %2318, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2321 = torch.aten.sub.Tensor %2304, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%2322 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%2323 = torch.aten.pow.Tensor_Tensor %2322, %2321 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2324 = torch.aten.neg %2323 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2325 = torch.aten.sub.Tensor %2304, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%2326 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%2327 = torch.aten.pow.Tensor_Tensor %2326, %2325 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2328 = torch.aten.sub.Tensor %2327, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%2329 = torch.aten.gt.Tensor %2320, %2328 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,4096,320],i1> loc(#loc1)
%2330 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%2331 = torch.aten.to.dtype %2330, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%2332 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%2333 = torch.aten.broadcast_to %2331, %2332 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%2334 = torch.valsem.aten.copy %2333, %2328, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%2335 = torch.aten.where.self %2329, %2334, %2320 : !torch.vtensor<[2,4096,320],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2336 = torch.aten.lt.Tensor %2335, %2324 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,4096,320],i1> loc(#loc1)
%2337 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%2338 = torch.aten.to.dtype %2337, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%2339 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%2340 = torch.aten.broadcast_to %2338, %2339 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%2341 = torch.valsem.aten.copy %2340, %2324, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%2342 = torch.aten.where.self %2336, %2341, %2335 : !torch.vtensor<[2,4096,320],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2343 = torch.aten.round %2342 : !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2344 = torch.aten.sub.Tensor %2343, %2318, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2345 = torch.aten.mul.Tensor %2344, %2316 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2346 = torch.aten.broadcast_to %85, %1259 : !torch.vtensor<[320,20,1],f16>, !torch.list<int> -> !torch.vtensor<[320,20,16],f16> loc(#loc1)
%2347 = torch.aten.clone %2346, %int0 : !torch.vtensor<[320,20,16],f16>, !torch.int -> !torch.vtensor<[320,20,16],f16> loc(#loc1)
%2348 = torch.aten.view %2347, %1262 : !torch.vtensor<[320,20,16],f16>, !torch.list<int> -> !torch.vtensor<[320,320],f16> loc(#loc1)
%2349 = torch.aten.mul.Tensor %86, %2348 : !torch.vtensor<[320,320],si8>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16> loc(#loc1)
%2350 = torch.aten.transpose.int %2349, %int0, %int1 : !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,320],f16> loc(#loc1)
%2351 = torch.aten.view %2345, %1266 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16> loc(#loc1)
%2352 = torch.aten.mm %2351, %2350 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16> loc(#loc1)
%2353 = torch.aten.mul.Scalar %87, %int1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320],f16> loc(#loc1)
%2354 = torch.aten.add.Tensor %2353, %2352, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[8192,320],f16>, !torch.int -> !torch.vtensor<[8192,320],f16> loc(#loc1)
%2355 = torch.aten.view %2354, %1198 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
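    // Note: %2356 is the residual connection: the projected attention output is added
    // back onto the pre-attention tokens %2094. %2357-%2370 then repeat the manual
    // LayerNorm on the result.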
%2356 = torch.aten.add.Tensor %2355, %2094, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2357 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int> loc(#loc1)
%2358 = torch.aten.sum.dim_IntList %2356, %2357, %true, %none : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,4096,1],f16> loc(#loc1)
%2359 = torch.aten.div.Scalar %2358, %int320 : !torch.vtensor<[2,4096,1],f16>, !torch.int -> !torch.vtensor<[2,4096,1],f16> loc(#loc1)
%2360 = torch.prim.ListConstruct %int2, %int4096, %int320 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%2361 = torch.aten.broadcast_to %2359, %2360 : !torch.vtensor<[2,4096,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2362 = torch.aten.sub.Tensor %2356, %2361, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2363 = torch.aten.mul.Tensor %2362, %2362 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2364 = torch.aten.sum.dim_IntList %2363, %2357, %true, %none : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,4096,1],f16> loc(#loc1)
%2365 = torch.aten.div.Scalar %2364, %int320 : !torch.vtensor<[2,4096,1],f16>, !torch.int -> !torch.vtensor<[2,4096,1],f16> loc(#loc1)
%2366 = torch.aten.add.Scalar %2365, %float1.000000e-05, %int1 : !torch.vtensor<[2,4096,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,4096,1],f16> loc(#loc1)
%2367 = torch.aten.rsqrt %2366 : !torch.vtensor<[2,4096,1],f16> -> !torch.vtensor<[2,4096,1],f16> loc(#loc1)
%2368 = torch.prim.ListConstruct %int2, %int4096, %int320 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%2369 = torch.aten.broadcast_to %2367, %2368 : !torch.vtensor<[2,4096,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2370 = torch.aten.mul.Tensor %2362, %2369 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
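    // Note: the normalized tokens are fake-quantized once more and projected to queries
    // (%88/%89/%90). The key/value inputs that follow come from %arg2, the [2,77,768]
    // text-encoder states, so this is presumably the cross-attention half of the block.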
%2371 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2372 = torch.aten.detach %2371 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2373 = torch.aten.view %2370, %1216 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,20,16],f16> loc(#loc1)
%2374 = torch.aten.abs %2373 : !torch.vtensor<[2,4096,20,16],f16> -> !torch.vtensor<[2,4096,20,16],f16> loc(#loc1)
%values_42, %indices_43 = torch.aten.max.dim %2374, %int3, %true : !torch.vtensor<[2,4096,20,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,4096,20,1],f16>, !torch.vtensor<[2,4096,20,1],si64> loc(#loc1)
%2375 = torch.aten.view %values_42, %1219 : !torch.vtensor<[2,4096,20,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,20,1],f16> loc(#loc1)
%2376 = torch.aten.broadcast_to %2375, %1216 : !torch.vtensor<[2,4096,20,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,20,16],f16> loc(#loc1)
%2377 = torch.aten.clone %2376, %int0 : !torch.vtensor<[2,4096,20,16],f16>, !torch.int -> !torch.vtensor<[2,4096,20,16],f16> loc(#loc1)
%2378 = torch.aten.view %2377, %1198 : !torch.vtensor<[2,4096,20,16],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2379 = torch.aten.sub.Tensor %2372, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%2380 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%2381 = torch.aten.pow.Tensor_Tensor %2380, %2379 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2382 = torch.aten.neg %2381 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2383 = torch.aten.neg %2382 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2384 = torch.aten.div.Tensor %2378, %2383 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2385 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2386 = torch.aten.detach %2385 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2387 = torch.aten.div.Tensor %2370, %2384 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2388 = torch.aten.add.Tensor %2387, %2386, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2389 = torch.aten.sub.Tensor %2372, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%2390 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%2391 = torch.aten.pow.Tensor_Tensor %2390, %2389 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2392 = torch.aten.neg %2391 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2393 = torch.aten.sub.Tensor %2372, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%2394 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%2395 = torch.aten.pow.Tensor_Tensor %2394, %2393 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2396 = torch.aten.sub.Tensor %2395, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%2397 = torch.aten.gt.Tensor %2388, %2396 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,4096,320],i1> loc(#loc1)
%2398 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%2399 = torch.aten.to.dtype %2398, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%2400 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%2401 = torch.aten.broadcast_to %2399, %2400 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%2402 = torch.valsem.aten.copy %2401, %2396, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%2403 = torch.aten.where.self %2397, %2402, %2388 : !torch.vtensor<[2,4096,320],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2404 = torch.aten.lt.Tensor %2403, %2392 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,4096,320],i1> loc(#loc1)
%2405 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%2406 = torch.aten.to.dtype %2405, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%2407 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%2408 = torch.aten.broadcast_to %2406, %2407 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%2409 = torch.valsem.aten.copy %2408, %2392, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%2410 = torch.aten.where.self %2404, %2409, %2403 : !torch.vtensor<[2,4096,320],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2411 = torch.aten.round %2410 : !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2412 = torch.aten.sub.Tensor %2411, %2386, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2413 = torch.aten.mul.Tensor %2412, %2384 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2414 = torch.aten.broadcast_to %88, %1259 : !torch.vtensor<[320,20,1],f16>, !torch.list<int> -> !torch.vtensor<[320,20,16],f16> loc(#loc1)
%2415 = torch.aten.clone %2414, %int0 : !torch.vtensor<[320,20,16],f16>, !torch.int -> !torch.vtensor<[320,20,16],f16> loc(#loc1)
%2416 = torch.aten.view %2415, %1262 : !torch.vtensor<[320,20,16],f16>, !torch.list<int> -> !torch.vtensor<[320,320],f16> loc(#loc1)
%2417 = torch.aten.mul.Tensor %89, %2416 : !torch.vtensor<[320,320],si8>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16> loc(#loc1)
%2418 = torch.aten.transpose.int %2417, %int0, %int1 : !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,320],f16> loc(#loc1)
%2419 = torch.aten.view %2413, %1266 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16> loc(#loc1)
%2420 = torch.aten.mm %2419, %2418 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16> loc(#loc1)
%2421 = torch.aten.mul.Scalar %90, %int1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320],f16> loc(#loc1)
%2422 = torch.aten.add.Tensor %2421, %2420, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[8192,320],f16>, !torch.int -> !torch.vtensor<[8192,320],f16> loc(#loc1)
%2423 = torch.aten.view %2422, %1198 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
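    // Note: the same fake-quant recipe is now applied to the encoder hidden states
    // %arg2 ([2,77,768], viewed as 48 groups of 16) before a 768 -> 320 quantized
    // projection (%91/%92), giving the cross-attention keys. No bias is added here.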
%2424 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2425 = torch.aten.detach %2424 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2426 = torch.aten.view %arg2, %1542 : !torch.vtensor<[2,77,768],f16>, !torch.list<int> -> !torch.vtensor<[2,77,48,16],f16> loc(#loc1)
%2427 = torch.aten.abs %2426 : !torch.vtensor<[2,77,48,16],f16> -> !torch.vtensor<[2,77,48,16],f16> loc(#loc1)
%values_44, %indices_45 = torch.aten.max.dim %2427, %int3, %true : !torch.vtensor<[2,77,48,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,77,48,1],f16>, !torch.vtensor<[2,77,48,1],si64> loc(#loc1)
%2428 = torch.aten.view %values_44, %1545 : !torch.vtensor<[2,77,48,1],f16>, !torch.list<int> -> !torch.vtensor<[2,77,48,1],f16> loc(#loc1)
%2429 = torch.aten.broadcast_to %2428, %1542 : !torch.vtensor<[2,77,48,1],f16>, !torch.list<int> -> !torch.vtensor<[2,77,48,16],f16> loc(#loc1)
%2430 = torch.aten.clone %2429, %int0 : !torch.vtensor<[2,77,48,16],f16>, !torch.int -> !torch.vtensor<[2,77,48,16],f16> loc(#loc1)
%2431 = torch.aten.view %2430, %1549 : !torch.vtensor<[2,77,48,16],f16>, !torch.list<int> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%2432 = torch.aten.sub.Tensor %2425, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%2433 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%2434 = torch.aten.pow.Tensor_Tensor %2433, %2432 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2435 = torch.aten.neg %2434 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2436 = torch.aten.neg %2435 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2437 = torch.aten.div.Tensor %2431, %2436 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%2438 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2439 = torch.aten.detach %2438 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2440 = torch.aten.div.Tensor %arg2, %2437 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[2,77,768],f16> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%2441 = torch.aten.add.Tensor %2440, %2439, %int1 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%2442 = torch.aten.sub.Tensor %2425, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%2443 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%2444 = torch.aten.pow.Tensor_Tensor %2443, %2442 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2445 = torch.aten.neg %2444 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2446 = torch.aten.sub.Tensor %2425, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%2447 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%2448 = torch.aten.pow.Tensor_Tensor %2447, %2446 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2449 = torch.aten.sub.Tensor %2448, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%2450 = torch.aten.gt.Tensor %2441, %2449 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,77,768],i1> loc(#loc1)
%2451 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%2452 = torch.aten.to.dtype %2451, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%2453 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%2454 = torch.aten.broadcast_to %2452, %2453 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%2455 = torch.valsem.aten.copy %2454, %2449, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%2456 = torch.aten.where.self %2450, %2455, %2441 : !torch.vtensor<[2,77,768],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,77,768],f16> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%2457 = torch.aten.lt.Tensor %2456, %2445 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,77,768],i1> loc(#loc1)
%2458 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%2459 = torch.aten.to.dtype %2458, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%2460 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%2461 = torch.aten.broadcast_to %2459, %2460 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%2462 = torch.valsem.aten.copy %2461, %2445, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%2463 = torch.aten.where.self %2457, %2462, %2456 : !torch.vtensor<[2,77,768],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,77,768],f16> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%2464 = torch.aten.round %2463 : !torch.vtensor<[2,77,768],f16> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%2465 = torch.aten.sub.Tensor %2464, %2439, %int1 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%2466 = torch.aten.mul.Tensor %2465, %2437 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[2,77,768],f16> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%2467 = torch.aten.broadcast_to %91, %1586 : !torch.vtensor<[320,48,1],f16>, !torch.list<int> -> !torch.vtensor<[320,48,16],f16> loc(#loc1)
%2468 = torch.aten.clone %2467, %int0 : !torch.vtensor<[320,48,16],f16>, !torch.int -> !torch.vtensor<[320,48,16],f16> loc(#loc1)
%2469 = torch.aten.view %2468, %1589 : !torch.vtensor<[320,48,16],f16>, !torch.list<int> -> !torch.vtensor<[320,768],f16> loc(#loc1)
%2470 = torch.aten.mul.Tensor %92, %2469 : !torch.vtensor<[320,768],si8>, !torch.vtensor<[320,768],f16> -> !torch.vtensor<[320,768],f16> loc(#loc1)
%2471 = torch.aten.transpose.int %2470, %int0, %int1 : !torch.vtensor<[320,768],f16>, !torch.int, !torch.int -> !torch.vtensor<[768,320],f16> loc(#loc1)
%2472 = torch.aten.view %2466, %1593 : !torch.vtensor<[2,77,768],f16>, !torch.list<int> -> !torch.vtensor<[154,768],f16> loc(#loc1)
%2473 = torch.aten.mm %2472, %2471 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,320],f16> -> !torch.vtensor<[154,320],f16> loc(#loc1)
%2474 = torch.aten.view %2473, %1596 : !torch.vtensor<[154,320],f16>, !torch.list<int> -> !torch.vtensor<[2,77,320],f16> loc(#loc1)
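    // Note: the value path repeats the identical quantization and 768 -> 320
    // projection on %arg2 with weights %93/%94, again without a bias.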
%2475 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2476 = torch.aten.detach %2475 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2477 = torch.aten.view %arg2, %1542 : !torch.vtensor<[2,77,768],f16>, !torch.list<int> -> !torch.vtensor<[2,77,48,16],f16> loc(#loc1)
%2478 = torch.aten.abs %2477 : !torch.vtensor<[2,77,48,16],f16> -> !torch.vtensor<[2,77,48,16],f16> loc(#loc1)
%values_46, %indices_47 = torch.aten.max.dim %2478, %int3, %true : !torch.vtensor<[2,77,48,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,77,48,1],f16>, !torch.vtensor<[2,77,48,1],si64> loc(#loc1)
%2479 = torch.aten.view %values_46, %1545 : !torch.vtensor<[2,77,48,1],f16>, !torch.list<int> -> !torch.vtensor<[2,77,48,1],f16> loc(#loc1)
%2480 = torch.aten.broadcast_to %2479, %1542 : !torch.vtensor<[2,77,48,1],f16>, !torch.list<int> -> !torch.vtensor<[2,77,48,16],f16> loc(#loc1)
%2481 = torch.aten.clone %2480, %int0 : !torch.vtensor<[2,77,48,16],f16>, !torch.int -> !torch.vtensor<[2,77,48,16],f16> loc(#loc1)
%2482 = torch.aten.view %2481, %1549 : !torch.vtensor<[2,77,48,16],f16>, !torch.list<int> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%2483 = torch.aten.sub.Tensor %2476, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%2484 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%2485 = torch.aten.pow.Tensor_Tensor %2484, %2483 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2486 = torch.aten.neg %2485 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2487 = torch.aten.neg %2486 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2488 = torch.aten.div.Tensor %2482, %2487 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%2489 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2490 = torch.aten.detach %2489 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2491 = torch.aten.div.Tensor %arg2, %2488 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[2,77,768],f16> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%2492 = torch.aten.add.Tensor %2491, %2490, %int1 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%2493 = torch.aten.sub.Tensor %2476, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%2494 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%2495 = torch.aten.pow.Tensor_Tensor %2494, %2493 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2496 = torch.aten.neg %2495 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2497 = torch.aten.sub.Tensor %2476, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%2498 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%2499 = torch.aten.pow.Tensor_Tensor %2498, %2497 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2500 = torch.aten.sub.Tensor %2499, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%2501 = torch.aten.gt.Tensor %2492, %2500 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,77,768],i1> loc(#loc1)
%2502 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%2503 = torch.aten.to.dtype %2502, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%2504 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%2505 = torch.aten.broadcast_to %2503, %2504 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%2506 = torch.valsem.aten.copy %2505, %2500, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%2507 = torch.aten.where.self %2501, %2506, %2492 : !torch.vtensor<[2,77,768],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,77,768],f16> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%2508 = torch.aten.lt.Tensor %2507, %2496 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,77,768],i1> loc(#loc1)
%2509 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%2510 = torch.aten.to.dtype %2509, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%2511 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%2512 = torch.aten.broadcast_to %2510, %2511 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%2513 = torch.valsem.aten.copy %2512, %2496, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%2514 = torch.aten.where.self %2508, %2513, %2507 : !torch.vtensor<[2,77,768],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,77,768],f16> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%2515 = torch.aten.round %2514 : !torch.vtensor<[2,77,768],f16> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%2516 = torch.aten.sub.Tensor %2515, %2490, %int1 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%2517 = torch.aten.mul.Tensor %2516, %2488 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[2,77,768],f16> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
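// The ops above appear to fake-quantize the encoder hidden states (%arg2). Below, an si8 weight (%94)
// is dequantized with per-group scales (%93) and applied as a 768 -> 320 projection over the 77 text
// tokens -- likely the cross-attention value (to_v) path.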
%2518 = torch.aten.broadcast_to %93, %1586 : !torch.vtensor<[320,48,1],f16>, !torch.list<int> -> !torch.vtensor<[320,48,16],f16> loc(#loc1)
%2519 = torch.aten.clone %2518, %int0 : !torch.vtensor<[320,48,16],f16>, !torch.int -> !torch.vtensor<[320,48,16],f16> loc(#loc1)
%2520 = torch.aten.view %2519, %1589 : !torch.vtensor<[320,48,16],f16>, !torch.list<int> -> !torch.vtensor<[320,768],f16> loc(#loc1)
%2521 = torch.aten.mul.Tensor %94, %2520 : !torch.vtensor<[320,768],si8>, !torch.vtensor<[320,768],f16> -> !torch.vtensor<[320,768],f16> loc(#loc1)
%2522 = torch.aten.transpose.int %2521, %int0, %int1 : !torch.vtensor<[320,768],f16>, !torch.int, !torch.int -> !torch.vtensor<[768,320],f16> loc(#loc1)
%2523 = torch.aten.view %2517, %1593 : !torch.vtensor<[2,77,768],f16>, !torch.list<int> -> !torch.vtensor<[154,768],f16> loc(#loc1)
%2524 = torch.aten.mm %2523, %2522 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,320],f16> -> !torch.vtensor<[154,320],f16> loc(#loc1)
%2525 = torch.aten.view %2524, %1596 : !torch.vtensor<[154,320],f16>, !torch.list<int> -> !torch.vtensor<[2,77,320],f16> loc(#loc1)
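// Split into 8 attention heads of dim 40 (batch*heads = 16): Q from the 4096 spatial tokens (%2423),
// K (%2474) and V (%2525) from the 77 text tokens.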
%2526 = torch.aten.view %2423, %1378 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16> loc(#loc1)
%2527 = torch.aten.permute %2526, %1380 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16> loc(#loc1)
%2528 = torch.aten.clone %2527, %int0 : !torch.vtensor<[2,8,4096,40],f16>, !torch.int -> !torch.vtensor<[2,8,4096,40],f16> loc(#loc1)
%2529 = torch.aten.view %2528, %1383 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16> loc(#loc1)
%2530 = torch.aten.view %2474, %1653 : !torch.vtensor<[2,77,320],f16>, !torch.list<int> -> !torch.vtensor<[2,77,8,40],f16> loc(#loc1)
%2531 = torch.aten.permute %2530, %1380 : !torch.vtensor<[2,77,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,40],f16> loc(#loc1)
%2532 = torch.aten.clone %2531, %int0 : !torch.vtensor<[2,8,77,40],f16>, !torch.int -> !torch.vtensor<[2,8,77,40],f16> loc(#loc1)
%2533 = torch.aten.view %2532, %1657 : !torch.vtensor<[2,8,77,40],f16>, !torch.list<int> -> !torch.vtensor<[16,77,40],f16> loc(#loc1)
%2534 = torch.aten.view %2525, %1653 : !torch.vtensor<[2,77,320],f16>, !torch.list<int> -> !torch.vtensor<[2,77,8,40],f16> loc(#loc1)
%2535 = torch.aten.permute %2534, %1380 : !torch.vtensor<[2,77,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,40],f16> loc(#loc1)
%2536 = torch.aten.clone %2535, %int0 : !torch.vtensor<[2,8,77,40],f16>, !torch.int -> !torch.vtensor<[2,8,77,40],f16> loc(#loc1)
%2537 = torch.aten.view %2536, %1657 : !torch.vtensor<[2,8,77,40],f16>, !torch.list<int> -> !torch.vtensor<[16,77,40],f16> loc(#loc1)
%2538 = torch.aten.transpose.int %2533, %int-1, %int-2 : !torch.vtensor<[16,77,40],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,40,77],f16> loc(#loc1)
%2539 = torch.aten.broadcast_to %2529, %1383 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16> loc(#loc1)
%2540 = torch.aten.view %2539, %1383 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16> loc(#loc1)
%2541 = torch.aten.broadcast_to %2538, %1666 : !torch.vtensor<[16,40,77],f16>, !torch.list<int> -> !torch.vtensor<[16,40,77],f16> loc(#loc1)
%2542 = torch.aten.view %2541, %1666 : !torch.vtensor<[16,40,77],f16>, !torch.list<int> -> !torch.vtensor<[16,40,77],f16> loc(#loc1)
%2543 = torch.aten.bmm %2540, %2542 : !torch.vtensor<[16,4096,40],f16>, !torch.vtensor<[16,40,77],f16> -> !torch.vtensor<[16,4096,77],f16> loc(#loc1)
%2544 = torch.aten.view %2543, %1670 : !torch.vtensor<[16,4096,77],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,77],f16> loc(#loc1)
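// Scaled dot-product attention scores: Q @ K^T scaled by %2 = 0.15811... = 1/sqrt(40),
// then a numerically stable softmax (subtract row max, exp, normalize).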
%2545 = torch.aten.mul.Tensor %2544, %2 : !torch.vtensor<[16,4096,77],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,4096,77],f16> loc(#loc1)
%values_48, %indices_49 = torch.aten.max.dim %2545, %int-1, %true : !torch.vtensor<[16,4096,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,4096,1],f16>, !torch.vtensor<[16,4096,1],si64> loc(#loc1)
%2546 = torch.aten.sub.Tensor %2545, %values_48, %float1.000000e00 : !torch.vtensor<[16,4096,77],f16>, !torch.vtensor<[16,4096,1],f16>, !torch.float -> !torch.vtensor<[16,4096,77],f16> loc(#loc1)
%2547 = torch.aten.exp %2546 : !torch.vtensor<[16,4096,77],f16> -> !torch.vtensor<[16,4096,77],f16> loc(#loc1)
%2548 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int> loc(#loc1)
%2549 = torch.aten.sum.dim_IntList %2547, %2548, %true, %none : !torch.vtensor<[16,4096,77],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[16,4096,1],f16> loc(#loc1)
%2550 = torch.aten.div.Tensor %2547, %2549 : !torch.vtensor<[16,4096,77],f16>, !torch.vtensor<[16,4096,1],f16> -> !torch.vtensor<[16,4096,77],f16> loc(#loc1)
%2551 = torch.aten.broadcast_to %2550, %1670 : !torch.vtensor<[16,4096,77],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,77],f16> loc(#loc1)
%2552 = torch.aten.view %2551, %1670 : !torch.vtensor<[16,4096,77],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,77],f16> loc(#loc1)
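// Weight the values with the attention probabilities and merge the 8 heads back to [2,4096,320].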
%2553 = torch.aten.broadcast_to %2537, %1657 : !torch.vtensor<[16,77,40],f16>, !torch.list<int> -> !torch.vtensor<[16,77,40],f16> loc(#loc1)
%2554 = torch.aten.view %2553, %1657 : !torch.vtensor<[16,77,40],f16>, !torch.list<int> -> !torch.vtensor<[16,77,40],f16> loc(#loc1)
%2555 = torch.aten.bmm %2552, %2554 : !torch.vtensor<[16,4096,77],f16>, !torch.vtensor<[16,77,40],f16> -> !torch.vtensor<[16,4096,40],f16> loc(#loc1)
%2556 = torch.aten.view %2555, %1383 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16> loc(#loc1)
%2557 = torch.aten.view %2556, %1414 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16> loc(#loc1)
%2558 = torch.aten.permute %2557, %1380 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16> loc(#loc1)
%2559 = torch.aten.clone %2558, %int0 : !torch.vtensor<[2,4096,8,40],f16>, !torch.int -> !torch.vtensor<[2,4096,8,40],f16> loc(#loc1)
%2560 = torch.aten.view %2559, %1198 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
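// Fake-quant (quantize-dequantize) of the attention output: a per-group max-abs over 16-channel
// groups gives the scale; %880/%881 presumably hold the bit-width n and zero-point, %882 the base 2,
// so values are clamped to [-2^(n-1), 2^(n-1)-1], rounded, then rescaled.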
%2561 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2562 = torch.aten.detach %2561 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2563 = torch.aten.view %2560, %1216 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,20,16],f16> loc(#loc1)
%2564 = torch.aten.abs %2563 : !torch.vtensor<[2,4096,20,16],f16> -> !torch.vtensor<[2,4096,20,16],f16> loc(#loc1)
%values_50, %indices_51 = torch.aten.max.dim %2564, %int3, %true : !torch.vtensor<[2,4096,20,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,4096,20,1],f16>, !torch.vtensor<[2,4096,20,1],si64> loc(#loc1)
%2565 = torch.aten.view %values_50, %1219 : !torch.vtensor<[2,4096,20,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,20,1],f16> loc(#loc1)
%2566 = torch.aten.broadcast_to %2565, %1216 : !torch.vtensor<[2,4096,20,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,20,16],f16> loc(#loc1)
%2567 = torch.aten.clone %2566, %int0 : !torch.vtensor<[2,4096,20,16],f16>, !torch.int -> !torch.vtensor<[2,4096,20,16],f16> loc(#loc1)
%2568 = torch.aten.view %2567, %1198 : !torch.vtensor<[2,4096,20,16],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2569 = torch.aten.sub.Tensor %2562, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%2570 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%2571 = torch.aten.pow.Tensor_Tensor %2570, %2569 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2572 = torch.aten.neg %2571 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2573 = torch.aten.neg %2572 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2574 = torch.aten.div.Tensor %2568, %2573 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2575 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2576 = torch.aten.detach %2575 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2577 = torch.aten.div.Tensor %2560, %2574 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2578 = torch.aten.add.Tensor %2577, %2576, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2579 = torch.aten.sub.Tensor %2562, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%2580 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%2581 = torch.aten.pow.Tensor_Tensor %2580, %2579 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2582 = torch.aten.neg %2581 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2583 = torch.aten.sub.Tensor %2562, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%2584 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%2585 = torch.aten.pow.Tensor_Tensor %2584, %2583 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2586 = torch.aten.sub.Tensor %2585, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%2587 = torch.aten.gt.Tensor %2578, %2586 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,4096,320],i1> loc(#loc1)
%2588 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%2589 = torch.aten.to.dtype %2588, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%2590 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%2591 = torch.aten.broadcast_to %2589, %2590 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%2592 = torch.valsem.aten.copy %2591, %2586, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%2593 = torch.aten.where.self %2587, %2592, %2578 : !torch.vtensor<[2,4096,320],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2594 = torch.aten.lt.Tensor %2593, %2582 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,4096,320],i1> loc(#loc1)
%2595 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%2596 = torch.aten.to.dtype %2595, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%2597 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%2598 = torch.aten.broadcast_to %2596, %2597 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%2599 = torch.valsem.aten.copy %2598, %2582, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%2600 = torch.aten.where.self %2594, %2599, %2593 : !torch.vtensor<[2,4096,320],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2601 = torch.aten.round %2600 : !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2602 = torch.aten.sub.Tensor %2601, %2576, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2603 = torch.aten.mul.Tensor %2602, %2574 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
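// Dequantize the si8 out-projection weight (%96 * scales %95), apply the 320 -> 320 linear with
// bias %97, and add the residual (%2356).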
%2604 = torch.aten.broadcast_to %95, %1259 : !torch.vtensor<[320,20,1],f16>, !torch.list<int> -> !torch.vtensor<[320,20,16],f16> loc(#loc1)
%2605 = torch.aten.clone %2604, %int0 : !torch.vtensor<[320,20,16],f16>, !torch.int -> !torch.vtensor<[320,20,16],f16> loc(#loc1)
%2606 = torch.aten.view %2605, %1262 : !torch.vtensor<[320,20,16],f16>, !torch.list<int> -> !torch.vtensor<[320,320],f16> loc(#loc1)
%2607 = torch.aten.mul.Tensor %96, %2606 : !torch.vtensor<[320,320],si8>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16> loc(#loc1)
%2608 = torch.aten.transpose.int %2607, %int0, %int1 : !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,320],f16> loc(#loc1)
%2609 = torch.aten.view %2603, %1266 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16> loc(#loc1)
%2610 = torch.aten.mm %2609, %2608 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16> loc(#loc1)
%2611 = torch.aten.mul.Scalar %97, %int1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320],f16> loc(#loc1)
%2612 = torch.aten.add.Tensor %2611, %2610, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[8192,320],f16>, !torch.int -> !torch.vtensor<[8192,320],f16> loc(#loc1)
%2613 = torch.aten.view %2612, %1198 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2614 = torch.aten.add.Tensor %2613, %2356, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
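// LayerNorm over the 320-dim feature axis, spelled out: mean, centered variance, rsqrt(var + 1e-5).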
%2615 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int> loc(#loc1)
%2616 = torch.aten.sum.dim_IntList %2614, %2615, %true, %none : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,4096,1],f16> loc(#loc1)
%2617 = torch.aten.div.Scalar %2616, %int320 : !torch.vtensor<[2,4096,1],f16>, !torch.int -> !torch.vtensor<[2,4096,1],f16> loc(#loc1)
%2618 = torch.prim.ListConstruct %int2, %int4096, %int320 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%2619 = torch.aten.broadcast_to %2617, %2618 : !torch.vtensor<[2,4096,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2620 = torch.aten.sub.Tensor %2614, %2619, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2621 = torch.aten.mul.Tensor %2620, %2620 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2622 = torch.aten.sum.dim_IntList %2621, %2615, %true, %none : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,4096,1],f16> loc(#loc1)
%2623 = torch.aten.div.Scalar %2622, %int320 : !torch.vtensor<[2,4096,1],f16>, !torch.int -> !torch.vtensor<[2,4096,1],f16> loc(#loc1)
%2624 = torch.aten.add.Scalar %2623, %float1.000000e-05, %int1 : !torch.vtensor<[2,4096,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,4096,1],f16> loc(#loc1)
%2625 = torch.aten.rsqrt %2624 : !torch.vtensor<[2,4096,1],f16> -> !torch.vtensor<[2,4096,1],f16> loc(#loc1)
%2626 = torch.prim.ListConstruct %int2, %int4096, %int320 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%2627 = torch.aten.broadcast_to %2625, %2626 : !torch.vtensor<[2,4096,1],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2628 = torch.aten.mul.Tensor %2620, %2627 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
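// Feed-forward with a GEGLU gate: a quantized 320 -> 2560 projection is split into two 1280-wide
// halves, and the GELU of the second half gates the first.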
%2629 = torch.aten.broadcast_to %98, %1756 : !torch.vtensor<[2560,20,1],f16>, !torch.list<int> -> !torch.vtensor<[2560,20,16],f16> loc(#loc1)
%2630 = torch.aten.clone %2629, %int0 : !torch.vtensor<[2560,20,16],f16>, !torch.int -> !torch.vtensor<[2560,20,16],f16> loc(#loc1)
%2631 = torch.aten.view %2630, %1759 : !torch.vtensor<[2560,20,16],f16>, !torch.list<int> -> !torch.vtensor<[2560,320],f16> loc(#loc1)
%2632 = torch.aten.mul.Tensor %99, %2631 : !torch.vtensor<[2560,320],si8>, !torch.vtensor<[2560,320],f16> -> !torch.vtensor<[2560,320],f16> loc(#loc1)
%2633 = torch.aten.transpose.int %2632, %int0, %int1 : !torch.vtensor<[2560,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,2560],f16> loc(#loc1)
%2634 = torch.aten.view %2628, %1266 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16> loc(#loc1)
%2635 = torch.aten.mm %2634, %2633 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,2560],f16> -> !torch.vtensor<[8192,2560],f16> loc(#loc1)
%2636 = torch.aten.mul.Scalar %100, %int1 : !torch.vtensor<[2560],f16>, !torch.int -> !torch.vtensor<[2560],f16> loc(#loc1)
%2637 = torch.aten.add.Tensor %2636, %2635, %int1 : !torch.vtensor<[2560],f16>, !torch.vtensor<[8192,2560],f16>, !torch.int -> !torch.vtensor<[8192,2560],f16> loc(#loc1)
%2638 = torch.aten.view %2637, %1767 : !torch.vtensor<[8192,2560],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,2560],f16> loc(#loc1)
%2639 = torch.aten.slice.Tensor %2638, %int-1, %int0, %int1280, %int1 : !torch.vtensor<[2,4096,2560],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,4096,1280],f16> loc(#loc1)
%2640 = torch.aten.slice.Tensor %2638, %int-1, %int1280, %int2560, %int1 : !torch.vtensor<[2,4096,2560],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,4096,1280],f16> loc(#loc1)
%2641 = torch.aten.gelu %2640, %str : !torch.vtensor<[2,4096,1280],f16>, !torch.str -> !torch.vtensor<[2,4096,1280],f16> loc(#loc1)
%2642 = torch.aten.mul.Tensor %2639, %2641 : !torch.vtensor<[2,4096,1280],f16>, !torch.vtensor<[2,4096,1280],f16> -> !torch.vtensor<[2,4096,1280],f16> loc(#loc1)
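// Quantized 1280 -> 320 down-projection plus residual add, closing this transformer block's
// feed-forward.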
%2643 = torch.aten.broadcast_to %101, %1773 : !torch.vtensor<[320,80,1],f16>, !torch.list<int> -> !torch.vtensor<[320,80,16],f16> loc(#loc1)
%2644 = torch.aten.clone %2643, %int0 : !torch.vtensor<[320,80,16],f16>, !torch.int -> !torch.vtensor<[320,80,16],f16> loc(#loc1)
%2645 = torch.aten.view %2644, %1776 : !torch.vtensor<[320,80,16],f16>, !torch.list<int> -> !torch.vtensor<[320,1280],f16> loc(#loc1)
%2646 = torch.aten.mul.Tensor %102, %2645 : !torch.vtensor<[320,1280],si8>, !torch.vtensor<[320,1280],f16> -> !torch.vtensor<[320,1280],f16> loc(#loc1)
%2647 = torch.aten.transpose.int %2646, %int0, %int1 : !torch.vtensor<[320,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,320],f16> loc(#loc1)
%2648 = torch.aten.view %2642, %1780 : !torch.vtensor<[2,4096,1280],f16>, !torch.list<int> -> !torch.vtensor<[8192,1280],f16> loc(#loc1)
%2649 = torch.aten.mm %2648, %2647 : !torch.vtensor<[8192,1280],f16>, !torch.vtensor<[1280,320],f16> -> !torch.vtensor<[8192,320],f16> loc(#loc1)
%2650 = torch.aten.mul.Scalar %103, %int1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320],f16> loc(#loc1)
%2651 = torch.aten.add.Tensor %2650, %2649, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[8192,320],f16>, !torch.int -> !torch.vtensor<[8192,320],f16> loc(#loc1)
%2652 = torch.aten.view %2651, %1198 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
%2653 = torch.aten.add.Tensor %2652, %2614, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16> loc(#loc1)
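// Back to NCHW spatial layout: [2,4096,320] -> [2,64,64,320] -> [2,320,64,64].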
%2654 = torch.aten.view %2653, %1787 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[2,64,64,320],f16> loc(#loc1)
%2655 = torch.aten.permute %2654, %1789 : !torch.vtensor<[2,64,64,320],f16>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
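// Same fake-quant pattern as above, applied to the spatial features (20 groups of 16 channels)
// ahead of the transformer's output projection conv.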
%2656 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2657 = torch.aten.detach %2656 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2658 = torch.aten.view %2655, %975 : !torch.vtensor<[2,320,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,16,64,64],f16> loc(#loc1)
%2659 = torch.aten.abs %2658 : !torch.vtensor<[2,20,16,64,64],f16> -> !torch.vtensor<[2,20,16,64,64],f16> loc(#loc1)
%values_52, %indices_53 = torch.aten.max.dim %2659, %int2, %true : !torch.vtensor<[2,20,16,64,64],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,20,1,64,64],f16>, !torch.vtensor<[2,20,1,64,64],si64> loc(#loc1)
%2660 = torch.aten.view %values_52, %978 : !torch.vtensor<[2,20,1,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,1,64,64],f16> loc(#loc1)
%2661 = torch.aten.broadcast_to %2660, %975 : !torch.vtensor<[2,20,1,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,16,64,64],f16> loc(#loc1)
%2662 = torch.aten.clone %2661, %int0 : !torch.vtensor<[2,20,16,64,64],f16>, !torch.int -> !torch.vtensor<[2,20,16,64,64],f16> loc(#loc1)
%2663 = torch.aten.view %2662, %958 : !torch.vtensor<[2,20,16,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%2664 = torch.aten.sub.Tensor %2657, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%2665 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%2666 = torch.aten.pow.Tensor_Tensor %2665, %2664 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2667 = torch.aten.neg %2666 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2668 = torch.aten.neg %2667 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2669 = torch.aten.div.Tensor %2663, %2668 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%2670 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2671 = torch.aten.detach %2670 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2672 = torch.aten.div.Tensor %2655, %2669 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%2673 = torch.aten.add.Tensor %2672, %2671, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%2674 = torch.aten.sub.Tensor %2657, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%2675 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%2676 = torch.aten.pow.Tensor_Tensor %2675, %2674 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2677 = torch.aten.neg %2676 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2678 = torch.aten.sub.Tensor %2657, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%2679 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%2680 = torch.aten.pow.Tensor_Tensor %2679, %2678 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2681 = torch.aten.sub.Tensor %2680, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%2682 = torch.aten.gt.Tensor %2673, %2681 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,320,64,64],i1> loc(#loc1)
%2683 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%2684 = torch.aten.to.dtype %2683, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%2685 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%2686 = torch.aten.broadcast_to %2684, %2685 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%2687 = torch.valsem.aten.copy %2686, %2681, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%2688 = torch.aten.where.self %2682, %2687, %2673 : !torch.vtensor<[2,320,64,64],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%2689 = torch.aten.lt.Tensor %2688, %2677 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,320,64,64],i1> loc(#loc1)
%2690 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%2691 = torch.aten.to.dtype %2690, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%2692 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%2693 = torch.aten.broadcast_to %2691, %2692 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%2694 = torch.valsem.aten.copy %2693, %2677, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%2695 = torch.aten.where.self %2689, %2694, %2688 : !torch.vtensor<[2,320,64,64],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%2696 = torch.aten.round %2695 : !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%2697 = torch.aten.sub.Tensor %2696, %2671, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%2698 = torch.aten.mul.Tensor %2697, %2669 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
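// Dequantized 1x1 convolution (presumably proj_out), followed by the residual add with the
// pre-transformer hidden states (%2017).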
%2699 = torch.aten.broadcast_to %104, %1189 : !torch.vtensor<[320,20,1,1,1],f16>, !torch.list<int> -> !torch.vtensor<[320,20,16,1,1],f16> loc(#loc1)
%2700 = torch.aten.clone %2699, %int0 : !torch.vtensor<[320,20,16,1,1],f16>, !torch.int -> !torch.vtensor<[320,20,16,1,1],f16> loc(#loc1)
%2701 = torch.aten.view %2700, %1192 : !torch.vtensor<[320,20,16,1,1],f16>, !torch.list<int> -> !torch.vtensor<[320,320,1,1],f16> loc(#loc1)
%2702 = torch.aten.mul.Tensor %105, %2701 : !torch.vtensor<[320,320,1,1],si8>, !torch.vtensor<[320,320,1,1],f16> -> !torch.vtensor<[320,320,1,1],f16> loc(#loc1)
%2703 = torch.aten.convolution %2698, %2702, %106, %933, %934, %933, %false, %934, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[320,320,1,1],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%2704 = torch.aten.add.Tensor %2703, %2017, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
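// Fake-quant once more before the downsampling convolution.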
%2705 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2706 = torch.aten.detach %2705 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2707 = torch.aten.view %2704, %975 : !torch.vtensor<[2,320,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,16,64,64],f16> loc(#loc1)
%2708 = torch.aten.abs %2707 : !torch.vtensor<[2,20,16,64,64],f16> -> !torch.vtensor<[2,20,16,64,64],f16> loc(#loc1)
%values_54, %indices_55 = torch.aten.max.dim %2708, %int2, %true : !torch.vtensor<[2,20,16,64,64],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,20,1,64,64],f16>, !torch.vtensor<[2,20,1,64,64],si64> loc(#loc1)
%2709 = torch.aten.view %values_54, %978 : !torch.vtensor<[2,20,1,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,1,64,64],f16> loc(#loc1)
%2710 = torch.aten.broadcast_to %2709, %975 : !torch.vtensor<[2,20,1,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,16,64,64],f16> loc(#loc1)
%2711 = torch.aten.clone %2710, %int0 : !torch.vtensor<[2,20,16,64,64],f16>, !torch.int -> !torch.vtensor<[2,20,16,64,64],f16> loc(#loc1)
%2712 = torch.aten.view %2711, %958 : !torch.vtensor<[2,20,16,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%2713 = torch.aten.sub.Tensor %2706, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%2714 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%2715 = torch.aten.pow.Tensor_Tensor %2714, %2713 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2716 = torch.aten.neg %2715 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2717 = torch.aten.neg %2716 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2718 = torch.aten.div.Tensor %2712, %2717 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%2719 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2720 = torch.aten.detach %2719 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2721 = torch.aten.div.Tensor %2704, %2718 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%2722 = torch.aten.add.Tensor %2721, %2720, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%2723 = torch.aten.sub.Tensor %2706, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%2724 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%2725 = torch.aten.pow.Tensor_Tensor %2724, %2723 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2726 = torch.aten.neg %2725 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2727 = torch.aten.sub.Tensor %2706, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%2728 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%2729 = torch.aten.pow.Tensor_Tensor %2728, %2727 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2730 = torch.aten.sub.Tensor %2729, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%2731 = torch.aten.gt.Tensor %2722, %2730 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,320,64,64],i1> loc(#loc1)
%2732 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%2733 = torch.aten.to.dtype %2732, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%2734 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%2735 = torch.aten.broadcast_to %2733, %2734 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%2736 = torch.valsem.aten.copy %2735, %2730, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%2737 = torch.aten.where.self %2731, %2736, %2722 : !torch.vtensor<[2,320,64,64],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%2738 = torch.aten.lt.Tensor %2737, %2726 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,320,64,64],i1> loc(#loc1)
%2739 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%2740 = torch.aten.to.dtype %2739, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%2741 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%2742 = torch.aten.broadcast_to %2740, %2741 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%2743 = torch.valsem.aten.copy %2742, %2726, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%2744 = torch.aten.where.self %2738, %2743, %2737 : !torch.vtensor<[2,320,64,64],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%2745 = torch.aten.round %2744 : !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%2746 = torch.aten.sub.Tensor %2745, %2720, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
%2747 = torch.aten.mul.Tensor %2746, %2718 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16> loc(#loc1)
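// Dequantized 3x3 convolution with stride [2,2] (%2752): spatial downsample 64x64 -> 32x32
// at 320 channels.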
%2748 = torch.aten.broadcast_to %107, %1018 : !torch.vtensor<[320,20,1,3,3],f16>, !torch.list<int> -> !torch.vtensor<[320,20,16,3,3],f16> loc(#loc1)
%2749 = torch.aten.clone %2748, %int0 : !torch.vtensor<[320,20,16,3,3],f16>, !torch.int -> !torch.vtensor<[320,20,16,3,3],f16> loc(#loc1)
%2750 = torch.aten.view %2749, %1021 : !torch.vtensor<[320,20,16,3,3],f16>, !torch.list<int> -> !torch.vtensor<[320,320,3,3],f16> loc(#loc1)
%2751 = torch.aten.mul.Tensor %108, %2750 : !torch.vtensor<[320,320,3,3],si8>, !torch.vtensor<[320,320,3,3],f16> -> !torch.vtensor<[320,320,3,3],f16> loc(#loc1)
%2752 = torch.prim.ListConstruct %int2, %int2 : (!torch.int, !torch.int) -> !torch.list<int> loc(#loc0)
%2753 = torch.aten.convolution %2747, %2751, %109, %2752, %933, %933, %false, %934, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[320,320,3,3],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,320,32,32],f16> loc(#loc1)
%2754 = torch.aten.clone %2753, %int0 : !torch.vtensor<[2,320,32,32],f16>, !torch.int -> !torch.vtensor<[2,320,32,32],f16> loc(#loc1)
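// GroupNorm with 32 groups: [2,320,32,32] viewed as [2,32,10,1024]; dtype codes int6/int7 are
// f32/f64, so the statistics are accumulated in f64 (over dims %943, presumably [2,3]) before
// casting back.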
%2755 = torch.prim.ListConstruct %int2, %int32, %int10, %int1024 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc3)
%2756 = torch.aten.view %2754, %2755 : !torch.vtensor<[2,320,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,32,10,1024],f16> loc(#loc1)
%2757 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%2758 = torch.aten.to.dtype %2757, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%2759 = torch.prim.ListConstruct %int2, %int32, %int10, %int1024 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%2760 = torch.aten.broadcast_to %2758, %2759 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,10,1024],f32> loc(#loc1)
%2761 = torch.valsem.aten.copy %2760, %2756, %false : !torch.vtensor<[2,32,10,1024],f32>, !torch.vtensor<[2,32,10,1024],f16>, !torch.bool -> !torch.vtensor<[2,32,10,1024],f32> loc(#loc1)
%2762 = torch.aten.to.dtype %2761, %int7, %false, %false, %none : !torch.vtensor<[2,32,10,1024],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,10,1024],f64> loc(#loc1)
%2763 = torch.aten.sum.dim_IntList %2762, %943, %true, %none : !torch.vtensor<[2,32,10,1024],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%2764 = torch.aten.div.Scalar %2763, %int10240 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%2765 = torch.aten.sub.Tensor %2762, %2764, %float1.000000e00 : !torch.vtensor<[2,32,10,1024],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,10,1024],f64> loc(#loc1)
%2766 = torch.aten.mul.Tensor %2765, %2765 : !torch.vtensor<[2,32,10,1024],f64>, !torch.vtensor<[2,32,10,1024],f64> -> !torch.vtensor<[2,32,10,1024],f64> loc(#loc1)
%2767 = torch.aten.sum.dim_IntList %2766, %943, %true, %none : !torch.vtensor<[2,32,10,1024],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%2768 = torch.aten.div.Scalar %2767, %int10240 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%2769 = torch.aten.to.dtype %2768, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%2770 = torch.aten.sum.dim_IntList %2761, %943, %true, %none : !torch.vtensor<[2,32,10,1024],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%2771 = torch.aten.div.Scalar %2770, %int10240 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%2772 = torch.aten.add.Tensor %2769, %5, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%2773 = torch.aten.rsqrt %2772 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%2774 = torch.aten.sub.Tensor %2756, %2771, %int1 : !torch.vtensor<[2,32,10,1024],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,10,1024],f32> loc(#loc1)
%2775 = torch.aten.mul.Tensor %2774, %2773 : !torch.vtensor<[2,32,10,1024],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,10,1024],f32> loc(#loc1)
%2776 = torch.prim.ListConstruct %int2, %int320, %int32, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc3)
%2777 = torch.aten.view %2775, %2776 : !torch.vtensor<[2,32,10,1024],f32>, !torch.list<int> -> !torch.vtensor<[2,320,32,32],f32> loc(#loc1)
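// Per-channel affine (gamma %110, beta %111), cast back to f16 (dtype code int5), then
// SiLU: x * sigmoid(x).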
%2778 = torch.aten.unsqueeze %110, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16> loc(#loc1)
%2779 = torch.aten.unsqueeze %2778, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16> loc(#loc1)
%2780 = torch.aten.mul.Tensor %2777, %2779 : !torch.vtensor<[2,320,32,32],f32>, !torch.vtensor<[320,1,1],f16> -> !torch.vtensor<[2,320,32,32],f32> loc(#loc1)
%2781 = torch.aten.unsqueeze %111, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16> loc(#loc1)
%2782 = torch.aten.unsqueeze %2781, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16> loc(#loc1)
%2783 = torch.aten.add.Tensor %2780, %2782, %int1 : !torch.vtensor<[2,320,32,32],f32>, !torch.vtensor<[320,1,1],f16>, !torch.int -> !torch.vtensor<[2,320,32,32],f32> loc(#loc1)
%2784 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%2785 = torch.aten.to.dtype %2784, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%2786 = torch.prim.ListConstruct %int2, %int320, %int32, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%2787 = torch.aten.broadcast_to %2785, %2786 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,320,32,32],f16> loc(#loc1)
%2788 = torch.valsem.aten.copy %2787, %2783, %false : !torch.vtensor<[2,320,32,32],f16>, !torch.vtensor<[2,320,32,32],f32>, !torch.bool -> !torch.vtensor<[2,320,32,32],f16> loc(#loc1)
%2789 = torch.aten.sigmoid %2788 : !torch.vtensor<[2,320,32,32],f16> -> !torch.vtensor<[2,320,32,32],f16> loc(#loc1)
%2790 = torch.aten.mul.Tensor %2789, %2788 : !torch.vtensor<[2,320,32,32],f16>, !torch.vtensor<[2,320,32,32],f16> -> !torch.vtensor<[2,320,32,32],f16> loc(#loc1)
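// Fake-quant of the SiLU output (20 groups of 16 channels at 32x32 resolution).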
%2791 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2792 = torch.aten.detach %2791 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2793 = torch.prim.ListConstruct %int2, %int20, %int16, %int32, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc3)
%2794 = torch.aten.view %2790, %2793 : !torch.vtensor<[2,320,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,20,16,32,32],f16> loc(#loc1)
%2795 = torch.aten.abs %2794 : !torch.vtensor<[2,20,16,32,32],f16> -> !torch.vtensor<[2,20,16,32,32],f16> loc(#loc1)
%values_56, %indices_57 = torch.aten.max.dim %2795, %int2, %true : !torch.vtensor<[2,20,16,32,32],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,20,1,32,32],f16>, !torch.vtensor<[2,20,1,32,32],si64> loc(#loc1)
%2796 = torch.prim.ListConstruct %int2, %int20, %int1, %int32, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc3)
%2797 = torch.aten.view %values_56, %2796 : !torch.vtensor<[2,20,1,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,20,1,32,32],f16> loc(#loc1)
%2798 = torch.aten.broadcast_to %2797, %2793 : !torch.vtensor<[2,20,1,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,20,16,32,32],f16> loc(#loc1)
%2799 = torch.aten.clone %2798, %int0 : !torch.vtensor<[2,20,16,32,32],f16>, !torch.int -> !torch.vtensor<[2,20,16,32,32],f16> loc(#loc1)
%2800 = torch.aten.view %2799, %2776 : !torch.vtensor<[2,20,16,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,320,32,32],f16> loc(#loc1)
%2801 = torch.aten.sub.Tensor %2792, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%2802 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%2803 = torch.aten.pow.Tensor_Tensor %2802, %2801 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2804 = torch.aten.neg %2803 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2805 = torch.aten.neg %2804 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2806 = torch.aten.div.Tensor %2800, %2805 : !torch.vtensor<[2,320,32,32],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,320,32,32],f16> loc(#loc1)
%2807 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2808 = torch.aten.detach %2807 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2809 = torch.aten.div.Tensor %2790, %2806 : !torch.vtensor<[2,320,32,32],f16>, !torch.vtensor<[2,320,32,32],f16> -> !torch.vtensor<[2,320,32,32],f16> loc(#loc1)
%2810 = torch.aten.add.Tensor %2809, %2808, %int1 : !torch.vtensor<[2,320,32,32],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,320,32,32],f16> loc(#loc1)
%2811 = torch.aten.sub.Tensor %2792, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%2812 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%2813 = torch.aten.pow.Tensor_Tensor %2812, %2811 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2814 = torch.aten.neg %2813 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2815 = torch.aten.sub.Tensor %2792, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%2816 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%2817 = torch.aten.pow.Tensor_Tensor %2816, %2815 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2818 = torch.aten.sub.Tensor %2817, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%2819 = torch.aten.gt.Tensor %2810, %2818 : !torch.vtensor<[2,320,32,32],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,320,32,32],i1> loc(#loc1)
%2820 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%2821 = torch.aten.to.dtype %2820, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%2822 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%2823 = torch.aten.broadcast_to %2821, %2822 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%2824 = torch.valsem.aten.copy %2823, %2818, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%2825 = torch.aten.where.self %2819, %2824, %2810 : !torch.vtensor<[2,320,32,32],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,320,32,32],f16> -> !torch.vtensor<[2,320,32,32],f16> loc(#loc1)
%2826 = torch.aten.lt.Tensor %2825, %2814 : !torch.vtensor<[2,320,32,32],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,320,32,32],i1> loc(#loc1)
%2827 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%2828 = torch.aten.to.dtype %2827, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%2829 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%2830 = torch.aten.broadcast_to %2828, %2829 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%2831 = torch.valsem.aten.copy %2830, %2814, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%2832 = torch.aten.where.self %2826, %2831, %2825 : !torch.vtensor<[2,320,32,32],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,320,32,32],f16> -> !torch.vtensor<[2,320,32,32],f16> loc(#loc1)
%2833 = torch.aten.round %2832 : !torch.vtensor<[2,320,32,32],f16> -> !torch.vtensor<[2,320,32,32],f16> loc(#loc1)
%2834 = torch.aten.sub.Tensor %2833, %2808, %int1 : !torch.vtensor<[2,320,32,32],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,320,32,32],f16> loc(#loc1)
%2835 = torch.aten.mul.Tensor %2834, %2806 : !torch.vtensor<[2,320,32,32],f16>, !torch.vtensor<[2,320,32,32],f16> -> !torch.vtensor<[2,320,32,32],f16> loc(#loc1)
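// Dequantized 3x3 convolution expanding 320 -> 640 channels, presumably the first conv of the
// next ResNet block.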
%2836 = torch.prim.ListConstruct %int640, %int20, %int16, %int3, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc2)
%2837 = torch.aten.broadcast_to %112, %2836 : !torch.vtensor<[640,20,1,3,3],f16>, !torch.list<int> -> !torch.vtensor<[640,20,16,3,3],f16> loc(#loc1)
%2838 = torch.aten.clone %2837, %int0 : !torch.vtensor<[640,20,16,3,3],f16>, !torch.int -> !torch.vtensor<[640,20,16,3,3],f16> loc(#loc1)
%2839 = torch.prim.ListConstruct %int640, %int320, %int3, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc4)
%2840 = torch.aten.view %2838, %2839 : !torch.vtensor<[640,20,16,3,3],f16>, !torch.list<int> -> !torch.vtensor<[640,320,3,3],f16> loc(#loc1)
%2841 = torch.aten.mul.Tensor %113, %2840 : !torch.vtensor<[640,320,3,3],si8>, !torch.vtensor<[640,320,3,3],f16> -> !torch.vtensor<[640,320,3,3],f16> loc(#loc1)
%2842 = torch.aten.convolution %2835, %2841, %114, %933, %933, %933, %false, %934, %int1 : !torch.vtensor<[2,320,32,32],f16>, !torch.vtensor<[640,320,3,3],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
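// Time-embedding injection: SiLU on %932 ([2,1280]), a 1280 -> 640 linear, then broadcast-add
// over the spatial dims.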
%2843 = torch.aten.sigmoid %932 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16> loc(#loc1)
%2844 = torch.aten.mul.Tensor %2843, %932 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16> loc(#loc1)
%2845 = torch.aten.transpose.int %115, %int0, %int1 : !torch.vtensor<[640,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,640],f16> loc(#loc1)
%2846 = torch.aten.mm %2844, %2845 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,640],f16> -> !torch.vtensor<[2,640],f16> loc(#loc1)
%2847 = torch.aten.mul.Scalar %116, %int1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640],f16> loc(#loc1)
%2848 = torch.aten.add.Tensor %2847, %2846, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2,640],f16>, !torch.int -> !torch.vtensor<[2,640],f16> loc(#loc1)
%2849 = torch.aten.slice.Tensor %2848, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,640],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,640],f16> loc(#loc1)
%2850 = torch.aten.slice.Tensor %2849, %int1, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,640],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,640],f16> loc(#loc1)
%2851 = torch.aten.unsqueeze %2850, %int2 : !torch.vtensor<[2,640],f16>, !torch.int -> !torch.vtensor<[2,640,1],f16> loc(#loc1)
%2852 = torch.aten.unsqueeze %2851, %int3 : !torch.vtensor<[2,640,1],f16>, !torch.int -> !torch.vtensor<[2,640,1,1],f16> loc(#loc1)
%2853 = torch.aten.add.Tensor %2842, %2852, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,1,1],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
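// Second GroupNorm, again 32 groups but now 20 channels each (20480 elements per group),
// with f64 accumulation.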
%2854 = torch.prim.ListConstruct %int2, %int32, %int20, %int1024 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc3)
%2855 = torch.aten.view %2853, %2854 : !torch.vtensor<[2,640,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,32,20,1024],f16> loc(#loc1)
%2856 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%2857 = torch.aten.to.dtype %2856, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%2858 = torch.prim.ListConstruct %int2, %int32, %int20, %int1024 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%2859 = torch.aten.broadcast_to %2857, %2858 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,20,1024],f32> loc(#loc1)
%2860 = torch.valsem.aten.copy %2859, %2855, %false : !torch.vtensor<[2,32,20,1024],f32>, !torch.vtensor<[2,32,20,1024],f16>, !torch.bool -> !torch.vtensor<[2,32,20,1024],f32> loc(#loc1)
%2861 = torch.aten.to.dtype %2860, %int7, %false, %false, %none : !torch.vtensor<[2,32,20,1024],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,20,1024],f64> loc(#loc1)
%2862 = torch.aten.sum.dim_IntList %2861, %943, %true, %none : !torch.vtensor<[2,32,20,1024],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%2863 = torch.aten.div.Scalar %2862, %int20480 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%2864 = torch.aten.sub.Tensor %2861, %2863, %float1.000000e00 : !torch.vtensor<[2,32,20,1024],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,20,1024],f64> loc(#loc1)
%2865 = torch.aten.mul.Tensor %2864, %2864 : !torch.vtensor<[2,32,20,1024],f64>, !torch.vtensor<[2,32,20,1024],f64> -> !torch.vtensor<[2,32,20,1024],f64> loc(#loc1)
%2866 = torch.aten.sum.dim_IntList %2865, %943, %true, %none : !torch.vtensor<[2,32,20,1024],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%2867 = torch.aten.div.Scalar %2866, %int20480 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%2868 = torch.aten.to.dtype %2867, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%2869 = torch.aten.sum.dim_IntList %2860, %943, %true, %none : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%2870 = torch.aten.div.Scalar %2869, %int20480 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%2871 = torch.aten.add.Tensor %2868, %5, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%2872 = torch.aten.rsqrt %2871 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%2873 = torch.aten.sub.Tensor %2855, %2870, %int1 : !torch.vtensor<[2,32,20,1024],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,20,1024],f32> loc(#loc1)
%2874 = torch.aten.mul.Tensor %2873, %2872 : !torch.vtensor<[2,32,20,1024],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,20,1024],f32> loc(#loc1)
%2875 = torch.prim.ListConstruct %int2, %int640, %int32, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc3)
%2876 = torch.aten.view %2874, %2875 : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f32> loc(#loc1)
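// Affine (%117, %118) and SiLU, as in the previous normalization.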
%2877 = torch.aten.unsqueeze %117, %int-1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640,1],f16> loc(#loc1)
%2878 = torch.aten.unsqueeze %2877, %int-1 : !torch.vtensor<[640,1],f16>, !torch.int -> !torch.vtensor<[640,1,1],f16> loc(#loc1)
%2879 = torch.aten.mul.Tensor %2876, %2878 : !torch.vtensor<[2,640,32,32],f32>, !torch.vtensor<[640,1,1],f16> -> !torch.vtensor<[2,640,32,32],f32> loc(#loc1)
%2880 = torch.aten.unsqueeze %118, %int-1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640,1],f16> loc(#loc1)
%2881 = torch.aten.unsqueeze %2880, %int-1 : !torch.vtensor<[640,1],f16>, !torch.int -> !torch.vtensor<[640,1,1],f16> loc(#loc1)
%2882 = torch.aten.add.Tensor %2879, %2881, %int1 : !torch.vtensor<[2,640,32,32],f32>, !torch.vtensor<[640,1,1],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f32> loc(#loc1)
%2883 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%2884 = torch.aten.to.dtype %2883, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%2885 = torch.prim.ListConstruct %int2, %int640, %int32, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%2886 = torch.aten.broadcast_to %2884, %2885 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%2887 = torch.valsem.aten.copy %2886, %2882, %false : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f32>, !torch.bool -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%2888 = torch.aten.sigmoid %2887 : !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%2889 = torch.aten.mul.Tensor %2888, %2887 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
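// The fake-quant pattern repeats for the 640-channel activations (40 groups of 16 channels).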
%2890 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2891 = torch.aten.detach %2890 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2892 = torch.prim.ListConstruct %int2, %int40, %int16, %int32, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc3)
%2893 = torch.aten.view %2889, %2892 : !torch.vtensor<[2,640,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,40,16,32,32],f16> loc(#loc1)
%2894 = torch.aten.abs %2893 : !torch.vtensor<[2,40,16,32,32],f16> -> !torch.vtensor<[2,40,16,32,32],f16> loc(#loc1)
%values_58, %indices_59 = torch.aten.max.dim %2894, %int2, %true : !torch.vtensor<[2,40,16,32,32],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,40,1,32,32],f16>, !torch.vtensor<[2,40,1,32,32],si64> loc(#loc1)
%2895 = torch.prim.ListConstruct %int2, %int40, %int1, %int32, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc3)
%2896 = torch.aten.view %values_58, %2895 : !torch.vtensor<[2,40,1,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,40,1,32,32],f16> loc(#loc1)
%2897 = torch.aten.broadcast_to %2896, %2892 : !torch.vtensor<[2,40,1,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,40,16,32,32],f16> loc(#loc1)
%2898 = torch.aten.clone %2897, %int0 : !torch.vtensor<[2,40,16,32,32],f16>, !torch.int -> !torch.vtensor<[2,40,16,32,32],f16> loc(#loc1)
%2899 = torch.aten.view %2898, %2875 : !torch.vtensor<[2,40,16,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%2900 = torch.aten.sub.Tensor %2891, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%2901 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%2902 = torch.aten.pow.Tensor_Tensor %2901, %2900 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2903 = torch.aten.neg %2902 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2904 = torch.aten.neg %2903 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2905 = torch.aten.div.Tensor %2899, %2904 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%2906 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2907 = torch.aten.detach %2906 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2908 = torch.aten.div.Tensor %2889, %2905 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%2909 = torch.aten.add.Tensor %2908, %2907, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%2910 = torch.aten.sub.Tensor %2891, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%2911 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%2912 = torch.aten.pow.Tensor_Tensor %2911, %2910 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2913 = torch.aten.neg %2912 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2914 = torch.aten.sub.Tensor %2891, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%2915 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%2916 = torch.aten.pow.Tensor_Tensor %2915, %2914 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2917 = torch.aten.sub.Tensor %2916, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%2918 = torch.aten.gt.Tensor %2909, %2917 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,640,32,32],i1> loc(#loc1)
%2919 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%2920 = torch.aten.to.dtype %2919, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%2921 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%2922 = torch.aten.broadcast_to %2920, %2921 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%2923 = torch.valsem.aten.copy %2922, %2917, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%2924 = torch.aten.where.self %2918, %2923, %2909 : !torch.vtensor<[2,640,32,32],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%2925 = torch.aten.lt.Tensor %2924, %2913 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,640,32,32],i1> loc(#loc1)
%2926 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%2927 = torch.aten.to.dtype %2926, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%2928 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%2929 = torch.aten.broadcast_to %2927, %2928 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%2930 = torch.valsem.aten.copy %2929, %2913, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%2931 = torch.aten.where.self %2925, %2930, %2924 : !torch.vtensor<[2,640,32,32],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%2932 = torch.aten.round %2931 : !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%2933 = torch.aten.sub.Tensor %2932, %2907, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%2934 = torch.aten.mul.Tensor %2933, %2905 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
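    // Weight path: per-group f16 scales [640,40,1,3,3] are broadcast over
    // the group dimension, reshaped to [640,640,3,3], and multiplied with
    // the si8 tensor %120 -- i.e. the quantized 3x3 conv weights are
    // dequantized on the fly before the convolution below.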
%2935 = torch.prim.ListConstruct %int640, %int40, %int16, %int3, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc2)
%2936 = torch.aten.broadcast_to %119, %2935 : !torch.vtensor<[640,40,1,3,3],f16>, !torch.list<int> -> !torch.vtensor<[640,40,16,3,3],f16> loc(#loc1)
%2937 = torch.aten.clone %2936, %int0 : !torch.vtensor<[640,40,16,3,3],f16>, !torch.int -> !torch.vtensor<[640,40,16,3,3],f16> loc(#loc1)
%2938 = torch.prim.ListConstruct %int640, %int640, %int3, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc4)
%2939 = torch.aten.view %2937, %2938 : !torch.vtensor<[640,40,16,3,3],f16>, !torch.list<int> -> !torch.vtensor<[640,640,3,3],f16> loc(#loc1)
%2940 = torch.aten.mul.Tensor %120, %2939 : !torch.vtensor<[640,640,3,3],si8>, !torch.vtensor<[640,640,3,3],f16> -> !torch.vtensor<[640,640,3,3],f16> loc(#loc1)
%2941 = torch.aten.convolution %2934, %2940, %121, %933, %933, %933, %false, %934, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[640,640,3,3],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
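    // The same fake-quantization pattern is now applied to the residual
    // branch input %2753 ([2,320,32,32]), reusing shape lists built earlier
    // (%2793, %2796, %2776).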
%2942 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2943 = torch.aten.detach %2942 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2944 = torch.aten.view %2753, %2793 : !torch.vtensor<[2,320,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,20,16,32,32],f16> loc(#loc1)
%2945 = torch.aten.abs %2944 : !torch.vtensor<[2,20,16,32,32],f16> -> !torch.vtensor<[2,20,16,32,32],f16> loc(#loc1)
%values_60, %indices_61 = torch.aten.max.dim %2945, %int2, %true : !torch.vtensor<[2,20,16,32,32],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,20,1,32,32],f16>, !torch.vtensor<[2,20,1,32,32],si64> loc(#loc1)
%2946 = torch.aten.view %values_60, %2796 : !torch.vtensor<[2,20,1,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,20,1,32,32],f16> loc(#loc1)
%2947 = torch.aten.broadcast_to %2946, %2793 : !torch.vtensor<[2,20,1,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,20,16,32,32],f16> loc(#loc1)
%2948 = torch.aten.clone %2947, %int0 : !torch.vtensor<[2,20,16,32,32],f16>, !torch.int -> !torch.vtensor<[2,20,16,32,32],f16> loc(#loc1)
%2949 = torch.aten.view %2948, %2776 : !torch.vtensor<[2,20,16,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,320,32,32],f16> loc(#loc1)
%2950 = torch.aten.sub.Tensor %2943, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%2951 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%2952 = torch.aten.pow.Tensor_Tensor %2951, %2950 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2953 = torch.aten.neg %2952 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2954 = torch.aten.neg %2953 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2955 = torch.aten.div.Tensor %2949, %2954 : !torch.vtensor<[2,320,32,32],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,320,32,32],f16> loc(#loc1)
%2956 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2957 = torch.aten.detach %2956 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2958 = torch.aten.div.Tensor %2753, %2955 : !torch.vtensor<[2,320,32,32],f16>, !torch.vtensor<[2,320,32,32],f16> -> !torch.vtensor<[2,320,32,32],f16> loc(#loc1)
%2959 = torch.aten.add.Tensor %2958, %2957, %int1 : !torch.vtensor<[2,320,32,32],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,320,32,32],f16> loc(#loc1)
%2960 = torch.aten.sub.Tensor %2943, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%2961 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%2962 = torch.aten.pow.Tensor_Tensor %2961, %2960 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2963 = torch.aten.neg %2962 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2964 = torch.aten.sub.Tensor %2943, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%2965 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%2966 = torch.aten.pow.Tensor_Tensor %2965, %2964 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%2967 = torch.aten.sub.Tensor %2966, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%2968 = torch.aten.gt.Tensor %2959, %2967 : !torch.vtensor<[2,320,32,32],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,320,32,32],i1> loc(#loc1)
%2969 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%2970 = torch.aten.to.dtype %2969, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%2971 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%2972 = torch.aten.broadcast_to %2970, %2971 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%2973 = torch.valsem.aten.copy %2972, %2967, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%2974 = torch.aten.where.self %2968, %2973, %2959 : !torch.vtensor<[2,320,32,32],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,320,32,32],f16> -> !torch.vtensor<[2,320,32,32],f16> loc(#loc1)
%2975 = torch.aten.lt.Tensor %2974, %2963 : !torch.vtensor<[2,320,32,32],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,320,32,32],i1> loc(#loc1)
%2976 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%2977 = torch.aten.to.dtype %2976, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%2978 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%2979 = torch.aten.broadcast_to %2977, %2978 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%2980 = torch.valsem.aten.copy %2979, %2963, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%2981 = torch.aten.where.self %2975, %2980, %2974 : !torch.vtensor<[2,320,32,32],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,320,32,32],f16> -> !torch.vtensor<[2,320,32,32],f16> loc(#loc1)
%2982 = torch.aten.round %2981 : !torch.vtensor<[2,320,32,32],f16> -> !torch.vtensor<[2,320,32,32],f16> loc(#loc1)
%2983 = torch.aten.sub.Tensor %2982, %2957, %int1 : !torch.vtensor<[2,320,32,32],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,320,32,32],f16> loc(#loc1)
%2984 = torch.aten.mul.Tensor %2983, %2955 : !torch.vtensor<[2,320,32,32],f16>, !torch.vtensor<[2,320,32,32],f16> -> !torch.vtensor<[2,320,32,32],f16> loc(#loc1)
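    // Dequantize the si8 1x1 projection weights (%123, scales %122) and
    // apply the [640,320,1,1] convolution -- presumably the shortcut that
    // lifts the residual branch from 320 to 640 channels.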
%2985 = torch.prim.ListConstruct %int640, %int20, %int16, %int1, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc2)
%2986 = torch.aten.broadcast_to %122, %2985 : !torch.vtensor<[640,20,1,1,1],f16>, !torch.list<int> -> !torch.vtensor<[640,20,16,1,1],f16> loc(#loc1)
%2987 = torch.aten.clone %2986, %int0 : !torch.vtensor<[640,20,16,1,1],f16>, !torch.int -> !torch.vtensor<[640,20,16,1,1],f16> loc(#loc1)
%2988 = torch.prim.ListConstruct %int640, %int320, %int1, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc4)
%2989 = torch.aten.view %2987, %2988 : !torch.vtensor<[640,20,16,1,1],f16>, !torch.list<int> -> !torch.vtensor<[640,320,1,1],f16> loc(#loc1)
%2990 = torch.aten.mul.Tensor %123, %2989 : !torch.vtensor<[640,320,1,1],si8>, !torch.vtensor<[640,320,1,1],f16> -> !torch.vtensor<[640,320,1,1],f16> loc(#loc1)
%2991 = torch.aten.convolution %2984, %2990, %124, %933, %934, %933, %false, %934, %int1 : !torch.vtensor<[2,320,32,32],f16>, !torch.vtensor<[640,320,1,1],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
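    // Residual add of the two conv outputs, followed by division by the
    // scalar literal %4, likely an output-scale factor for the block.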
%2992 = torch.aten.add.Tensor %2991, %2941, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%2993 = torch.aten.div.Tensor %2992, %4 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
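    // GroupNorm in mixed precision: the [2,640,32,32] tensor is viewed as
    // [2,32,20,1024] (32 groups of 20 channels), mean and variance are
    // accumulated in f64/f32 (sums divided by 20*1024 = 20480), and the
    // result is normalized with rsqrt(var + eps) using the small eps
    // tensor %3, then cast back to f16.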
%2994 = torch.aten.clone %2993, %int0 : !torch.vtensor<[2,640,32,32],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%2995 = torch.aten.view %2994, %2854 : !torch.vtensor<[2,640,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,32,20,1024],f16> loc(#loc1)
%2996 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%2997 = torch.aten.to.dtype %2996, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%2998 = torch.prim.ListConstruct %int2, %int32, %int20, %int1024 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%2999 = torch.aten.broadcast_to %2997, %2998 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,20,1024],f32> loc(#loc1)
%3000 = torch.valsem.aten.copy %2999, %2995, %false : !torch.vtensor<[2,32,20,1024],f32>, !torch.vtensor<[2,32,20,1024],f16>, !torch.bool -> !torch.vtensor<[2,32,20,1024],f32> loc(#loc1)
%3001 = torch.aten.to.dtype %3000, %int7, %false, %false, %none : !torch.vtensor<[2,32,20,1024],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,20,1024],f64> loc(#loc1)
%3002 = torch.aten.sum.dim_IntList %3001, %943, %true, %none : !torch.vtensor<[2,32,20,1024],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%3003 = torch.aten.div.Scalar %3002, %int20480 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%3004 = torch.aten.sub.Tensor %3001, %3003, %float1.000000e00 : !torch.vtensor<[2,32,20,1024],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,20,1024],f64> loc(#loc1)
%3005 = torch.aten.mul.Tensor %3004, %3004 : !torch.vtensor<[2,32,20,1024],f64>, !torch.vtensor<[2,32,20,1024],f64> -> !torch.vtensor<[2,32,20,1024],f64> loc(#loc1)
%3006 = torch.aten.sum.dim_IntList %3005, %943, %true, %none : !torch.vtensor<[2,32,20,1024],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%3007 = torch.aten.div.Scalar %3006, %int20480 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%3008 = torch.aten.to.dtype %3007, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%3009 = torch.aten.sum.dim_IntList %3000, %943, %true, %none : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%3010 = torch.aten.div.Scalar %3009, %int20480 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%3011 = torch.aten.add.Tensor %3008, %3, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%3012 = torch.aten.rsqrt %3011 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%3013 = torch.aten.sub.Tensor %2995, %3010, %int1 : !torch.vtensor<[2,32,20,1024],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,20,1024],f32> loc(#loc1)
%3014 = torch.aten.mul.Tensor %3013, %3012 : !torch.vtensor<[2,32,20,1024],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,20,1024],f32> loc(#loc1)
%3015 = torch.aten.view %3014, %2875 : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f32> loc(#loc1)
%3016 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%3017 = torch.aten.to.dtype %3016, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%3018 = torch.prim.ListConstruct %int2, %int640, %int32, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%3019 = torch.aten.broadcast_to %3017, %3018 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%3020 = torch.valsem.aten.copy %3019, %3015, %false : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f32>, !torch.bool -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
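    // The normalized activations go through the same per-group fake-quant
    // pattern once more, seemingly ahead of the 1x1 input projection of the
    // attention sub-block below.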
%3021 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3022 = torch.aten.detach %3021 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3023 = torch.aten.view %3020, %2892 : !torch.vtensor<[2,640,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,40,16,32,32],f16> loc(#loc1)
%3024 = torch.aten.abs %3023 : !torch.vtensor<[2,40,16,32,32],f16> -> !torch.vtensor<[2,40,16,32,32],f16> loc(#loc1)
%values_62, %indices_63 = torch.aten.max.dim %3024, %int2, %true : !torch.vtensor<[2,40,16,32,32],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,40,1,32,32],f16>, !torch.vtensor<[2,40,1,32,32],si64> loc(#loc1)
%3025 = torch.aten.view %values_62, %2895 : !torch.vtensor<[2,40,1,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,40,1,32,32],f16> loc(#loc1)
%3026 = torch.aten.broadcast_to %3025, %2892 : !torch.vtensor<[2,40,1,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,40,16,32,32],f16> loc(#loc1)
%3027 = torch.aten.clone %3026, %int0 : !torch.vtensor<[2,40,16,32,32],f16>, !torch.int -> !torch.vtensor<[2,40,16,32,32],f16> loc(#loc1)
%3028 = torch.aten.view %3027, %2875 : !torch.vtensor<[2,40,16,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%3029 = torch.aten.sub.Tensor %3022, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%3030 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%3031 = torch.aten.pow.Tensor_Tensor %3030, %3029 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3032 = torch.aten.neg %3031 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3033 = torch.aten.neg %3032 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3034 = torch.aten.div.Tensor %3028, %3033 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%3035 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3036 = torch.aten.detach %3035 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3037 = torch.aten.div.Tensor %3020, %3034 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%3038 = torch.aten.add.Tensor %3037, %3036, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%3039 = torch.aten.sub.Tensor %3022, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%3040 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%3041 = torch.aten.pow.Tensor_Tensor %3040, %3039 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3042 = torch.aten.neg %3041 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3043 = torch.aten.sub.Tensor %3022, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%3044 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%3045 = torch.aten.pow.Tensor_Tensor %3044, %3043 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3046 = torch.aten.sub.Tensor %3045, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%3047 = torch.aten.gt.Tensor %3038, %3046 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,640,32,32],i1> loc(#loc1)
%3048 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%3049 = torch.aten.to.dtype %3048, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%3050 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%3051 = torch.aten.broadcast_to %3049, %3050 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%3052 = torch.valsem.aten.copy %3051, %3046, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%3053 = torch.aten.where.self %3047, %3052, %3038 : !torch.vtensor<[2,640,32,32],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%3054 = torch.aten.lt.Tensor %3053, %3042 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,640,32,32],i1> loc(#loc1)
%3055 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%3056 = torch.aten.to.dtype %3055, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%3057 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%3058 = torch.aten.broadcast_to %3056, %3057 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%3059 = torch.valsem.aten.copy %3058, %3042, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%3060 = torch.aten.where.self %3054, %3059, %3053 : !torch.vtensor<[2,640,32,32],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%3061 = torch.aten.round %3060 : !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%3062 = torch.aten.sub.Tensor %3061, %3036, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%3063 = torch.aten.mul.Tensor %3062, %3034 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
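    // Dequantize the si8 1x1 weights (%126, scales %125) and convolve:
    // [2,640,32,32] -> [2,640,32,32].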
%3064 = torch.prim.ListConstruct %int640, %int40, %int16, %int1, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc2)
%3065 = torch.aten.broadcast_to %125, %3064 : !torch.vtensor<[640,40,1,1,1],f16>, !torch.list<int> -> !torch.vtensor<[640,40,16,1,1],f16> loc(#loc1)
%3066 = torch.aten.clone %3065, %int0 : !torch.vtensor<[640,40,16,1,1],f16>, !torch.int -> !torch.vtensor<[640,40,16,1,1],f16> loc(#loc1)
%3067 = torch.prim.ListConstruct %int640, %int640, %int1, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc4)
%3068 = torch.aten.view %3066, %3067 : !torch.vtensor<[640,40,16,1,1],f16>, !torch.list<int> -> !torch.vtensor<[640,640,1,1],f16> loc(#loc1)
%3069 = torch.aten.mul.Tensor %126, %3068 : !torch.vtensor<[640,640,1,1],si8>, !torch.vtensor<[640,640,1,1],f16> -> !torch.vtensor<[640,640,1,1],f16> loc(#loc1)
%3070 = torch.aten.convolution %3063, %3069, %127, %933, %934, %933, %false, %934, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[640,640,1,1],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
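    // Rearrange NCHW feature maps into a token sequence: permute to
    // [2,32,32,640] and flatten the spatial dims to [2,1024,640].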
%3071 = torch.aten.permute %3070, %1196 : !torch.vtensor<[2,640,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,32,32,640],f16> loc(#loc1)
%3072 = torch.prim.ListConstruct %int2, %int1024, %int640 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc5)
%3073 = torch.aten.view %3071, %3072 : !torch.vtensor<[2,32,32,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
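    // Inlined LayerNorm over the 640-dim channel axis: mean and variance
    // via sum/640 along dim 2, then (x - mean) * rsqrt(var + 1e-05); no
    // affine scale/shift appears in this span.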
%3074 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int> loc(#loc1)
%3075 = torch.aten.sum.dim_IntList %3073, %3074, %true, %none : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,1024,1],f16> loc(#loc1)
%3076 = torch.aten.div.Scalar %3075, %int640 : !torch.vtensor<[2,1024,1],f16>, !torch.int -> !torch.vtensor<[2,1024,1],f16> loc(#loc1)
%3077 = torch.prim.ListConstruct %int2, %int1024, %int640 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%3078 = torch.aten.broadcast_to %3076, %3077 : !torch.vtensor<[2,1024,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3079 = torch.aten.sub.Tensor %3073, %3078, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3080 = torch.aten.mul.Tensor %3079, %3079 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3081 = torch.aten.sum.dim_IntList %3080, %3074, %true, %none : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,1024,1],f16> loc(#loc1)
%3082 = torch.aten.div.Scalar %3081, %int640 : !torch.vtensor<[2,1024,1],f16>, !torch.int -> !torch.vtensor<[2,1024,1],f16> loc(#loc1)
%3083 = torch.aten.add.Scalar %3082, %float1.000000e-05, %int1 : !torch.vtensor<[2,1024,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,1024,1],f16> loc(#loc1)
%3084 = torch.aten.rsqrt %3083 : !torch.vtensor<[2,1024,1],f16> -> !torch.vtensor<[2,1024,1],f16> loc(#loc1)
%3085 = torch.prim.ListConstruct %int2, %int1024, %int640 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%3086 = torch.aten.broadcast_to %3084, %3085 : !torch.vtensor<[2,1024,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3087 = torch.aten.mul.Tensor %3079, %3086 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
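    // Fake-quantize the LayerNorm output per group of 16 features,
    // dequantize the si8 [640,640] weight (%129, scales %128), and run the
    // matmul with bias %130 (a decomposed addmm) -- by position this looks
    // like the attention query projection, producing %3145.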
%3088 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3089 = torch.aten.detach %3088 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3090 = torch.prim.ListConstruct %int2, %int1024, %int40, %int16 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc3)
%3091 = torch.aten.view %3087, %3090 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,40,16],f16> loc(#loc1)
%3092 = torch.aten.abs %3091 : !torch.vtensor<[2,1024,40,16],f16> -> !torch.vtensor<[2,1024,40,16],f16> loc(#loc1)
%values_64, %indices_65 = torch.aten.max.dim %3092, %int3, %true : !torch.vtensor<[2,1024,40,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,1024,40,1],f16>, !torch.vtensor<[2,1024,40,1],si64> loc(#loc1)
%3093 = torch.prim.ListConstruct %int2, %int1024, %int40, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc3)
%3094 = torch.aten.view %values_64, %3093 : !torch.vtensor<[2,1024,40,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,40,1],f16> loc(#loc1)
%3095 = torch.aten.broadcast_to %3094, %3090 : !torch.vtensor<[2,1024,40,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,40,16],f16> loc(#loc1)
%3096 = torch.aten.clone %3095, %int0 : !torch.vtensor<[2,1024,40,16],f16>, !torch.int -> !torch.vtensor<[2,1024,40,16],f16> loc(#loc1)
%3097 = torch.aten.view %3096, %3072 : !torch.vtensor<[2,1024,40,16],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3098 = torch.aten.sub.Tensor %3089, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%3099 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%3100 = torch.aten.pow.Tensor_Tensor %3099, %3098 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3101 = torch.aten.neg %3100 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3102 = torch.aten.neg %3101 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3103 = torch.aten.div.Tensor %3097, %3102 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3104 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3105 = torch.aten.detach %3104 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3106 = torch.aten.div.Tensor %3087, %3103 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3107 = torch.aten.add.Tensor %3106, %3105, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3108 = torch.aten.sub.Tensor %3089, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%3109 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%3110 = torch.aten.pow.Tensor_Tensor %3109, %3108 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3111 = torch.aten.neg %3110 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3112 = torch.aten.sub.Tensor %3089, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%3113 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%3114 = torch.aten.pow.Tensor_Tensor %3113, %3112 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3115 = torch.aten.sub.Tensor %3114, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%3116 = torch.aten.gt.Tensor %3107, %3115 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,1024,640],i1> loc(#loc1)
%3117 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%3118 = torch.aten.to.dtype %3117, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%3119 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%3120 = torch.aten.broadcast_to %3118, %3119 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%3121 = torch.valsem.aten.copy %3120, %3115, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%3122 = torch.aten.where.self %3116, %3121, %3107 : !torch.vtensor<[2,1024,640],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3123 = torch.aten.lt.Tensor %3122, %3111 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,1024,640],i1> loc(#loc1)
%3124 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%3125 = torch.aten.to.dtype %3124, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%3126 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%3127 = torch.aten.broadcast_to %3125, %3126 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%3128 = torch.valsem.aten.copy %3127, %3111, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%3129 = torch.aten.where.self %3123, %3128, %3122 : !torch.vtensor<[2,1024,640],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3130 = torch.aten.round %3129 : !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3131 = torch.aten.sub.Tensor %3130, %3105, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3132 = torch.aten.mul.Tensor %3131, %3103 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3133 = torch.prim.ListConstruct %int640, %int40, %int16 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc2)
%3134 = torch.aten.broadcast_to %128, %3133 : !torch.vtensor<[640,40,1],f16>, !torch.list<int> -> !torch.vtensor<[640,40,16],f16> loc(#loc1)
%3135 = torch.aten.clone %3134, %int0 : !torch.vtensor<[640,40,16],f16>, !torch.int -> !torch.vtensor<[640,40,16],f16> loc(#loc1)
%3136 = torch.prim.ListConstruct %int640, %int640 : (!torch.int, !torch.int) -> !torch.list<int> loc(#loc4)
%3137 = torch.aten.view %3135, %3136 : !torch.vtensor<[640,40,16],f16>, !torch.list<int> -> !torch.vtensor<[640,640],f16> loc(#loc1)
%3138 = torch.aten.mul.Tensor %129, %3137 : !torch.vtensor<[640,640],si8>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16> loc(#loc1)
%3139 = torch.aten.transpose.int %3138, %int0, %int1 : !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,640],f16> loc(#loc1)
%3140 = torch.prim.ListConstruct %int2048, %int640 : (!torch.int, !torch.int) -> !torch.list<int> loc(#loc3)
%3141 = torch.aten.view %3132, %3140 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16> loc(#loc1)
%3142 = torch.aten.mm %3141, %3139 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16> loc(#loc1)
%3143 = torch.aten.mul.Scalar %130, %int1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640],f16> loc(#loc1)
%3144 = torch.aten.add.Tensor %3143, %3142, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2048,640],f16>, !torch.int -> !torch.vtensor<[2048,640],f16> loc(#loc1)
%3145 = torch.aten.view %3144, %3072 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
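    // Same pattern for a second [640,640] projection (%132/%131, bias
    // %133), presumably the key projection -> %3198.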
%3146 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3147 = torch.aten.detach %3146 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3148 = torch.aten.view %3087, %3090 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,40,16],f16> loc(#loc1)
%3149 = torch.aten.abs %3148 : !torch.vtensor<[2,1024,40,16],f16> -> !torch.vtensor<[2,1024,40,16],f16> loc(#loc1)
%values_66, %indices_67 = torch.aten.max.dim %3149, %int3, %true : !torch.vtensor<[2,1024,40,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,1024,40,1],f16>, !torch.vtensor<[2,1024,40,1],si64> loc(#loc1)
%3150 = torch.aten.view %values_66, %3093 : !torch.vtensor<[2,1024,40,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,40,1],f16> loc(#loc1)
%3151 = torch.aten.broadcast_to %3150, %3090 : !torch.vtensor<[2,1024,40,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,40,16],f16> loc(#loc1)
%3152 = torch.aten.clone %3151, %int0 : !torch.vtensor<[2,1024,40,16],f16>, !torch.int -> !torch.vtensor<[2,1024,40,16],f16> loc(#loc1)
%3153 = torch.aten.view %3152, %3072 : !torch.vtensor<[2,1024,40,16],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3154 = torch.aten.sub.Tensor %3147, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%3155 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%3156 = torch.aten.pow.Tensor_Tensor %3155, %3154 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3157 = torch.aten.neg %3156 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3158 = torch.aten.neg %3157 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3159 = torch.aten.div.Tensor %3153, %3158 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3160 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3161 = torch.aten.detach %3160 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3162 = torch.aten.div.Tensor %3087, %3159 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3163 = torch.aten.add.Tensor %3162, %3161, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3164 = torch.aten.sub.Tensor %3147, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%3165 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%3166 = torch.aten.pow.Tensor_Tensor %3165, %3164 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3167 = torch.aten.neg %3166 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3168 = torch.aten.sub.Tensor %3147, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%3169 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%3170 = torch.aten.pow.Tensor_Tensor %3169, %3168 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3171 = torch.aten.sub.Tensor %3170, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%3172 = torch.aten.gt.Tensor %3163, %3171 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,1024,640],i1> loc(#loc1)
%3173 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%3174 = torch.aten.to.dtype %3173, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%3175 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%3176 = torch.aten.broadcast_to %3174, %3175 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%3177 = torch.valsem.aten.copy %3176, %3171, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%3178 = torch.aten.where.self %3172, %3177, %3163 : !torch.vtensor<[2,1024,640],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3179 = torch.aten.lt.Tensor %3178, %3167 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,1024,640],i1> loc(#loc1)
%3180 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%3181 = torch.aten.to.dtype %3180, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%3182 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%3183 = torch.aten.broadcast_to %3181, %3182 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%3184 = torch.valsem.aten.copy %3183, %3167, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%3185 = torch.aten.where.self %3179, %3184, %3178 : !torch.vtensor<[2,1024,640],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3186 = torch.aten.round %3185 : !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3187 = torch.aten.sub.Tensor %3186, %3161, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3188 = torch.aten.mul.Tensor %3187, %3159 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3189 = torch.aten.broadcast_to %131, %3133 : !torch.vtensor<[640,40,1],f16>, !torch.list<int> -> !torch.vtensor<[640,40,16],f16> loc(#loc1)
%3190 = torch.aten.clone %3189, %int0 : !torch.vtensor<[640,40,16],f16>, !torch.int -> !torch.vtensor<[640,40,16],f16> loc(#loc1)
%3191 = torch.aten.view %3190, %3136 : !torch.vtensor<[640,40,16],f16>, !torch.list<int> -> !torch.vtensor<[640,640],f16> loc(#loc1)
%3192 = torch.aten.mul.Tensor %132, %3191 : !torch.vtensor<[640,640],si8>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16> loc(#loc1)
%3193 = torch.aten.transpose.int %3192, %int0, %int1 : !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,640],f16> loc(#loc1)
%3194 = torch.aten.view %3188, %3140 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16> loc(#loc1)
%3195 = torch.aten.mm %3194, %3193 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16> loc(#loc1)
%3196 = torch.aten.mul.Scalar %133, %int1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640],f16> loc(#loc1)
%3197 = torch.aten.add.Tensor %3196, %3195, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2048,640],f16>, !torch.int -> !torch.vtensor<[2048,640],f16> loc(#loc1)
%3198 = torch.aten.view %3197, %3072 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
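    // And a third projection (%135/%134, bias %136), presumably the value
    // projection -> %3251.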
%3199 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3200 = torch.aten.detach %3199 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3201 = torch.aten.view %3087, %3090 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,40,16],f16> loc(#loc1)
%3202 = torch.aten.abs %3201 : !torch.vtensor<[2,1024,40,16],f16> -> !torch.vtensor<[2,1024,40,16],f16> loc(#loc1)
%values_68, %indices_69 = torch.aten.max.dim %3202, %int3, %true : !torch.vtensor<[2,1024,40,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,1024,40,1],f16>, !torch.vtensor<[2,1024,40,1],si64> loc(#loc1)
%3203 = torch.aten.view %values_68, %3093 : !torch.vtensor<[2,1024,40,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,40,1],f16> loc(#loc1)
%3204 = torch.aten.broadcast_to %3203, %3090 : !torch.vtensor<[2,1024,40,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,40,16],f16> loc(#loc1)
%3205 = torch.aten.clone %3204, %int0 : !torch.vtensor<[2,1024,40,16],f16>, !torch.int -> !torch.vtensor<[2,1024,40,16],f16> loc(#loc1)
%3206 = torch.aten.view %3205, %3072 : !torch.vtensor<[2,1024,40,16],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3207 = torch.aten.sub.Tensor %3200, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%3208 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%3209 = torch.aten.pow.Tensor_Tensor %3208, %3207 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3210 = torch.aten.neg %3209 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3211 = torch.aten.neg %3210 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3212 = torch.aten.div.Tensor %3206, %3211 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3213 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3214 = torch.aten.detach %3213 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3215 = torch.aten.div.Tensor %3087, %3212 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3216 = torch.aten.add.Tensor %3215, %3214, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3217 = torch.aten.sub.Tensor %3200, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%3218 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%3219 = torch.aten.pow.Tensor_Tensor %3218, %3217 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3220 = torch.aten.neg %3219 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3221 = torch.aten.sub.Tensor %3200, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%3222 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%3223 = torch.aten.pow.Tensor_Tensor %3222, %3221 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3224 = torch.aten.sub.Tensor %3223, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%3225 = torch.aten.gt.Tensor %3216, %3224 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,1024,640],i1> loc(#loc1)
%3226 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%3227 = torch.aten.to.dtype %3226, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%3228 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%3229 = torch.aten.broadcast_to %3227, %3228 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%3230 = torch.valsem.aten.copy %3229, %3224, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%3231 = torch.aten.where.self %3225, %3230, %3216 : !torch.vtensor<[2,1024,640],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3232 = torch.aten.lt.Tensor %3231, %3220 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,1024,640],i1> loc(#loc1)
%3233 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%3234 = torch.aten.to.dtype %3233, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%3235 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%3236 = torch.aten.broadcast_to %3234, %3235 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%3237 = torch.valsem.aten.copy %3236, %3220, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%3238 = torch.aten.where.self %3232, %3237, %3231 : !torch.vtensor<[2,1024,640],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3239 = torch.aten.round %3238 : !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3240 = torch.aten.sub.Tensor %3239, %3214, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3241 = torch.aten.mul.Tensor %3240, %3212 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3242 = torch.aten.broadcast_to %134, %3133 : !torch.vtensor<[640,40,1],f16>, !torch.list<int> -> !torch.vtensor<[640,40,16],f16> loc(#loc1)
%3243 = torch.aten.clone %3242, %int0 : !torch.vtensor<[640,40,16],f16>, !torch.int -> !torch.vtensor<[640,40,16],f16> loc(#loc1)
%3244 = torch.aten.view %3243, %3136 : !torch.vtensor<[640,40,16],f16>, !torch.list<int> -> !torch.vtensor<[640,640],f16> loc(#loc1)
%3245 = torch.aten.mul.Tensor %135, %3244 : !torch.vtensor<[640,640],si8>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16> loc(#loc1)
%3246 = torch.aten.transpose.int %3245, %int0, %int1 : !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,640],f16> loc(#loc1)
%3247 = torch.aten.view %3241, %3140 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16> loc(#loc1)
%3248 = torch.aten.mm %3247, %3246 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16> loc(#loc1)
%3249 = torch.aten.mul.Scalar %136, %int1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640],f16> loc(#loc1)
%3250 = torch.aten.add.Tensor %3249, %3248, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2048,640],f16>, !torch.int -> !torch.vtensor<[2048,640],f16> loc(#loc1)
%3251 = torch.aten.view %3250, %3072 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
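    // Multi-head attention with 8 heads of dim 80: q/k/v are reshaped to
    // [2,8,1024,80] and folded to [16,1024,80]; logits = q @ k^T scaled by
    // the scalar %1 (consistent with 1/sqrt(80)); softmax is computed in
    // the numerically stable form exp(x - max) / sum; the weights are
    // applied to v and the result reshaped back to [2,1024,640].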
%3252 = torch.prim.ListConstruct %int2, %int1024, %int8, %int80 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc5)
%3253 = torch.aten.view %3145, %3252 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16> loc(#loc1)
%3254 = torch.aten.permute %3253, %1380 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16> loc(#loc1)
%3255 = torch.aten.clone %3254, %int0 : !torch.vtensor<[2,8,1024,80],f16>, !torch.int -> !torch.vtensor<[2,8,1024,80],f16> loc(#loc1)
%3256 = torch.prim.ListConstruct %int16, %int1024, %int80 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc2)
%3257 = torch.aten.view %3255, %3256 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16> loc(#loc1)
%3258 = torch.aten.view %3198, %3252 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16> loc(#loc1)
%3259 = torch.aten.permute %3258, %1380 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16> loc(#loc1)
%3260 = torch.aten.clone %3259, %int0 : !torch.vtensor<[2,8,1024,80],f16>, !torch.int -> !torch.vtensor<[2,8,1024,80],f16> loc(#loc1)
%3261 = torch.aten.view %3260, %3256 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16> loc(#loc1)
%3262 = torch.aten.view %3251, %3252 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16> loc(#loc1)
%3263 = torch.aten.permute %3262, %1380 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16> loc(#loc1)
%3264 = torch.aten.clone %3263, %int0 : !torch.vtensor<[2,8,1024,80],f16>, !torch.int -> !torch.vtensor<[2,8,1024,80],f16> loc(#loc1)
%3265 = torch.aten.view %3264, %3256 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16> loc(#loc1)
%3266 = torch.aten.transpose.int %3261, %int-1, %int-2 : !torch.vtensor<[16,1024,80],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,80,1024],f16> loc(#loc1)
%3267 = torch.aten.broadcast_to %3257, %3256 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16> loc(#loc1)
%3268 = torch.aten.view %3267, %3256 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16> loc(#loc1)
%3269 = torch.prim.ListConstruct %int16, %int80, %int1024 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc2)
%3270 = torch.aten.broadcast_to %3266, %3269 : !torch.vtensor<[16,80,1024],f16>, !torch.list<int> -> !torch.vtensor<[16,80,1024],f16> loc(#loc1)
%3271 = torch.aten.view %3270, %3269 : !torch.vtensor<[16,80,1024],f16>, !torch.list<int> -> !torch.vtensor<[16,80,1024],f16> loc(#loc1)
%3272 = torch.aten.bmm %3268, %3271 : !torch.vtensor<[16,1024,80],f16>, !torch.vtensor<[16,80,1024],f16> -> !torch.vtensor<[16,1024,1024],f16> loc(#loc1)
%3273 = torch.prim.ListConstruct %int16, %int1024, %int1024 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc4)
%3274 = torch.aten.view %3272, %3273 : !torch.vtensor<[16,1024,1024],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,1024],f16> loc(#loc1)
%3275 = torch.aten.mul.Tensor %3274, %1 : !torch.vtensor<[16,1024,1024],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,1024,1024],f16> loc(#loc1)
%values_70, %indices_71 = torch.aten.max.dim %3275, %int-1, %true : !torch.vtensor<[16,1024,1024],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,1024,1],f16>, !torch.vtensor<[16,1024,1],si64> loc(#loc1)
%3276 = torch.aten.sub.Tensor %3275, %values_70, %float1.000000e00 : !torch.vtensor<[16,1024,1024],f16>, !torch.vtensor<[16,1024,1],f16>, !torch.float -> !torch.vtensor<[16,1024,1024],f16> loc(#loc1)
%3277 = torch.aten.exp %3276 : !torch.vtensor<[16,1024,1024],f16> -> !torch.vtensor<[16,1024,1024],f16> loc(#loc1)
%3278 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int> loc(#loc1)
%3279 = torch.aten.sum.dim_IntList %3277, %3278, %true, %none : !torch.vtensor<[16,1024,1024],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[16,1024,1],f16> loc(#loc1)
%3280 = torch.aten.div.Tensor %3277, %3279 : !torch.vtensor<[16,1024,1024],f16>, !torch.vtensor<[16,1024,1],f16> -> !torch.vtensor<[16,1024,1024],f16> loc(#loc1)
%3281 = torch.aten.broadcast_to %3280, %3273 : !torch.vtensor<[16,1024,1024],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,1024],f16> loc(#loc1)
%3282 = torch.aten.view %3281, %3273 : !torch.vtensor<[16,1024,1024],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,1024],f16> loc(#loc1)
%3283 = torch.aten.broadcast_to %3265, %3256 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16> loc(#loc1)
%3284 = torch.aten.view %3283, %3256 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16> loc(#loc1)
%3285 = torch.aten.bmm %3282, %3284 : !torch.vtensor<[16,1024,1024],f16>, !torch.vtensor<[16,1024,80],f16> -> !torch.vtensor<[16,1024,80],f16> loc(#loc1)
%3286 = torch.aten.view %3285, %3256 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16> loc(#loc1)
%3287 = torch.prim.ListConstruct %int2, %int8, %int1024, %int80 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc5)
%3288 = torch.aten.view %3286, %3287 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16> loc(#loc1)
%3289 = torch.aten.permute %3288, %1380 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16> loc(#loc1)
%3290 = torch.aten.clone %3289, %int0 : !torch.vtensor<[2,1024,8,80],f16>, !torch.int -> !torch.vtensor<[2,1024,8,80],f16> loc(#loc1)
%3291 = torch.aten.view %3290, %3072 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
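    // Fake-quantize the attention output with the same per-group pattern,
    // presumably ahead of the output projection.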
%3292 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3293 = torch.aten.detach %3292 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3294 = torch.aten.view %3291, %3090 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,40,16],f16> loc(#loc1)
%3295 = torch.aten.abs %3294 : !torch.vtensor<[2,1024,40,16],f16> -> !torch.vtensor<[2,1024,40,16],f16> loc(#loc1)
%values_72, %indices_73 = torch.aten.max.dim %3295, %int3, %true : !torch.vtensor<[2,1024,40,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,1024,40,1],f16>, !torch.vtensor<[2,1024,40,1],si64> loc(#loc1)
%3296 = torch.aten.view %values_72, %3093 : !torch.vtensor<[2,1024,40,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,40,1],f16> loc(#loc1)
%3297 = torch.aten.broadcast_to %3296, %3090 : !torch.vtensor<[2,1024,40,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,40,16],f16> loc(#loc1)
%3298 = torch.aten.clone %3297, %int0 : !torch.vtensor<[2,1024,40,16],f16>, !torch.int -> !torch.vtensor<[2,1024,40,16],f16> loc(#loc1)
%3299 = torch.aten.view %3298, %3072 : !torch.vtensor<[2,1024,40,16],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3300 = torch.aten.sub.Tensor %3293, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%3301 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%3302 = torch.aten.pow.Tensor_Tensor %3301, %3300 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3303 = torch.aten.neg %3302 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3304 = torch.aten.neg %3303 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3305 = torch.aten.div.Tensor %3299, %3304 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3306 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3307 = torch.aten.detach %3306 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3308 = torch.aten.div.Tensor %3291, %3305 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3309 = torch.aten.add.Tensor %3308, %3307, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3310 = torch.aten.sub.Tensor %3293, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%3311 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%3312 = torch.aten.pow.Tensor_Tensor %3311, %3310 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3313 = torch.aten.neg %3312 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3314 = torch.aten.sub.Tensor %3293, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%3315 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%3316 = torch.aten.pow.Tensor_Tensor %3315, %3314 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3317 = torch.aten.sub.Tensor %3316, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%3318 = torch.aten.gt.Tensor %3309, %3317 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,1024,640],i1> loc(#loc1)
%3319 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%3320 = torch.aten.to.dtype %3319, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%3321 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%3322 = torch.aten.broadcast_to %3320, %3321 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%3323 = torch.valsem.aten.copy %3322, %3317, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%3324 = torch.aten.where.self %3318, %3323, %3309 : !torch.vtensor<[2,1024,640],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3325 = torch.aten.lt.Tensor %3324, %3313 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,1024,640],i1> loc(#loc1)
%3326 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%3327 = torch.aten.to.dtype %3326, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%3328 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%3329 = torch.aten.broadcast_to %3327, %3328 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%3330 = torch.valsem.aten.copy %3329, %3313, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%3331 = torch.aten.where.self %3325, %3330, %3324 : !torch.vtensor<[2,1024,640],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3332 = torch.aten.round %3331 : !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3333 = torch.aten.sub.Tensor %3332, %3307, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3334 = torch.aten.mul.Tensor %3333, %3305 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
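// [editor's note] Quantized linear: the per-group f16 scales (%137) dequantize the si8 weight
// (%138), then transpose + mm + bias (%139), and %3345 adds the residual from %3073.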
%3335 = torch.aten.broadcast_to %137, %3133 : !torch.vtensor<[640,40,1],f16>, !torch.list<int> -> !torch.vtensor<[640,40,16],f16> loc(#loc1)
%3336 = torch.aten.clone %3335, %int0 : !torch.vtensor<[640,40,16],f16>, !torch.int -> !torch.vtensor<[640,40,16],f16> loc(#loc1)
%3337 = torch.aten.view %3336, %3136 : !torch.vtensor<[640,40,16],f16>, !torch.list<int> -> !torch.vtensor<[640,640],f16> loc(#loc1)
%3338 = torch.aten.mul.Tensor %138, %3337 : !torch.vtensor<[640,640],si8>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16> loc(#loc1)
%3339 = torch.aten.transpose.int %3338, %int0, %int1 : !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,640],f16> loc(#loc1)
%3340 = torch.aten.view %3334, %3140 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16> loc(#loc1)
%3341 = torch.aten.mm %3340, %3339 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16> loc(#loc1)
%3342 = torch.aten.mul.Scalar %139, %int1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640],f16> loc(#loc1)
%3343 = torch.aten.add.Tensor %3342, %3341, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2048,640],f16>, !torch.int -> !torch.vtensor<[2048,640],f16> loc(#loc1)
%3344 = torch.aten.view %3343, %3072 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3345 = torch.aten.add.Tensor %3344, %3073, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
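// [editor's note] LayerNorm over the 640-wide channel dim: mean, centered variance, eps 1e-05,
// rsqrt; no affine scale/shift appears in this excerpt.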
%3346 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int> loc(#loc1)
%3347 = torch.aten.sum.dim_IntList %3345, %3346, %true, %none : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,1024,1],f16> loc(#loc1)
%3348 = torch.aten.div.Scalar %3347, %int640 : !torch.vtensor<[2,1024,1],f16>, !torch.int -> !torch.vtensor<[2,1024,1],f16> loc(#loc1)
%3349 = torch.prim.ListConstruct %int2, %int1024, %int640 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%3350 = torch.aten.broadcast_to %3348, %3349 : !torch.vtensor<[2,1024,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3351 = torch.aten.sub.Tensor %3345, %3350, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3352 = torch.aten.mul.Tensor %3351, %3351 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3353 = torch.aten.sum.dim_IntList %3352, %3346, %true, %none : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,1024,1],f16> loc(#loc1)
%3354 = torch.aten.div.Scalar %3353, %int640 : !torch.vtensor<[2,1024,1],f16>, !torch.int -> !torch.vtensor<[2,1024,1],f16> loc(#loc1)
%3355 = torch.aten.add.Scalar %3354, %float1.000000e-05, %int1 : !torch.vtensor<[2,1024,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,1024,1],f16> loc(#loc1)
%3356 = torch.aten.rsqrt %3355 : !torch.vtensor<[2,1024,1],f16> -> !torch.vtensor<[2,1024,1],f16> loc(#loc1)
%3357 = torch.prim.ListConstruct %int2, %int1024, %int640 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%3358 = torch.aten.broadcast_to %3356, %3357 : !torch.vtensor<[2,1024,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3359 = torch.aten.mul.Tensor %3351, %3358 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
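// [editor's note] The same absmax/clamp/round/dequantize pattern as above, applied to the
// normalized activations.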
%3360 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3361 = torch.aten.detach %3360 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3362 = torch.aten.view %3359, %3090 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,40,16],f16> loc(#loc1)
%3363 = torch.aten.abs %3362 : !torch.vtensor<[2,1024,40,16],f16> -> !torch.vtensor<[2,1024,40,16],f16> loc(#loc1)
%values_74, %indices_75 = torch.aten.max.dim %3363, %int3, %true : !torch.vtensor<[2,1024,40,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,1024,40,1],f16>, !torch.vtensor<[2,1024,40,1],si64> loc(#loc1)
%3364 = torch.aten.view %values_74, %3093 : !torch.vtensor<[2,1024,40,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,40,1],f16> loc(#loc1)
%3365 = torch.aten.broadcast_to %3364, %3090 : !torch.vtensor<[2,1024,40,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,40,16],f16> loc(#loc1)
%3366 = torch.aten.clone %3365, %int0 : !torch.vtensor<[2,1024,40,16],f16>, !torch.int -> !torch.vtensor<[2,1024,40,16],f16> loc(#loc1)
%3367 = torch.aten.view %3366, %3072 : !torch.vtensor<[2,1024,40,16],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3368 = torch.aten.sub.Tensor %3361, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%3369 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%3370 = torch.aten.pow.Tensor_Tensor %3369, %3368 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3371 = torch.aten.neg %3370 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3372 = torch.aten.neg %3371 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3373 = torch.aten.div.Tensor %3367, %3372 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3374 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3375 = torch.aten.detach %3374 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3376 = torch.aten.div.Tensor %3359, %3373 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3377 = torch.aten.add.Tensor %3376, %3375, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3378 = torch.aten.sub.Tensor %3361, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%3379 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%3380 = torch.aten.pow.Tensor_Tensor %3379, %3378 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3381 = torch.aten.neg %3380 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3382 = torch.aten.sub.Tensor %3361, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%3383 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%3384 = torch.aten.pow.Tensor_Tensor %3383, %3382 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3385 = torch.aten.sub.Tensor %3384, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%3386 = torch.aten.gt.Tensor %3377, %3385 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,1024,640],i1> loc(#loc1)
%3387 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%3388 = torch.aten.to.dtype %3387, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%3389 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%3390 = torch.aten.broadcast_to %3388, %3389 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%3391 = torch.valsem.aten.copy %3390, %3385, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%3392 = torch.aten.where.self %3386, %3391, %3377 : !torch.vtensor<[2,1024,640],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3393 = torch.aten.lt.Tensor %3392, %3381 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,1024,640],i1> loc(#loc1)
%3394 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%3395 = torch.aten.to.dtype %3394, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%3396 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%3397 = torch.aten.broadcast_to %3395, %3396 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%3398 = torch.valsem.aten.copy %3397, %3381, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%3399 = torch.aten.where.self %3393, %3398, %3392 : !torch.vtensor<[2,1024,640],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3400 = torch.aten.round %3399 : !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3401 = torch.aten.sub.Tensor %3400, %3375, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3402 = torch.aten.mul.Tensor %3401, %3373 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
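// [editor's note] Quantized 640 -> 640 linear (scales %140, si8 weight %141, bias %142); its
// output %3412 is later reshaped into per-head form, so this reads as the query projection.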
%3403 = torch.aten.broadcast_to %140, %3133 : !torch.vtensor<[640,40,1],f16>, !torch.list<int> -> !torch.vtensor<[640,40,16],f16> loc(#loc1)
%3404 = torch.aten.clone %3403, %int0 : !torch.vtensor<[640,40,16],f16>, !torch.int -> !torch.vtensor<[640,40,16],f16> loc(#loc1)
%3405 = torch.aten.view %3404, %3136 : !torch.vtensor<[640,40,16],f16>, !torch.list<int> -> !torch.vtensor<[640,640],f16> loc(#loc1)
%3406 = torch.aten.mul.Tensor %141, %3405 : !torch.vtensor<[640,640],si8>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16> loc(#loc1)
%3407 = torch.aten.transpose.int %3406, %int0, %int1 : !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,640],f16> loc(#loc1)
%3408 = torch.aten.view %3402, %3140 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16> loc(#loc1)
%3409 = torch.aten.mm %3408, %3407 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16> loc(#loc1)
%3410 = torch.aten.mul.Scalar %142, %int1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640],f16> loc(#loc1)
%3411 = torch.aten.add.Tensor %3410, %3409, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2048,640],f16>, !torch.int -> !torch.vtensor<[2048,640],f16> loc(#loc1)
%3412 = torch.aten.view %3411, %3072 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
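// [editor's note] Cross-attention operands: the encoder hidden states %arg2 ([2,77,768]) are
// fake-quantized and projected 768 -> 640 twice below, producing what read as the key (%3466)
// and value (%3517) paths.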
%3413 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3414 = torch.aten.detach %3413 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3415 = torch.aten.view %arg2, %1542 : !torch.vtensor<[2,77,768],f16>, !torch.list<int> -> !torch.vtensor<[2,77,48,16],f16> loc(#loc1)
%3416 = torch.aten.abs %3415 : !torch.vtensor<[2,77,48,16],f16> -> !torch.vtensor<[2,77,48,16],f16> loc(#loc1)
%values_76, %indices_77 = torch.aten.max.dim %3416, %int3, %true : !torch.vtensor<[2,77,48,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,77,48,1],f16>, !torch.vtensor<[2,77,48,1],si64> loc(#loc1)
%3417 = torch.aten.view %values_76, %1545 : !torch.vtensor<[2,77,48,1],f16>, !torch.list<int> -> !torch.vtensor<[2,77,48,1],f16> loc(#loc1)
%3418 = torch.aten.broadcast_to %3417, %1542 : !torch.vtensor<[2,77,48,1],f16>, !torch.list<int> -> !torch.vtensor<[2,77,48,16],f16> loc(#loc1)
%3419 = torch.aten.clone %3418, %int0 : !torch.vtensor<[2,77,48,16],f16>, !torch.int -> !torch.vtensor<[2,77,48,16],f16> loc(#loc1)
%3420 = torch.aten.view %3419, %1549 : !torch.vtensor<[2,77,48,16],f16>, !torch.list<int> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%3421 = torch.aten.sub.Tensor %3414, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%3422 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%3423 = torch.aten.pow.Tensor_Tensor %3422, %3421 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3424 = torch.aten.neg %3423 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3425 = torch.aten.neg %3424 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3426 = torch.aten.div.Tensor %3420, %3425 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%3427 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3428 = torch.aten.detach %3427 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3429 = torch.aten.div.Tensor %arg2, %3426 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[2,77,768],f16> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%3430 = torch.aten.add.Tensor %3429, %3428, %int1 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%3431 = torch.aten.sub.Tensor %3414, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%3432 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%3433 = torch.aten.pow.Tensor_Tensor %3432, %3431 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3434 = torch.aten.neg %3433 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3435 = torch.aten.sub.Tensor %3414, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%3436 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%3437 = torch.aten.pow.Tensor_Tensor %3436, %3435 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3438 = torch.aten.sub.Tensor %3437, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%3439 = torch.aten.gt.Tensor %3430, %3438 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,77,768],i1> loc(#loc1)
%3440 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%3441 = torch.aten.to.dtype %3440, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%3442 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%3443 = torch.aten.broadcast_to %3441, %3442 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%3444 = torch.valsem.aten.copy %3443, %3438, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%3445 = torch.aten.where.self %3439, %3444, %3430 : !torch.vtensor<[2,77,768],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,77,768],f16> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%3446 = torch.aten.lt.Tensor %3445, %3434 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,77,768],i1> loc(#loc1)
%3447 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%3448 = torch.aten.to.dtype %3447, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%3449 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%3450 = torch.aten.broadcast_to %3448, %3449 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%3451 = torch.valsem.aten.copy %3450, %3434, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%3452 = torch.aten.where.self %3446, %3451, %3445 : !torch.vtensor<[2,77,768],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,77,768],f16> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%3453 = torch.aten.round %3452 : !torch.vtensor<[2,77,768],f16> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%3454 = torch.aten.sub.Tensor %3453, %3428, %int1 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%3455 = torch.aten.mul.Tensor %3454, %3426 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[2,77,768],f16> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%3456 = torch.prim.ListConstruct %int640, %int48, %int16 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc2)
%3457 = torch.aten.broadcast_to %143, %3456 : !torch.vtensor<[640,48,1],f16>, !torch.list<int> -> !torch.vtensor<[640,48,16],f16> loc(#loc1)
%3458 = torch.aten.clone %3457, %int0 : !torch.vtensor<[640,48,16],f16>, !torch.int -> !torch.vtensor<[640,48,16],f16> loc(#loc1)
%3459 = torch.prim.ListConstruct %int640, %int768 : (!torch.int, !torch.int) -> !torch.list<int> loc(#loc4)
%3460 = torch.aten.view %3458, %3459 : !torch.vtensor<[640,48,16],f16>, !torch.list<int> -> !torch.vtensor<[640,768],f16> loc(#loc1)
%3461 = torch.aten.mul.Tensor %144, %3460 : !torch.vtensor<[640,768],si8>, !torch.vtensor<[640,768],f16> -> !torch.vtensor<[640,768],f16> loc(#loc1)
%3462 = torch.aten.transpose.int %3461, %int0, %int1 : !torch.vtensor<[640,768],f16>, !torch.int, !torch.int -> !torch.vtensor<[768,640],f16> loc(#loc1)
%3463 = torch.aten.view %3455, %1593 : !torch.vtensor<[2,77,768],f16>, !torch.list<int> -> !torch.vtensor<[154,768],f16> loc(#loc1)
%3464 = torch.aten.mm %3463, %3462 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,640],f16> -> !torch.vtensor<[154,640],f16> loc(#loc1)
%3465 = torch.prim.ListConstruct %int2, %int77, %int640 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc0)
%3466 = torch.aten.view %3464, %3465 : !torch.vtensor<[154,640],f16>, !torch.list<int> -> !torch.vtensor<[2,77,640],f16> loc(#loc1)
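// [editor's note] The quantize-then-project pattern repeats for the second 768 -> 640 projection.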
%3467 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3468 = torch.aten.detach %3467 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3469 = torch.aten.view %arg2, %1542 : !torch.vtensor<[2,77,768],f16>, !torch.list<int> -> !torch.vtensor<[2,77,48,16],f16> loc(#loc1)
%3470 = torch.aten.abs %3469 : !torch.vtensor<[2,77,48,16],f16> -> !torch.vtensor<[2,77,48,16],f16> loc(#loc1)
%values_78, %indices_79 = torch.aten.max.dim %3470, %int3, %true : !torch.vtensor<[2,77,48,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,77,48,1],f16>, !torch.vtensor<[2,77,48,1],si64> loc(#loc1)
%3471 = torch.aten.view %values_78, %1545 : !torch.vtensor<[2,77,48,1],f16>, !torch.list<int> -> !torch.vtensor<[2,77,48,1],f16> loc(#loc1)
%3472 = torch.aten.broadcast_to %3471, %1542 : !torch.vtensor<[2,77,48,1],f16>, !torch.list<int> -> !torch.vtensor<[2,77,48,16],f16> loc(#loc1)
%3473 = torch.aten.clone %3472, %int0 : !torch.vtensor<[2,77,48,16],f16>, !torch.int -> !torch.vtensor<[2,77,48,16],f16> loc(#loc1)
%3474 = torch.aten.view %3473, %1549 : !torch.vtensor<[2,77,48,16],f16>, !torch.list<int> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%3475 = torch.aten.sub.Tensor %3468, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%3476 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%3477 = torch.aten.pow.Tensor_Tensor %3476, %3475 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3478 = torch.aten.neg %3477 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3479 = torch.aten.neg %3478 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3480 = torch.aten.div.Tensor %3474, %3479 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%3481 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3482 = torch.aten.detach %3481 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3483 = torch.aten.div.Tensor %arg2, %3480 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[2,77,768],f16> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%3484 = torch.aten.add.Tensor %3483, %3482, %int1 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%3485 = torch.aten.sub.Tensor %3468, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%3486 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%3487 = torch.aten.pow.Tensor_Tensor %3486, %3485 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3488 = torch.aten.neg %3487 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3489 = torch.aten.sub.Tensor %3468, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%3490 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%3491 = torch.aten.pow.Tensor_Tensor %3490, %3489 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3492 = torch.aten.sub.Tensor %3491, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%3493 = torch.aten.gt.Tensor %3484, %3492 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,77,768],i1> loc(#loc1)
%3494 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%3495 = torch.aten.to.dtype %3494, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%3496 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%3497 = torch.aten.broadcast_to %3495, %3496 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%3498 = torch.valsem.aten.copy %3497, %3492, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%3499 = torch.aten.where.self %3493, %3498, %3484 : !torch.vtensor<[2,77,768],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,77,768],f16> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%3500 = torch.aten.lt.Tensor %3499, %3488 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,77,768],i1> loc(#loc1)
%3501 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%3502 = torch.aten.to.dtype %3501, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%3503 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%3504 = torch.aten.broadcast_to %3502, %3503 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%3505 = torch.valsem.aten.copy %3504, %3488, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%3506 = torch.aten.where.self %3500, %3505, %3499 : !torch.vtensor<[2,77,768],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,77,768],f16> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%3507 = torch.aten.round %3506 : !torch.vtensor<[2,77,768],f16> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%3508 = torch.aten.sub.Tensor %3507, %3482, %int1 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%3509 = torch.aten.mul.Tensor %3508, %3480 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[2,77,768],f16> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%3510 = torch.aten.broadcast_to %145, %3456 : !torch.vtensor<[640,48,1],f16>, !torch.list<int> -> !torch.vtensor<[640,48,16],f16> loc(#loc1)
%3511 = torch.aten.clone %3510, %int0 : !torch.vtensor<[640,48,16],f16>, !torch.int -> !torch.vtensor<[640,48,16],f16> loc(#loc1)
%3512 = torch.aten.view %3511, %3459 : !torch.vtensor<[640,48,16],f16>, !torch.list<int> -> !torch.vtensor<[640,768],f16> loc(#loc1)
%3513 = torch.aten.mul.Tensor %146, %3512 : !torch.vtensor<[640,768],si8>, !torch.vtensor<[640,768],f16> -> !torch.vtensor<[640,768],f16> loc(#loc1)
%3514 = torch.aten.transpose.int %3513, %int0, %int1 : !torch.vtensor<[640,768],f16>, !torch.int, !torch.int -> !torch.vtensor<[768,640],f16> loc(#loc1)
%3515 = torch.aten.view %3509, %1593 : !torch.vtensor<[2,77,768],f16>, !torch.list<int> -> !torch.vtensor<[154,768],f16> loc(#loc1)
%3516 = torch.aten.mm %3515, %3514 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,640],f16> -> !torch.vtensor<[154,640],f16> loc(#loc1)
%3517 = torch.aten.view %3516, %3465 : !torch.vtensor<[154,640],f16>, !torch.list<int> -> !torch.vtensor<[2,77,640],f16> loc(#loc1)
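// [editor's note] Multi-head reshape: [2,1024,640] and [2,77,640] are split into 8 heads of
// width 80 and flattened to batch*heads = 16 for the batched matmuls.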
%3518 = torch.aten.view %3412, %3252 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16> loc(#loc1)
%3519 = torch.aten.permute %3518, %1380 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16> loc(#loc1)
%3520 = torch.aten.clone %3519, %int0 : !torch.vtensor<[2,8,1024,80],f16>, !torch.int -> !torch.vtensor<[2,8,1024,80],f16> loc(#loc1)
%3521 = torch.aten.view %3520, %3256 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16> loc(#loc1)
%3522 = torch.prim.ListConstruct %int2, %int77, %int8, %int80 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc5)
%3523 = torch.aten.view %3466, %3522 : !torch.vtensor<[2,77,640],f16>, !torch.list<int> -> !torch.vtensor<[2,77,8,80],f16> loc(#loc1)
%3524 = torch.aten.permute %3523, %1380 : !torch.vtensor<[2,77,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,80],f16> loc(#loc1)
%3525 = torch.aten.clone %3524, %int0 : !torch.vtensor<[2,8,77,80],f16>, !torch.int -> !torch.vtensor<[2,8,77,80],f16> loc(#loc1)
%3526 = torch.prim.ListConstruct %int16, %int77, %int80 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc2)
%3527 = torch.aten.view %3525, %3526 : !torch.vtensor<[2,8,77,80],f16>, !torch.list<int> -> !torch.vtensor<[16,77,80],f16> loc(#loc1)
%3528 = torch.aten.view %3517, %3522 : !torch.vtensor<[2,77,640],f16>, !torch.list<int> -> !torch.vtensor<[2,77,8,80],f16> loc(#loc1)
%3529 = torch.aten.permute %3528, %1380 : !torch.vtensor<[2,77,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,80],f16> loc(#loc1)
%3530 = torch.aten.clone %3529, %int0 : !torch.vtensor<[2,8,77,80],f16>, !torch.int -> !torch.vtensor<[2,8,77,80],f16> loc(#loc1)
%3531 = torch.aten.view %3530, %3526 : !torch.vtensor<[2,8,77,80],f16>, !torch.list<int> -> !torch.vtensor<[16,77,80],f16> loc(#loc1)
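// [editor's note] Scaled dot-product attention: scores = Q @ K^T ([16,1024,80] x [16,80,77]),
// scaled by %1 (presumably 1/sqrt(80) for head width 80), then a numerically stable softmax
// (max-subtract, exp, sum, divide) and probs @ V, merged back to [2,1024,640].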
%3532 = torch.aten.transpose.int %3527, %int-1, %int-2 : !torch.vtensor<[16,77,80],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,80,77],f16> loc(#loc1)
%3533 = torch.aten.broadcast_to %3521, %3256 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16> loc(#loc1)
%3534 = torch.aten.view %3533, %3256 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16> loc(#loc1)
%3535 = torch.prim.ListConstruct %int16, %int80, %int77 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc2)
%3536 = torch.aten.broadcast_to %3532, %3535 : !torch.vtensor<[16,80,77],f16>, !torch.list<int> -> !torch.vtensor<[16,80,77],f16> loc(#loc1)
%3537 = torch.aten.view %3536, %3535 : !torch.vtensor<[16,80,77],f16>, !torch.list<int> -> !torch.vtensor<[16,80,77],f16> loc(#loc1)
%3538 = torch.aten.bmm %3534, %3537 : !torch.vtensor<[16,1024,80],f16>, !torch.vtensor<[16,80,77],f16> -> !torch.vtensor<[16,1024,77],f16> loc(#loc1)
%3539 = torch.prim.ListConstruct %int16, %int1024, %int77 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc4)
%3540 = torch.aten.view %3538, %3539 : !torch.vtensor<[16,1024,77],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,77],f16> loc(#loc1)
%3541 = torch.aten.mul.Tensor %3540, %1 : !torch.vtensor<[16,1024,77],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,1024,77],f16> loc(#loc1)
%values_80, %indices_81 = torch.aten.max.dim %3541, %int-1, %true : !torch.vtensor<[16,1024,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,1024,1],f16>, !torch.vtensor<[16,1024,1],si64> loc(#loc1)
%3542 = torch.aten.sub.Tensor %3541, %values_80, %float1.000000e00 : !torch.vtensor<[16,1024,77],f16>, !torch.vtensor<[16,1024,1],f16>, !torch.float -> !torch.vtensor<[16,1024,77],f16> loc(#loc1)
%3543 = torch.aten.exp %3542 : !torch.vtensor<[16,1024,77],f16> -> !torch.vtensor<[16,1024,77],f16> loc(#loc1)
%3544 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int> loc(#loc1)
%3545 = torch.aten.sum.dim_IntList %3543, %3544, %true, %none : !torch.vtensor<[16,1024,77],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[16,1024,1],f16> loc(#loc1)
%3546 = torch.aten.div.Tensor %3543, %3545 : !torch.vtensor<[16,1024,77],f16>, !torch.vtensor<[16,1024,1],f16> -> !torch.vtensor<[16,1024,77],f16> loc(#loc1)
%3547 = torch.aten.broadcast_to %3546, %3539 : !torch.vtensor<[16,1024,77],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,77],f16> loc(#loc1)
%3548 = torch.aten.view %3547, %3539 : !torch.vtensor<[16,1024,77],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,77],f16> loc(#loc1)
%3549 = torch.aten.broadcast_to %3531, %3526 : !torch.vtensor<[16,77,80],f16>, !torch.list<int> -> !torch.vtensor<[16,77,80],f16> loc(#loc1)
%3550 = torch.aten.view %3549, %3526 : !torch.vtensor<[16,77,80],f16>, !torch.list<int> -> !torch.vtensor<[16,77,80],f16> loc(#loc1)
%3551 = torch.aten.bmm %3548, %3550 : !torch.vtensor<[16,1024,77],f16>, !torch.vtensor<[16,77,80],f16> -> !torch.vtensor<[16,1024,80],f16> loc(#loc1)
%3552 = torch.aten.view %3551, %3256 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16> loc(#loc1)
%3553 = torch.aten.view %3552, %3287 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16> loc(#loc1)
%3554 = torch.aten.permute %3553, %1380 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16> loc(#loc1)
%3555 = torch.aten.clone %3554, %int0 : !torch.vtensor<[2,1024,8,80],f16>, !torch.int -> !torch.vtensor<[2,1024,8,80],f16> loc(#loc1)
%3556 = torch.aten.view %3555, %3072 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
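// [editor's note] Fake-quantization of the cross-attention output, same pattern as above.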
%3557 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3558 = torch.aten.detach %3557 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3559 = torch.aten.view %3556, %3090 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,40,16],f16> loc(#loc1)
%3560 = torch.aten.abs %3559 : !torch.vtensor<[2,1024,40,16],f16> -> !torch.vtensor<[2,1024,40,16],f16> loc(#loc1)
%values_82, %indices_83 = torch.aten.max.dim %3560, %int3, %true : !torch.vtensor<[2,1024,40,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,1024,40,1],f16>, !torch.vtensor<[2,1024,40,1],si64> loc(#loc1)
%3561 = torch.aten.view %values_82, %3093 : !torch.vtensor<[2,1024,40,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,40,1],f16> loc(#loc1)
%3562 = torch.aten.broadcast_to %3561, %3090 : !torch.vtensor<[2,1024,40,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,40,16],f16> loc(#loc1)
%3563 = torch.aten.clone %3562, %int0 : !torch.vtensor<[2,1024,40,16],f16>, !torch.int -> !torch.vtensor<[2,1024,40,16],f16> loc(#loc1)
%3564 = torch.aten.view %3563, %3072 : !torch.vtensor<[2,1024,40,16],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3565 = torch.aten.sub.Tensor %3558, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%3566 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%3567 = torch.aten.pow.Tensor_Tensor %3566, %3565 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3568 = torch.aten.neg %3567 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3569 = torch.aten.neg %3568 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3570 = torch.aten.div.Tensor %3564, %3569 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3571 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3572 = torch.aten.detach %3571 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3573 = torch.aten.div.Tensor %3556, %3570 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3574 = torch.aten.add.Tensor %3573, %3572, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3575 = torch.aten.sub.Tensor %3558, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%3576 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%3577 = torch.aten.pow.Tensor_Tensor %3576, %3575 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3578 = torch.aten.neg %3577 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3579 = torch.aten.sub.Tensor %3558, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%3580 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%3581 = torch.aten.pow.Tensor_Tensor %3580, %3579 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3582 = torch.aten.sub.Tensor %3581, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%3583 = torch.aten.gt.Tensor %3574, %3582 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,1024,640],i1> loc(#loc1)
%3584 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%3585 = torch.aten.to.dtype %3584, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%3586 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%3587 = torch.aten.broadcast_to %3585, %3586 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%3588 = torch.valsem.aten.copy %3587, %3582, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%3589 = torch.aten.where.self %3583, %3588, %3574 : !torch.vtensor<[2,1024,640],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3590 = torch.aten.lt.Tensor %3589, %3578 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,1024,640],i1> loc(#loc1)
%3591 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%3592 = torch.aten.to.dtype %3591, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%3593 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%3594 = torch.aten.broadcast_to %3592, %3593 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%3595 = torch.valsem.aten.copy %3594, %3578, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%3596 = torch.aten.where.self %3590, %3595, %3589 : !torch.vtensor<[2,1024,640],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3597 = torch.aten.round %3596 : !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3598 = torch.aten.sub.Tensor %3597, %3572, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3599 = torch.aten.mul.Tensor %3598, %3570 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
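// [editor's note] Quantized output projection (scales %147, si8 weight %148, bias %149),
// followed by the residual add into %3610.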
%3600 = torch.aten.broadcast_to %147, %3133 : !torch.vtensor<[640,40,1],f16>, !torch.list<int> -> !torch.vtensor<[640,40,16],f16> loc(#loc1)
%3601 = torch.aten.clone %3600, %int0 : !torch.vtensor<[640,40,16],f16>, !torch.int -> !torch.vtensor<[640,40,16],f16> loc(#loc1)
%3602 = torch.aten.view %3601, %3136 : !torch.vtensor<[640,40,16],f16>, !torch.list<int> -> !torch.vtensor<[640,640],f16> loc(#loc1)
%3603 = torch.aten.mul.Tensor %148, %3602 : !torch.vtensor<[640,640],si8>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16> loc(#loc1)
%3604 = torch.aten.transpose.int %3603, %int0, %int1 : !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,640],f16> loc(#loc1)
%3605 = torch.aten.view %3599, %3140 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16> loc(#loc1)
%3606 = torch.aten.mm %3605, %3604 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16> loc(#loc1)
%3607 = torch.aten.mul.Scalar %149, %int1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640],f16> loc(#loc1)
%3608 = torch.aten.add.Tensor %3607, %3606, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2048,640],f16>, !torch.int -> !torch.vtensor<[2048,640],f16> loc(#loc1)
%3609 = torch.aten.view %3608, %3072 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3610 = torch.aten.add.Tensor %3609, %3345, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
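// [editor's note] Second LayerNorm of the block, again over the 640 channels with eps 1e-05.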
%3611 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int> loc(#loc1)
%3612 = torch.aten.sum.dim_IntList %3610, %3611, %true, %none : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,1024,1],f16> loc(#loc1)
%3613 = torch.aten.div.Scalar %3612, %int640 : !torch.vtensor<[2,1024,1],f16>, !torch.int -> !torch.vtensor<[2,1024,1],f16> loc(#loc1)
%3614 = torch.prim.ListConstruct %int2, %int1024, %int640 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%3615 = torch.aten.broadcast_to %3613, %3614 : !torch.vtensor<[2,1024,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3616 = torch.aten.sub.Tensor %3610, %3615, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3617 = torch.aten.mul.Tensor %3616, %3616 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3618 = torch.aten.sum.dim_IntList %3617, %3611, %true, %none : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,1024,1],f16> loc(#loc1)
%3619 = torch.aten.div.Scalar %3618, %int640 : !torch.vtensor<[2,1024,1],f16>, !torch.int -> !torch.vtensor<[2,1024,1],f16> loc(#loc1)
%3620 = torch.aten.add.Scalar %3619, %float1.000000e-05, %int1 : !torch.vtensor<[2,1024,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,1024,1],f16> loc(#loc1)
%3621 = torch.aten.rsqrt %3620 : !torch.vtensor<[2,1024,1],f16> -> !torch.vtensor<[2,1024,1],f16> loc(#loc1)
%3622 = torch.prim.ListConstruct %int2, %int1024, %int640 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%3623 = torch.aten.broadcast_to %3621, %3622 : !torch.vtensor<[2,1024,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3624 = torch.aten.mul.Tensor %3616, %3623 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
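// [editor's note] Gated feed-forward: a quantized 640 -> 5120 projection (scales %150, si8
// weight %151, bias %152) is split into two 2560-wide halves; gelu is applied to the second
// half and multiplied into the first, i.e. a GEGLU.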
%3625 = torch.prim.ListConstruct %int5120, %int40, %int16 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc2)
%3626 = torch.aten.broadcast_to %150, %3625 : !torch.vtensor<[5120,40,1],f16>, !torch.list<int> -> !torch.vtensor<[5120,40,16],f16> loc(#loc1)
%3627 = torch.aten.clone %3626, %int0 : !torch.vtensor<[5120,40,16],f16>, !torch.int -> !torch.vtensor<[5120,40,16],f16> loc(#loc1)
%3628 = torch.prim.ListConstruct %int5120, %int640 : (!torch.int, !torch.int) -> !torch.list<int> loc(#loc4)
%3629 = torch.aten.view %3627, %3628 : !torch.vtensor<[5120,40,16],f16>, !torch.list<int> -> !torch.vtensor<[5120,640],f16> loc(#loc1)
%3630 = torch.aten.mul.Tensor %151, %3629 : !torch.vtensor<[5120,640],si8>, !torch.vtensor<[5120,640],f16> -> !torch.vtensor<[5120,640],f16> loc(#loc1)
%3631 = torch.aten.transpose.int %3630, %int0, %int1 : !torch.vtensor<[5120,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,5120],f16> loc(#loc1)
%3632 = torch.aten.view %3624, %3140 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16> loc(#loc1)
%3633 = torch.aten.mm %3632, %3631 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,5120],f16> -> !torch.vtensor<[2048,5120],f16> loc(#loc1)
%3634 = torch.aten.mul.Scalar %152, %int1 : !torch.vtensor<[5120],f16>, !torch.int -> !torch.vtensor<[5120],f16> loc(#loc1)
%3635 = torch.aten.add.Tensor %3634, %3633, %int1 : !torch.vtensor<[5120],f16>, !torch.vtensor<[2048,5120],f16>, !torch.int -> !torch.vtensor<[2048,5120],f16> loc(#loc1)
%3636 = torch.prim.ListConstruct %int2, %int1024, %int5120 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc3)
%3637 = torch.aten.view %3635, %3636 : !torch.vtensor<[2048,5120],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,5120],f16> loc(#loc1)
%3638 = torch.aten.slice.Tensor %3637, %int-1, %int0, %int2560, %int1 : !torch.vtensor<[2,1024,5120],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1024,2560],f16> loc(#loc1)
%3639 = torch.aten.slice.Tensor %3637, %int-1, %int2560, %int5120, %int1 : !torch.vtensor<[2,1024,5120],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1024,2560],f16> loc(#loc1)
%3640 = torch.aten.gelu %3639, %str : !torch.vtensor<[2,1024,2560],f16>, !torch.str -> !torch.vtensor<[2,1024,2560],f16> loc(#loc1)
%3641 = torch.aten.mul.Tensor %3638, %3640 : !torch.vtensor<[2,1024,2560],f16>, !torch.vtensor<[2,1024,2560],f16> -> !torch.vtensor<[2,1024,2560],f16> loc(#loc1)
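// [editor's note] Quantized 2560 -> 640 output projection (scales %153, si8 weight %154,
// bias %155), residual add, then the 1024 tokens are reshaped back to the NCHW feature map
// [2,640,32,32].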
%3642 = torch.prim.ListConstruct %int640, %int160, %int16 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc2)
%3643 = torch.aten.broadcast_to %153, %3642 : !torch.vtensor<[640,160,1],f16>, !torch.list<int> -> !torch.vtensor<[640,160,16],f16> loc(#loc1)
%3644 = torch.aten.clone %3643, %int0 : !torch.vtensor<[640,160,16],f16>, !torch.int -> !torch.vtensor<[640,160,16],f16> loc(#loc1)
%3645 = torch.prim.ListConstruct %int640, %int2560 : (!torch.int, !torch.int) -> !torch.list<int> loc(#loc4)
%3646 = torch.aten.view %3644, %3645 : !torch.vtensor<[640,160,16],f16>, !torch.list<int> -> !torch.vtensor<[640,2560],f16> loc(#loc1)
%3647 = torch.aten.mul.Tensor %154, %3646 : !torch.vtensor<[640,2560],si8>, !torch.vtensor<[640,2560],f16> -> !torch.vtensor<[640,2560],f16> loc(#loc1)
%3648 = torch.aten.transpose.int %3647, %int0, %int1 : !torch.vtensor<[640,2560],f16>, !torch.int, !torch.int -> !torch.vtensor<[2560,640],f16> loc(#loc1)
%3649 = torch.prim.ListConstruct %int2048, %int2560 : (!torch.int, !torch.int) -> !torch.list<int> loc(#loc3)
%3650 = torch.aten.view %3641, %3649 : !torch.vtensor<[2,1024,2560],f16>, !torch.list<int> -> !torch.vtensor<[2048,2560],f16> loc(#loc1)
%3651 = torch.aten.mm %3650, %3648 : !torch.vtensor<[2048,2560],f16>, !torch.vtensor<[2560,640],f16> -> !torch.vtensor<[2048,640],f16> loc(#loc1)
%3652 = torch.aten.mul.Scalar %155, %int1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640],f16> loc(#loc1)
%3653 = torch.aten.add.Tensor %3652, %3651, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2048,640],f16>, !torch.int -> !torch.vtensor<[2048,640],f16> loc(#loc1)
%3654 = torch.aten.view %3653, %3072 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3655 = torch.aten.add.Tensor %3654, %3610, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3656 = torch.prim.ListConstruct %int2, %int32, %int32, %int640 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc5)
%3657 = torch.aten.view %3655, %3656 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2,32,32,640],f16> loc(#loc1)
%3658 = torch.aten.permute %3657, %1789 : !torch.vtensor<[2,32,32,640],f16>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
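// [editor's note] Fake-quantization of the feature map: the absmax is taken per 16-channel
// group at each spatial position (max over dim 2 of the [2,40,16,32,32] view).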
%3659 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3660 = torch.aten.detach %3659 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3661 = torch.aten.view %3658, %2892 : !torch.vtensor<[2,640,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,40,16,32,32],f16> loc(#loc1)
%3662 = torch.aten.abs %3661 : !torch.vtensor<[2,40,16,32,32],f16> -> !torch.vtensor<[2,40,16,32,32],f16> loc(#loc1)
%values_84, %indices_85 = torch.aten.max.dim %3662, %int2, %true : !torch.vtensor<[2,40,16,32,32],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,40,1,32,32],f16>, !torch.vtensor<[2,40,1,32,32],si64> loc(#loc1)
%3663 = torch.aten.view %values_84, %2895 : !torch.vtensor<[2,40,1,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,40,1,32,32],f16> loc(#loc1)
%3664 = torch.aten.broadcast_to %3663, %2892 : !torch.vtensor<[2,40,1,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,40,16,32,32],f16> loc(#loc1)
%3665 = torch.aten.clone %3664, %int0 : !torch.vtensor<[2,40,16,32,32],f16>, !torch.int -> !torch.vtensor<[2,40,16,32,32],f16> loc(#loc1)
%3666 = torch.aten.view %3665, %2875 : !torch.vtensor<[2,40,16,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%3667 = torch.aten.sub.Tensor %3660, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%3668 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%3669 = torch.aten.pow.Tensor_Tensor %3668, %3667 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3670 = torch.aten.neg %3669 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3671 = torch.aten.neg %3670 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3672 = torch.aten.div.Tensor %3666, %3671 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%3673 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3674 = torch.aten.detach %3673 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3675 = torch.aten.div.Tensor %3658, %3672 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%3676 = torch.aten.add.Tensor %3675, %3674, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%3677 = torch.aten.sub.Tensor %3660, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%3678 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%3679 = torch.aten.pow.Tensor_Tensor %3678, %3677 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3680 = torch.aten.neg %3679 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3681 = torch.aten.sub.Tensor %3660, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%3682 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%3683 = torch.aten.pow.Tensor_Tensor %3682, %3681 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3684 = torch.aten.sub.Tensor %3683, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%3685 = torch.aten.gt.Tensor %3676, %3684 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,640,32,32],i1> loc(#loc1)
%3686 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%3687 = torch.aten.to.dtype %3686, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%3688 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%3689 = torch.aten.broadcast_to %3687, %3688 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%3690 = torch.valsem.aten.copy %3689, %3684, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%3691 = torch.aten.where.self %3685, %3690, %3676 : !torch.vtensor<[2,640,32,32],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%3692 = torch.aten.lt.Tensor %3691, %3680 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,640,32,32],i1> loc(#loc1)
%3693 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%3694 = torch.aten.to.dtype %3693, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%3695 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%3696 = torch.aten.broadcast_to %3694, %3695 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%3697 = torch.valsem.aten.copy %3696, %3680, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%3698 = torch.aten.where.self %3692, %3697, %3691 : !torch.vtensor<[2,640,32,32],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%3699 = torch.aten.round %3698 : !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%3700 = torch.aten.sub.Tensor %3699, %3674, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%3701 = torch.aten.mul.Tensor %3700, %3672 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
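// [editor's note] Quantized 1x1 convolution (scales %156, si8 weight %157, bias %158) plus the
// residual add with %2993; this reads as the transformer block's projection back onto the
// convolutional feature map.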
%3702 = torch.aten.broadcast_to %156, %3064 : !torch.vtensor<[640,40,1,1,1],f16>, !torch.list<int> -> !torch.vtensor<[640,40,16,1,1],f16> loc(#loc1)
%3703 = torch.aten.clone %3702, %int0 : !torch.vtensor<[640,40,16,1,1],f16>, !torch.int -> !torch.vtensor<[640,40,16,1,1],f16> loc(#loc1)
%3704 = torch.aten.view %3703, %3067 : !torch.vtensor<[640,40,16,1,1],f16>, !torch.list<int> -> !torch.vtensor<[640,640,1,1],f16> loc(#loc1)
%3705 = torch.aten.mul.Tensor %157, %3704 : !torch.vtensor<[640,640,1,1],si8>, !torch.vtensor<[640,640,1,1],f16> -> !torch.vtensor<[640,640,1,1],f16> loc(#loc1)
%3706 = torch.aten.convolution %3701, %3705, %158, %933, %934, %933, %false, %934, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[640,640,1,1],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%3707 = torch.aten.add.Tensor %3706, %2993, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
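// [editor's note] GroupNorm with 32 groups of 20 channels: statistics are accumulated in f64
// over the [2,32,20,1024] view (20480 elements per group), eps %5 is added before rsqrt, and
// %159/%160 apply the per-channel affine in f32.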
%3708 = torch.aten.clone %3707, %int0 : !torch.vtensor<[2,640,32,32],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%3709 = torch.aten.view %3708, %2854 : !torch.vtensor<[2,640,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,32,20,1024],f16> loc(#loc1)
%3710 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%3711 = torch.aten.to.dtype %3710, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%3712 = torch.prim.ListConstruct %int2, %int32, %int20, %int1024 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%3713 = torch.aten.broadcast_to %3711, %3712 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,20,1024],f32> loc(#loc1)
%3714 = torch.valsem.aten.copy %3713, %3709, %false : !torch.vtensor<[2,32,20,1024],f32>, !torch.vtensor<[2,32,20,1024],f16>, !torch.bool -> !torch.vtensor<[2,32,20,1024],f32> loc(#loc1)
%3715 = torch.aten.to.dtype %3714, %int7, %false, %false, %none : !torch.vtensor<[2,32,20,1024],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,20,1024],f64> loc(#loc1)
%3716 = torch.aten.sum.dim_IntList %3715, %943, %true, %none : !torch.vtensor<[2,32,20,1024],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%3717 = torch.aten.div.Scalar %3716, %int20480 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%3718 = torch.aten.sub.Tensor %3715, %3717, %float1.000000e00 : !torch.vtensor<[2,32,20,1024],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,20,1024],f64> loc(#loc1)
%3719 = torch.aten.mul.Tensor %3718, %3718 : !torch.vtensor<[2,32,20,1024],f64>, !torch.vtensor<[2,32,20,1024],f64> -> !torch.vtensor<[2,32,20,1024],f64> loc(#loc1)
%3720 = torch.aten.sum.dim_IntList %3719, %943, %true, %none : !torch.vtensor<[2,32,20,1024],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%3721 = torch.aten.div.Scalar %3720, %int20480 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%3722 = torch.aten.to.dtype %3721, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%3723 = torch.aten.sum.dim_IntList %3714, %943, %true, %none : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%3724 = torch.aten.div.Scalar %3723, %int20480 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%3725 = torch.aten.add.Tensor %3722, %5, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%3726 = torch.aten.rsqrt %3725 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%3727 = torch.aten.sub.Tensor %3709, %3724, %int1 : !torch.vtensor<[2,32,20,1024],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,20,1024],f32> loc(#loc1)
%3728 = torch.aten.mul.Tensor %3727, %3726 : !torch.vtensor<[2,32,20,1024],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,20,1024],f32> loc(#loc1)
%3729 = torch.aten.view %3728, %2875 : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f32> loc(#loc1)
%3730 = torch.aten.unsqueeze %159, %int-1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640,1],f16> loc(#loc1)
%3731 = torch.aten.unsqueeze %3730, %int-1 : !torch.vtensor<[640,1],f16>, !torch.int -> !torch.vtensor<[640,1,1],f16> loc(#loc1)
%3732 = torch.aten.mul.Tensor %3729, %3731 : !torch.vtensor<[2,640,32,32],f32>, !torch.vtensor<[640,1,1],f16> -> !torch.vtensor<[2,640,32,32],f32> loc(#loc1)
%3733 = torch.aten.unsqueeze %160, %int-1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640,1],f16> loc(#loc1)
%3734 = torch.aten.unsqueeze %3733, %int-1 : !torch.vtensor<[640,1],f16>, !torch.int -> !torch.vtensor<[640,1,1],f16> loc(#loc1)
%3735 = torch.aten.add.Tensor %3732, %3734, %int1 : !torch.vtensor<[2,640,32,32],f32>, !torch.vtensor<[640,1,1],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f32> loc(#loc1)
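// Cast the normalized result back to f16 (same copy-based cast pattern), then SiLU:
// x * sigmoid(x).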
%3736 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%3737 = torch.aten.to.dtype %3736, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%3738 = torch.prim.ListConstruct %int2, %int640, %int32, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%3739 = torch.aten.broadcast_to %3737, %3738 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%3740 = torch.valsem.aten.copy %3739, %3735, %false : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f32>, !torch.bool -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%3741 = torch.aten.sigmoid %3740 : !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%3742 = torch.aten.mul.Tensor %3741, %3740 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
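// Activation fake-quantization (quantize-dequantize). The channels are regrouped as
// [2,40,16,32,32] and a per-group, per-position absmax is taken over the 16 channels
// (dim 2). With b = %880 (evidently the bit-width), z = %881 (zero point) and %882 as
// the base (presumably 2), this computes, in PyTorch-like pseudocode:
//   s  = amax / %882**(b-1)
//   q  = round(clamp(x / s + z, -(%882**(b-1)), %882**(b-1) - 1))
//   dq = (q - z) * s
// The clamp is spelled out as gt/lt comparisons feeding where.self, and the double
// aten.neg is a no-op left over from tracing.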
%3743 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3744 = torch.aten.detach %3743 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3745 = torch.aten.view %3742, %2892 : !torch.vtensor<[2,640,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,40,16,32,32],f16> loc(#loc1)
%3746 = torch.aten.abs %3745 : !torch.vtensor<[2,40,16,32,32],f16> -> !torch.vtensor<[2,40,16,32,32],f16> loc(#loc1)
%values_86, %indices_87 = torch.aten.max.dim %3746, %int2, %true : !torch.vtensor<[2,40,16,32,32],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,40,1,32,32],f16>, !torch.vtensor<[2,40,1,32,32],si64> loc(#loc1)
%3747 = torch.aten.view %values_86, %2895 : !torch.vtensor<[2,40,1,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,40,1,32,32],f16> loc(#loc1)
%3748 = torch.aten.broadcast_to %3747, %2892 : !torch.vtensor<[2,40,1,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,40,16,32,32],f16> loc(#loc1)
%3749 = torch.aten.clone %3748, %int0 : !torch.vtensor<[2,40,16,32,32],f16>, !torch.int -> !torch.vtensor<[2,40,16,32,32],f16> loc(#loc1)
%3750 = torch.aten.view %3749, %2875 : !torch.vtensor<[2,40,16,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%3751 = torch.aten.sub.Tensor %3744, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%3752 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%3753 = torch.aten.pow.Tensor_Tensor %3752, %3751 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3754 = torch.aten.neg %3753 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3755 = torch.aten.neg %3754 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3756 = torch.aten.div.Tensor %3750, %3755 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%3757 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3758 = torch.aten.detach %3757 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3759 = torch.aten.div.Tensor %3742, %3756 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%3760 = torch.aten.add.Tensor %3759, %3758, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%3761 = torch.aten.sub.Tensor %3744, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%3762 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%3763 = torch.aten.pow.Tensor_Tensor %3762, %3761 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3764 = torch.aten.neg %3763 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3765 = torch.aten.sub.Tensor %3744, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%3766 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%3767 = torch.aten.pow.Tensor_Tensor %3766, %3765 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3768 = torch.aten.sub.Tensor %3767, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%3769 = torch.aten.gt.Tensor %3760, %3768 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,640,32,32],i1> loc(#loc1)
%3770 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%3771 = torch.aten.to.dtype %3770, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%3772 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%3773 = torch.aten.broadcast_to %3771, %3772 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%3774 = torch.valsem.aten.copy %3773, %3768, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%3775 = torch.aten.where.self %3769, %3774, %3760 : !torch.vtensor<[2,640,32,32],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%3776 = torch.aten.lt.Tensor %3775, %3764 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,640,32,32],i1> loc(#loc1)
%3777 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%3778 = torch.aten.to.dtype %3777, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%3779 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%3780 = torch.aten.broadcast_to %3778, %3779 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%3781 = torch.valsem.aten.copy %3780, %3764, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%3782 = torch.aten.where.self %3776, %3781, %3775 : !torch.vtensor<[2,640,32,32],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%3783 = torch.aten.round %3782 : !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%3784 = torch.aten.sub.Tensor %3783, %3758, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%3785 = torch.aten.mul.Tensor %3784, %3756 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
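// Same weight-dequantization pattern for a 3x3 kernel: scales %161 ([640,40,1,3,3])
// expanded over input-channel groups of 16, multiplied into the si8 weight %162,
// followed by a 3x3 convolution with bias %163.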
%3786 = torch.aten.broadcast_to %161, %2935 : !torch.vtensor<[640,40,1,3,3],f16>, !torch.list<int> -> !torch.vtensor<[640,40,16,3,3],f16> loc(#loc1)
%3787 = torch.aten.clone %3786, %int0 : !torch.vtensor<[640,40,16,3,3],f16>, !torch.int -> !torch.vtensor<[640,40,16,3,3],f16> loc(#loc1)
%3788 = torch.aten.view %3787, %2938 : !torch.vtensor<[640,40,16,3,3],f16>, !torch.list<int> -> !torch.vtensor<[640,640,3,3],f16> loc(#loc1)
%3789 = torch.aten.mul.Tensor %162, %3788 : !torch.vtensor<[640,640,3,3],si8>, !torch.vtensor<[640,640,3,3],f16> -> !torch.vtensor<[640,640,3,3],f16> loc(#loc1)
%3790 = torch.aten.convolution %3785, %3789, %163, %933, %933, %933, %false, %934, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[640,640,3,3],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
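// Time-embedding injection: SiLU over the [2,1280] embedding %932, a 1280 -> 640
// linear (%164 weight, %165 bias; mul.Scalar by 1 plus add.Tensor is the traced form
// of the bias add), two full-range no-op slices, and unsqueezes to [2,640,1,1] so the
// projection broadcasts over the spatial dims of the conv output; this is the usual
// shape of a diffusion ResNet block's time-step conditioning.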
%3791 = torch.aten.sigmoid %932 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16> loc(#loc1)
%3792 = torch.aten.mul.Tensor %3791, %932 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16> loc(#loc1)
%3793 = torch.aten.transpose.int %164, %int0, %int1 : !torch.vtensor<[640,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,640],f16> loc(#loc1)
%3794 = torch.aten.mm %3792, %3793 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,640],f16> -> !torch.vtensor<[2,640],f16> loc(#loc1)
%3795 = torch.aten.mul.Scalar %165, %int1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640],f16> loc(#loc1)
%3796 = torch.aten.add.Tensor %3795, %3794, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2,640],f16>, !torch.int -> !torch.vtensor<[2,640],f16> loc(#loc1)
%3797 = torch.aten.slice.Tensor %3796, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,640],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,640],f16> loc(#loc1)
%3798 = torch.aten.slice.Tensor %3797, %int1, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,640],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,640],f16> loc(#loc1)
%3799 = torch.aten.unsqueeze %3798, %int2 : !torch.vtensor<[2,640],f16>, !torch.int -> !torch.vtensor<[2,640,1],f16> loc(#loc1)
%3800 = torch.aten.unsqueeze %3799, %int3 : !torch.vtensor<[2,640,1],f16>, !torch.int -> !torch.vtensor<[2,640,1,1],f16> loc(#loc1)
%3801 = torch.aten.add.Tensor %3790, %3800, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,1,1],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
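// Second GroupNorm of the block (same 32-group recipe, eps %5) with affine %166/%167,
// cast back to f16, then SiLU.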
%3802 = torch.aten.view %3801, %2854 : !torch.vtensor<[2,640,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,32,20,1024],f16> loc(#loc1)
%3803 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%3804 = torch.aten.to.dtype %3803, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%3805 = torch.prim.ListConstruct %int2, %int32, %int20, %int1024 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%3806 = torch.aten.broadcast_to %3804, %3805 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,20,1024],f32> loc(#loc1)
%3807 = torch.valsem.aten.copy %3806, %3802, %false : !torch.vtensor<[2,32,20,1024],f32>, !torch.vtensor<[2,32,20,1024],f16>, !torch.bool -> !torch.vtensor<[2,32,20,1024],f32> loc(#loc1)
%3808 = torch.aten.to.dtype %3807, %int7, %false, %false, %none : !torch.vtensor<[2,32,20,1024],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,20,1024],f64> loc(#loc1)
%3809 = torch.aten.sum.dim_IntList %3808, %943, %true, %none : !torch.vtensor<[2,32,20,1024],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%3810 = torch.aten.div.Scalar %3809, %int20480 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%3811 = torch.aten.sub.Tensor %3808, %3810, %float1.000000e00 : !torch.vtensor<[2,32,20,1024],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,20,1024],f64> loc(#loc1)
%3812 = torch.aten.mul.Tensor %3811, %3811 : !torch.vtensor<[2,32,20,1024],f64>, !torch.vtensor<[2,32,20,1024],f64> -> !torch.vtensor<[2,32,20,1024],f64> loc(#loc1)
%3813 = torch.aten.sum.dim_IntList %3812, %943, %true, %none : !torch.vtensor<[2,32,20,1024],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%3814 = torch.aten.div.Scalar %3813, %int20480 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%3815 = torch.aten.to.dtype %3814, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%3816 = torch.aten.sum.dim_IntList %3807, %943, %true, %none : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%3817 = torch.aten.div.Scalar %3816, %int20480 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%3818 = torch.aten.add.Tensor %3815, %5, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%3819 = torch.aten.rsqrt %3818 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%3820 = torch.aten.sub.Tensor %3802, %3817, %int1 : !torch.vtensor<[2,32,20,1024],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,20,1024],f32> loc(#loc1)
%3821 = torch.aten.mul.Tensor %3820, %3819 : !torch.vtensor<[2,32,20,1024],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,20,1024],f32> loc(#loc1)
%3822 = torch.aten.view %3821, %2875 : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f32> loc(#loc1)
%3823 = torch.aten.unsqueeze %166, %int-1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640,1],f16> loc(#loc1)
%3824 = torch.aten.unsqueeze %3823, %int-1 : !torch.vtensor<[640,1],f16>, !torch.int -> !torch.vtensor<[640,1,1],f16> loc(#loc1)
%3825 = torch.aten.mul.Tensor %3822, %3824 : !torch.vtensor<[2,640,32,32],f32>, !torch.vtensor<[640,1,1],f16> -> !torch.vtensor<[2,640,32,32],f32> loc(#loc1)
%3826 = torch.aten.unsqueeze %167, %int-1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640,1],f16> loc(#loc1)
%3827 = torch.aten.unsqueeze %3826, %int-1 : !torch.vtensor<[640,1],f16>, !torch.int -> !torch.vtensor<[640,1,1],f16> loc(#loc1)
%3828 = torch.aten.add.Tensor %3825, %3827, %int1 : !torch.vtensor<[2,640,32,32],f32>, !torch.vtensor<[640,1,1],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f32> loc(#loc1)
%3829 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%3830 = torch.aten.to.dtype %3829, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%3831 = torch.prim.ListConstruct %int2, %int640, %int32, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%3832 = torch.aten.broadcast_to %3830, %3831 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%3833 = torch.valsem.aten.copy %3832, %3828, %false : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f32>, !torch.bool -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%3834 = torch.aten.sigmoid %3833 : !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%3835 = torch.aten.mul.Tensor %3834, %3833 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
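// Fake-quantize %3835 (same per-group absmax pattern as above), dequantize the si8
// 3x3 weight %169 with scales %168, convolve with bias %170, then close the ResNet
// block: add the block input %3707 and divide by %4 (1.0, an output scale factor
// that is a no-op here).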
%3836 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3837 = torch.aten.detach %3836 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3838 = torch.aten.view %3835, %2892 : !torch.vtensor<[2,640,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,40,16,32,32],f16> loc(#loc1)
%3839 = torch.aten.abs %3838 : !torch.vtensor<[2,40,16,32,32],f16> -> !torch.vtensor<[2,40,16,32,32],f16> loc(#loc1)
%values_88, %indices_89 = torch.aten.max.dim %3839, %int2, %true : !torch.vtensor<[2,40,16,32,32],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,40,1,32,32],f16>, !torch.vtensor<[2,40,1,32,32],si64> loc(#loc1)
%3840 = torch.aten.view %values_88, %2895 : !torch.vtensor<[2,40,1,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,40,1,32,32],f16> loc(#loc1)
%3841 = torch.aten.broadcast_to %3840, %2892 : !torch.vtensor<[2,40,1,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,40,16,32,32],f16> loc(#loc1)
%3842 = torch.aten.clone %3841, %int0 : !torch.vtensor<[2,40,16,32,32],f16>, !torch.int -> !torch.vtensor<[2,40,16,32,32],f16> loc(#loc1)
%3843 = torch.aten.view %3842, %2875 : !torch.vtensor<[2,40,16,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%3844 = torch.aten.sub.Tensor %3837, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%3845 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%3846 = torch.aten.pow.Tensor_Tensor %3845, %3844 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3847 = torch.aten.neg %3846 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3848 = torch.aten.neg %3847 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3849 = torch.aten.div.Tensor %3843, %3848 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%3850 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3851 = torch.aten.detach %3850 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3852 = torch.aten.div.Tensor %3835, %3849 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%3853 = torch.aten.add.Tensor %3852, %3851, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%3854 = torch.aten.sub.Tensor %3837, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%3855 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%3856 = torch.aten.pow.Tensor_Tensor %3855, %3854 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3857 = torch.aten.neg %3856 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3858 = torch.aten.sub.Tensor %3837, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%3859 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%3860 = torch.aten.pow.Tensor_Tensor %3859, %3858 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3861 = torch.aten.sub.Tensor %3860, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%3862 = torch.aten.gt.Tensor %3853, %3861 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,640,32,32],i1> loc(#loc1)
%3863 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%3864 = torch.aten.to.dtype %3863, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%3865 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%3866 = torch.aten.broadcast_to %3864, %3865 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%3867 = torch.valsem.aten.copy %3866, %3861, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%3868 = torch.aten.where.self %3862, %3867, %3853 : !torch.vtensor<[2,640,32,32],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%3869 = torch.aten.lt.Tensor %3868, %3857 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,640,32,32],i1> loc(#loc1)
%3870 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%3871 = torch.aten.to.dtype %3870, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%3872 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%3873 = torch.aten.broadcast_to %3871, %3872 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%3874 = torch.valsem.aten.copy %3873, %3857, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%3875 = torch.aten.where.self %3869, %3874, %3868 : !torch.vtensor<[2,640,32,32],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%3876 = torch.aten.round %3875 : !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%3877 = torch.aten.sub.Tensor %3876, %3851, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%3878 = torch.aten.mul.Tensor %3877, %3849 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%3879 = torch.aten.broadcast_to %168, %2935 : !torch.vtensor<[640,40,1,3,3],f16>, !torch.list<int> -> !torch.vtensor<[640,40,16,3,3],f16> loc(#loc1)
%3880 = torch.aten.clone %3879, %int0 : !torch.vtensor<[640,40,16,3,3],f16>, !torch.int -> !torch.vtensor<[640,40,16,3,3],f16> loc(#loc1)
%3881 = torch.aten.view %3880, %2938 : !torch.vtensor<[640,40,16,3,3],f16>, !torch.list<int> -> !torch.vtensor<[640,640,3,3],f16> loc(#loc1)
%3882 = torch.aten.mul.Tensor %169, %3881 : !torch.vtensor<[640,640,3,3],si8>, !torch.vtensor<[640,640,3,3],f16> -> !torch.vtensor<[640,640,3,3],f16> loc(#loc1)
%3883 = torch.aten.convolution %3878, %3882, %170, %933, %933, %933, %false, %934, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[640,640,3,3],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%3884 = torch.aten.add.Tensor %3707, %3883, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%3885 = torch.aten.div.Tensor %3884, %4 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
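// GroupNorm again, but with eps %3 (~1e-6) and, in this listing, no per-channel
// affine before the cast to f16, consistent with the normalization that precedes a
// spatial-transformer projection.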
%3886 = torch.aten.clone %3885, %int0 : !torch.vtensor<[2,640,32,32],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%3887 = torch.aten.view %3886, %2854 : !torch.vtensor<[2,640,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,32,20,1024],f16> loc(#loc1)
%3888 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%3889 = torch.aten.to.dtype %3888, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%3890 = torch.prim.ListConstruct %int2, %int32, %int20, %int1024 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%3891 = torch.aten.broadcast_to %3889, %3890 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,20,1024],f32> loc(#loc1)
%3892 = torch.valsem.aten.copy %3891, %3887, %false : !torch.vtensor<[2,32,20,1024],f32>, !torch.vtensor<[2,32,20,1024],f16>, !torch.bool -> !torch.vtensor<[2,32,20,1024],f32> loc(#loc1)
%3893 = torch.aten.to.dtype %3892, %int7, %false, %false, %none : !torch.vtensor<[2,32,20,1024],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,20,1024],f64> loc(#loc1)
%3894 = torch.aten.sum.dim_IntList %3893, %943, %true, %none : !torch.vtensor<[2,32,20,1024],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%3895 = torch.aten.div.Scalar %3894, %int20480 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%3896 = torch.aten.sub.Tensor %3893, %3895, %float1.000000e00 : !torch.vtensor<[2,32,20,1024],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,20,1024],f64> loc(#loc1)
%3897 = torch.aten.mul.Tensor %3896, %3896 : !torch.vtensor<[2,32,20,1024],f64>, !torch.vtensor<[2,32,20,1024],f64> -> !torch.vtensor<[2,32,20,1024],f64> loc(#loc1)
%3898 = torch.aten.sum.dim_IntList %3897, %943, %true, %none : !torch.vtensor<[2,32,20,1024],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%3899 = torch.aten.div.Scalar %3898, %int20480 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%3900 = torch.aten.to.dtype %3899, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%3901 = torch.aten.sum.dim_IntList %3892, %943, %true, %none : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%3902 = torch.aten.div.Scalar %3901, %int20480 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%3903 = torch.aten.add.Tensor %3900, %3, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%3904 = torch.aten.rsqrt %3903 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%3905 = torch.aten.sub.Tensor %3887, %3902, %int1 : !torch.vtensor<[2,32,20,1024],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,20,1024],f32> loc(#loc1)
%3906 = torch.aten.mul.Tensor %3905, %3904 : !torch.vtensor<[2,32,20,1024],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,20,1024],f32> loc(#loc1)
%3907 = torch.aten.view %3906, %2875 : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f32> loc(#loc1)
%3908 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%3909 = torch.aten.to.dtype %3908, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%3910 = torch.prim.ListConstruct %int2, %int640, %int32, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%3911 = torch.aten.broadcast_to %3909, %3910 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%3912 = torch.valsem.aten.copy %3911, %3907, %false : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f32>, !torch.bool -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
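// Fake-quantize %3912 with the same per-group absmax pattern (scalars %880/%881/%882).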
%3913 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3914 = torch.aten.detach %3913 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3915 = torch.aten.view %3912, %2892 : !torch.vtensor<[2,640,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,40,16,32,32],f16> loc(#loc1)
%3916 = torch.aten.abs %3915 : !torch.vtensor<[2,40,16,32,32],f16> -> !torch.vtensor<[2,40,16,32,32],f16> loc(#loc1)
%values_90, %indices_91 = torch.aten.max.dim %3916, %int2, %true : !torch.vtensor<[2,40,16,32,32],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,40,1,32,32],f16>, !torch.vtensor<[2,40,1,32,32],si64> loc(#loc1)
%3917 = torch.aten.view %values_90, %2895 : !torch.vtensor<[2,40,1,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,40,1,32,32],f16> loc(#loc1)
%3918 = torch.aten.broadcast_to %3917, %2892 : !torch.vtensor<[2,40,1,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,40,16,32,32],f16> loc(#loc1)
%3919 = torch.aten.clone %3918, %int0 : !torch.vtensor<[2,40,16,32,32],f16>, !torch.int -> !torch.vtensor<[2,40,16,32,32],f16> loc(#loc1)
%3920 = torch.aten.view %3919, %2875 : !torch.vtensor<[2,40,16,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%3921 = torch.aten.sub.Tensor %3914, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%3922 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%3923 = torch.aten.pow.Tensor_Tensor %3922, %3921 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3924 = torch.aten.neg %3923 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3925 = torch.aten.neg %3924 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3926 = torch.aten.div.Tensor %3920, %3925 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%3927 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3928 = torch.aten.detach %3927 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3929 = torch.aten.div.Tensor %3912, %3926 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%3930 = torch.aten.add.Tensor %3929, %3928, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%3931 = torch.aten.sub.Tensor %3914, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%3932 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%3933 = torch.aten.pow.Tensor_Tensor %3932, %3931 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3934 = torch.aten.neg %3933 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3935 = torch.aten.sub.Tensor %3914, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%3936 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%3937 = torch.aten.pow.Tensor_Tensor %3936, %3935 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3938 = torch.aten.sub.Tensor %3937, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%3939 = torch.aten.gt.Tensor %3930, %3938 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,640,32,32],i1> loc(#loc1)
%3940 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%3941 = torch.aten.to.dtype %3940, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%3942 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%3943 = torch.aten.broadcast_to %3941, %3942 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%3944 = torch.valsem.aten.copy %3943, %3938, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%3945 = torch.aten.where.self %3939, %3944, %3930 : !torch.vtensor<[2,640,32,32],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%3946 = torch.aten.lt.Tensor %3945, %3934 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,640,32,32],i1> loc(#loc1)
%3947 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%3948 = torch.aten.to.dtype %3947, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%3949 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%3950 = torch.aten.broadcast_to %3948, %3949 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%3951 = torch.valsem.aten.copy %3950, %3934, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%3952 = torch.aten.where.self %3946, %3951, %3945 : !torch.vtensor<[2,640,32,32],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%3953 = torch.aten.round %3952 : !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%3954 = torch.aten.sub.Tensor %3953, %3928, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%3955 = torch.aten.mul.Tensor %3954, %3926 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
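// Dequantized 1x1 convolution (scales %171, si8 weight %172, bias %173), then a
// NCHW -> NHWC permute and a flatten to a [2,1024,640] token sequence, which matches
// a spatial transformer's input projection.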
%3956 = torch.aten.broadcast_to %171, %3064 : !torch.vtensor<[640,40,1,1,1],f16>, !torch.list<int> -> !torch.vtensor<[640,40,16,1,1],f16> loc(#loc1)
%3957 = torch.aten.clone %3956, %int0 : !torch.vtensor<[640,40,16,1,1],f16>, !torch.int -> !torch.vtensor<[640,40,16,1,1],f16> loc(#loc1)
%3958 = torch.aten.view %3957, %3067 : !torch.vtensor<[640,40,16,1,1],f16>, !torch.list<int> -> !torch.vtensor<[640,640,1,1],f16> loc(#loc1)
%3959 = torch.aten.mul.Tensor %172, %3958 : !torch.vtensor<[640,640,1,1],si8>, !torch.vtensor<[640,640,1,1],f16> -> !torch.vtensor<[640,640,1,1],f16> loc(#loc1)
%3960 = torch.aten.convolution %3955, %3959, %173, %933, %934, %933, %false, %934, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[640,640,1,1],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%3961 = torch.aten.permute %3960, %1196 : !torch.vtensor<[2,640,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,32,32,640],f16> loc(#loc1)
%3962 = torch.aten.view %3961, %3072 : !torch.vtensor<[2,32,32,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
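// LayerNorm over the 640-dim features, done in f16: mean and biased variance over
// dim 2, rsqrt(var + 1e-5); no affine weight/bias is applied here.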
%3963 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int> loc(#loc1)
%3964 = torch.aten.sum.dim_IntList %3962, %3963, %true, %none : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,1024,1],f16> loc(#loc1)
%3965 = torch.aten.div.Scalar %3964, %int640 : !torch.vtensor<[2,1024,1],f16>, !torch.int -> !torch.vtensor<[2,1024,1],f16> loc(#loc1)
%3966 = torch.prim.ListConstruct %int2, %int1024, %int640 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%3967 = torch.aten.broadcast_to %3965, %3966 : !torch.vtensor<[2,1024,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3968 = torch.aten.sub.Tensor %3962, %3967, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3969 = torch.aten.mul.Tensor %3968, %3968 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3970 = torch.aten.sum.dim_IntList %3969, %3963, %true, %none : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,1024,1],f16> loc(#loc1)
%3971 = torch.aten.div.Scalar %3970, %int640 : !torch.vtensor<[2,1024,1],f16>, !torch.int -> !torch.vtensor<[2,1024,1],f16> loc(#loc1)
%3972 = torch.aten.add.Scalar %3971, %float1.000000e-05, %int1 : !torch.vtensor<[2,1024,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,1024,1],f16> loc(#loc1)
%3973 = torch.aten.rsqrt %3972 : !torch.vtensor<[2,1024,1],f16> -> !torch.vtensor<[2,1024,1],f16> loc(#loc1)
%3974 = torch.prim.ListConstruct %int2, %int1024, %int640 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%3975 = torch.aten.broadcast_to %3973, %3974 : !torch.vtensor<[2,1024,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3976 = torch.aten.mul.Tensor %3968, %3975 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
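// Fake-quantize the normalized tokens %3976, now grouped along the feature dim as
// [2,1024,40,16] (absmax over each group of 16), then the first of three quantized
// 640 -> 640 linears: scales %174 ([640,40,1]) dequantize the si8 weight %175, the
// flattened [2048,640] activations are matmul'd against the transposed weight, and
// bias %176 is added, giving %4029.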
%3977 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3978 = torch.aten.detach %3977 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3979 = torch.aten.view %3976, %3090 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,40,16],f16> loc(#loc1)
%3980 = torch.aten.abs %3979 : !torch.vtensor<[2,1024,40,16],f16> -> !torch.vtensor<[2,1024,40,16],f16> loc(#loc1)
%values_92, %indices_93 = torch.aten.max.dim %3980, %int3, %true : !torch.vtensor<[2,1024,40,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,1024,40,1],f16>, !torch.vtensor<[2,1024,40,1],si64> loc(#loc1)
%3981 = torch.aten.view %values_92, %3093 : !torch.vtensor<[2,1024,40,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,40,1],f16> loc(#loc1)
%3982 = torch.aten.broadcast_to %3981, %3090 : !torch.vtensor<[2,1024,40,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,40,16],f16> loc(#loc1)
%3983 = torch.aten.clone %3982, %int0 : !torch.vtensor<[2,1024,40,16],f16>, !torch.int -> !torch.vtensor<[2,1024,40,16],f16> loc(#loc1)
%3984 = torch.aten.view %3983, %3072 : !torch.vtensor<[2,1024,40,16],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3985 = torch.aten.sub.Tensor %3978, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%3986 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%3987 = torch.aten.pow.Tensor_Tensor %3986, %3985 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3988 = torch.aten.neg %3987 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3989 = torch.aten.neg %3988 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3990 = torch.aten.div.Tensor %3984, %3989 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3991 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3992 = torch.aten.detach %3991 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3993 = torch.aten.div.Tensor %3976, %3990 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3994 = torch.aten.add.Tensor %3993, %3992, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%3995 = torch.aten.sub.Tensor %3978, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%3996 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%3997 = torch.aten.pow.Tensor_Tensor %3996, %3995 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3998 = torch.aten.neg %3997 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%3999 = torch.aten.sub.Tensor %3978, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%4000 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%4001 = torch.aten.pow.Tensor_Tensor %4000, %3999 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4002 = torch.aten.sub.Tensor %4001, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%4003 = torch.aten.gt.Tensor %3994, %4002 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,1024,640],i1> loc(#loc1)
%4004 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%4005 = torch.aten.to.dtype %4004, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%4006 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%4007 = torch.aten.broadcast_to %4005, %4006 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%4008 = torch.valsem.aten.copy %4007, %4002, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%4009 = torch.aten.where.self %4003, %4008, %3994 : !torch.vtensor<[2,1024,640],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%4010 = torch.aten.lt.Tensor %4009, %3998 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,1024,640],i1> loc(#loc1)
%4011 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%4012 = torch.aten.to.dtype %4011, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%4013 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%4014 = torch.aten.broadcast_to %4012, %4013 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%4015 = torch.valsem.aten.copy %4014, %3998, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%4016 = torch.aten.where.self %4010, %4015, %4009 : !torch.vtensor<[2,1024,640],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%4017 = torch.aten.round %4016 : !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%4018 = torch.aten.sub.Tensor %4017, %3992, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%4019 = torch.aten.mul.Tensor %4018, %3990 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%4020 = torch.aten.broadcast_to %174, %3133 : !torch.vtensor<[640,40,1],f16>, !torch.list<int> -> !torch.vtensor<[640,40,16],f16> loc(#loc1)
%4021 = torch.aten.clone %4020, %int0 : !torch.vtensor<[640,40,16],f16>, !torch.int -> !torch.vtensor<[640,40,16],f16> loc(#loc1)
%4022 = torch.aten.view %4021, %3136 : !torch.vtensor<[640,40,16],f16>, !torch.list<int> -> !torch.vtensor<[640,640],f16> loc(#loc1)
%4023 = torch.aten.mul.Tensor %175, %4022 : !torch.vtensor<[640,640],si8>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16> loc(#loc1)
%4024 = torch.aten.transpose.int %4023, %int0, %int1 : !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,640],f16> loc(#loc1)
%4025 = torch.aten.view %4019, %3140 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16> loc(#loc1)
%4026 = torch.aten.mm %4025, %4024 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16> loc(#loc1)
%4027 = torch.aten.mul.Scalar %176, %int1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640],f16> loc(#loc1)
%4028 = torch.aten.add.Tensor %4027, %4026, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2048,640],f16>, !torch.int -> !torch.vtensor<[2048,640],f16> loc(#loc1)
%4029 = torch.aten.view %4028, %3072 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
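// %3976 is re-quantized (same pattern) and run through a second quantized 640 -> 640
// linear (%177/%178/%179), giving %4082.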
%4030 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4031 = torch.aten.detach %4030 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4032 = torch.aten.view %3976, %3090 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,40,16],f16> loc(#loc1)
%4033 = torch.aten.abs %4032 : !torch.vtensor<[2,1024,40,16],f16> -> !torch.vtensor<[2,1024,40,16],f16> loc(#loc1)
%values_94, %indices_95 = torch.aten.max.dim %4033, %int3, %true : !torch.vtensor<[2,1024,40,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,1024,40,1],f16>, !torch.vtensor<[2,1024,40,1],si64> loc(#loc1)
%4034 = torch.aten.view %values_94, %3093 : !torch.vtensor<[2,1024,40,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,40,1],f16> loc(#loc1)
%4035 = torch.aten.broadcast_to %4034, %3090 : !torch.vtensor<[2,1024,40,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,40,16],f16> loc(#loc1)
%4036 = torch.aten.clone %4035, %int0 : !torch.vtensor<[2,1024,40,16],f16>, !torch.int -> !torch.vtensor<[2,1024,40,16],f16> loc(#loc1)
%4037 = torch.aten.view %4036, %3072 : !torch.vtensor<[2,1024,40,16],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%4038 = torch.aten.sub.Tensor %4031, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%4039 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%4040 = torch.aten.pow.Tensor_Tensor %4039, %4038 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4041 = torch.aten.neg %4040 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4042 = torch.aten.neg %4041 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4043 = torch.aten.div.Tensor %4037, %4042 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%4044 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4045 = torch.aten.detach %4044 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4046 = torch.aten.div.Tensor %3976, %4043 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%4047 = torch.aten.add.Tensor %4046, %4045, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%4048 = torch.aten.sub.Tensor %4031, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%4049 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%4050 = torch.aten.pow.Tensor_Tensor %4049, %4048 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4051 = torch.aten.neg %4050 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4052 = torch.aten.sub.Tensor %4031, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%4053 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%4054 = torch.aten.pow.Tensor_Tensor %4053, %4052 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4055 = torch.aten.sub.Tensor %4054, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%4056 = torch.aten.gt.Tensor %4047, %4055 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,1024,640],i1> loc(#loc1)
%4057 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%4058 = torch.aten.to.dtype %4057, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%4059 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%4060 = torch.aten.broadcast_to %4058, %4059 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%4061 = torch.valsem.aten.copy %4060, %4055, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%4062 = torch.aten.where.self %4056, %4061, %4047 : !torch.vtensor<[2,1024,640],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%4063 = torch.aten.lt.Tensor %4062, %4051 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,1024,640],i1> loc(#loc1)
%4064 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%4065 = torch.aten.to.dtype %4064, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%4066 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%4067 = torch.aten.broadcast_to %4065, %4066 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%4068 = torch.valsem.aten.copy %4067, %4051, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%4069 = torch.aten.where.self %4063, %4068, %4062 : !torch.vtensor<[2,1024,640],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%4070 = torch.aten.round %4069 : !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%4071 = torch.aten.sub.Tensor %4070, %4045, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%4072 = torch.aten.mul.Tensor %4071, %4043 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%4073 = torch.aten.broadcast_to %177, %3133 : !torch.vtensor<[640,40,1],f16>, !torch.list<int> -> !torch.vtensor<[640,40,16],f16> loc(#loc1)
%4074 = torch.aten.clone %4073, %int0 : !torch.vtensor<[640,40,16],f16>, !torch.int -> !torch.vtensor<[640,40,16],f16> loc(#loc1)
%4075 = torch.aten.view %4074, %3136 : !torch.vtensor<[640,40,16],f16>, !torch.list<int> -> !torch.vtensor<[640,640],f16> loc(#loc1)
%4076 = torch.aten.mul.Tensor %178, %4075 : !torch.vtensor<[640,640],si8>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16> loc(#loc1)
%4077 = torch.aten.transpose.int %4076, %int0, %int1 : !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,640],f16> loc(#loc1)
%4078 = torch.aten.view %4072, %3140 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16> loc(#loc1)
%4079 = torch.aten.mm %4078, %4077 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16> loc(#loc1)
%4080 = torch.aten.mul.Scalar %179, %int1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640],f16> loc(#loc1)
%4081 = torch.aten.add.Tensor %4080, %4079, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2048,640],f16>, !torch.int -> !torch.vtensor<[2048,640],f16> loc(#loc1)
%4082 = torch.aten.view %4081, %3072 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
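// Third pass over the same tokens: re-quantize %3976 and apply the last 640 -> 640
// linear (%180/%181/%182), giving %4135. Three parallel projections of one normalized
// sequence look like the Q/K/V of a self-attention layer.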
%4083 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4084 = torch.aten.detach %4083 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4085 = torch.aten.view %3976, %3090 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,40,16],f16> loc(#loc1)
%4086 = torch.aten.abs %4085 : !torch.vtensor<[2,1024,40,16],f16> -> !torch.vtensor<[2,1024,40,16],f16> loc(#loc1)
%values_96, %indices_97 = torch.aten.max.dim %4086, %int3, %true : !torch.vtensor<[2,1024,40,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,1024,40,1],f16>, !torch.vtensor<[2,1024,40,1],si64> loc(#loc1)
%4087 = torch.aten.view %values_96, %3093 : !torch.vtensor<[2,1024,40,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,40,1],f16> loc(#loc1)
%4088 = torch.aten.broadcast_to %4087, %3090 : !torch.vtensor<[2,1024,40,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,40,16],f16> loc(#loc1)
%4089 = torch.aten.clone %4088, %int0 : !torch.vtensor<[2,1024,40,16],f16>, !torch.int -> !torch.vtensor<[2,1024,40,16],f16> loc(#loc1)
%4090 = torch.aten.view %4089, %3072 : !torch.vtensor<[2,1024,40,16],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%4091 = torch.aten.sub.Tensor %4084, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%4092 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%4093 = torch.aten.pow.Tensor_Tensor %4092, %4091 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4094 = torch.aten.neg %4093 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4095 = torch.aten.neg %4094 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4096 = torch.aten.div.Tensor %4090, %4095 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%4097 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4098 = torch.aten.detach %4097 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4099 = torch.aten.div.Tensor %3976, %4096 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%4100 = torch.aten.add.Tensor %4099, %4098, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%4101 = torch.aten.sub.Tensor %4084, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%4102 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%4103 = torch.aten.pow.Tensor_Tensor %4102, %4101 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4104 = torch.aten.neg %4103 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4105 = torch.aten.sub.Tensor %4084, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%4106 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%4107 = torch.aten.pow.Tensor_Tensor %4106, %4105 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4108 = torch.aten.sub.Tensor %4107, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%4109 = torch.aten.gt.Tensor %4100, %4108 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,1024,640],i1> loc(#loc1)
%4110 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%4111 = torch.aten.to.dtype %4110, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%4112 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%4113 = torch.aten.broadcast_to %4111, %4112 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%4114 = torch.valsem.aten.copy %4113, %4108, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%4115 = torch.aten.where.self %4109, %4114, %4100 : !torch.vtensor<[2,1024,640],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%4116 = torch.aten.lt.Tensor %4115, %4104 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,1024,640],i1> loc(#loc1)
%4117 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%4118 = torch.aten.to.dtype %4117, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%4119 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%4120 = torch.aten.broadcast_to %4118, %4119 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%4121 = torch.valsem.aten.copy %4120, %4104, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%4122 = torch.aten.where.self %4116, %4121, %4115 : !torch.vtensor<[2,1024,640],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%4123 = torch.aten.round %4122 : !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%4124 = torch.aten.sub.Tensor %4123, %4098, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%4125 = torch.aten.mul.Tensor %4124, %4096 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
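// End of the fake-quantize round trip (round, subtract zero point, rescale). Next:
// dequantize the si8 weight %181 with per-group f16 scales %180 (group size 16) and apply
// the linear layer; judging by its use in the bmm below, %4135 is the value (V) projection
// of the self-attention.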
%4126 = torch.aten.broadcast_to %180, %3133 : !torch.vtensor<[640,40,1],f16>, !torch.list<int> -> !torch.vtensor<[640,40,16],f16> loc(#loc1)
%4127 = torch.aten.clone %4126, %int0 : !torch.vtensor<[640,40,16],f16>, !torch.int -> !torch.vtensor<[640,40,16],f16> loc(#loc1)
%4128 = torch.aten.view %4127, %3136 : !torch.vtensor<[640,40,16],f16>, !torch.list<int> -> !torch.vtensor<[640,640],f16> loc(#loc1)
%4129 = torch.aten.mul.Tensor %181, %4128 : !torch.vtensor<[640,640],si8>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16> loc(#loc1)
%4130 = torch.aten.transpose.int %4129, %int0, %int1 : !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,640],f16> loc(#loc1)
%4131 = torch.aten.view %4125, %3140 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16> loc(#loc1)
%4132 = torch.aten.mm %4131, %4130 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16> loc(#loc1)
%4133 = torch.aten.mul.Scalar %182, %int1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640],f16> loc(#loc1)
%4134 = torch.aten.add.Tensor %4133, %4132, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2048,640],f16>, !torch.int -> !torch.vtensor<[2048,640],f16> loc(#loc1)
%4135 = torch.aten.view %4134, %3072 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
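// Multi-head self-attention setup: reshape Q (%4029), K (%4082) and V (%4135) from
// [2,1024,640] into 8 heads of dim 80 -> [16,1024,80].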
%4136 = torch.aten.view %4029, %3252 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16> loc(#loc1)
%4137 = torch.aten.permute %4136, %1380 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16> loc(#loc1)
%4138 = torch.aten.clone %4137, %int0 : !torch.vtensor<[2,8,1024,80],f16>, !torch.int -> !torch.vtensor<[2,8,1024,80],f16> loc(#loc1)
%4139 = torch.aten.view %4138, %3256 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16> loc(#loc1)
%4140 = torch.aten.view %4082, %3252 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16> loc(#loc1)
%4141 = torch.aten.permute %4140, %1380 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16> loc(#loc1)
%4142 = torch.aten.clone %4141, %int0 : !torch.vtensor<[2,8,1024,80],f16>, !torch.int -> !torch.vtensor<[2,8,1024,80],f16> loc(#loc1)
%4143 = torch.aten.view %4142, %3256 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16> loc(#loc1)
%4144 = torch.aten.view %4135, %3252 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16> loc(#loc1)
%4145 = torch.aten.permute %4144, %1380 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16> loc(#loc1)
%4146 = torch.aten.clone %4145, %int0 : !torch.vtensor<[2,8,1024,80],f16>, !torch.int -> !torch.vtensor<[2,8,1024,80],f16> loc(#loc1)
%4147 = torch.aten.view %4146, %3256 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16> loc(#loc1)
%4148 = torch.aten.transpose.int %4143, %int-1, %int-2 : !torch.vtensor<[16,1024,80],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,80,1024],f16> loc(#loc1)
%4149 = torch.aten.broadcast_to %4139, %3256 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16> loc(#loc1)
%4150 = torch.aten.view %4149, %3256 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16> loc(#loc1)
%4151 = torch.aten.broadcast_to %4148, %3269 : !torch.vtensor<[16,80,1024],f16>, !torch.list<int> -> !torch.vtensor<[16,80,1024],f16> loc(#loc1)
%4152 = torch.aten.view %4151, %3269 : !torch.vtensor<[16,80,1024],f16>, !torch.list<int> -> !torch.vtensor<[16,80,1024],f16> loc(#loc1)
%4153 = torch.aten.bmm %4150, %4152 : !torch.vtensor<[16,1024,80],f16>, !torch.vtensor<[16,80,1024],f16> -> !torch.vtensor<[16,1024,1024],f16> loc(#loc1)
%4154 = torch.aten.view %4153, %3273 : !torch.vtensor<[16,1024,1024],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,1024],f16> loc(#loc1)
%4155 = torch.aten.mul.Tensor %4154, %1 : !torch.vtensor<[16,1024,1024],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,1024,1024],f16> loc(#loc1)
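// Numerically stable softmax over the attention scores: subtract the row max before exp,
// then normalize by the row sum.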
%values_98, %indices_99 = torch.aten.max.dim %4155, %int-1, %true : !torch.vtensor<[16,1024,1024],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,1024,1],f16>, !torch.vtensor<[16,1024,1],si64> loc(#loc1)
%4156 = torch.aten.sub.Tensor %4155, %values_98, %float1.000000e00 : !torch.vtensor<[16,1024,1024],f16>, !torch.vtensor<[16,1024,1],f16>, !torch.float -> !torch.vtensor<[16,1024,1024],f16> loc(#loc1)
%4157 = torch.aten.exp %4156 : !torch.vtensor<[16,1024,1024],f16> -> !torch.vtensor<[16,1024,1024],f16> loc(#loc1)
%4158 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int> loc(#loc1)
%4159 = torch.aten.sum.dim_IntList %4157, %4158, %true, %none : !torch.vtensor<[16,1024,1024],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[16,1024,1],f16> loc(#loc1)
%4160 = torch.aten.div.Tensor %4157, %4159 : !torch.vtensor<[16,1024,1024],f16>, !torch.vtensor<[16,1024,1],f16> -> !torch.vtensor<[16,1024,1024],f16> loc(#loc1)
%4161 = torch.aten.broadcast_to %4160, %3273 : !torch.vtensor<[16,1024,1024],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,1024],f16> loc(#loc1)
%4162 = torch.aten.view %4161, %3273 : !torch.vtensor<[16,1024,1024],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,1024],f16> loc(#loc1)
%4163 = torch.aten.broadcast_to %4147, %3256 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16> loc(#loc1)
%4164 = torch.aten.view %4163, %3256 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16> loc(#loc1)
%4165 = torch.aten.bmm %4162, %4164 : !torch.vtensor<[16,1024,1024],f16>, !torch.vtensor<[16,1024,80],f16> -> !torch.vtensor<[16,1024,80],f16> loc(#loc1)
%4166 = torch.aten.view %4165, %3256 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16> loc(#loc1)
%4167 = torch.aten.view %4166, %3287 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16> loc(#loc1)
%4168 = torch.aten.permute %4167, %1380 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16> loc(#loc1)
%4169 = torch.aten.clone %4168, %int0 : !torch.vtensor<[2,1024,8,80],f16>, !torch.int -> !torch.vtensor<[2,1024,8,80],f16> loc(#loc1)
%4170 = torch.aten.view %4169, %3072 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
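// Heads merged back to [2,1024,640]; fake-quantize the attention output %4170 with the
// same per-group absmax scheme before the output projection.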
%4171 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4172 = torch.aten.detach %4171 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4173 = torch.aten.view %4170, %3090 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,40,16],f16> loc(#loc1)
%4174 = torch.aten.abs %4173 : !torch.vtensor<[2,1024,40,16],f16> -> !torch.vtensor<[2,1024,40,16],f16> loc(#loc1)
%values_100, %indices_101 = torch.aten.max.dim %4174, %int3, %true : !torch.vtensor<[2,1024,40,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,1024,40,1],f16>, !torch.vtensor<[2,1024,40,1],si64> loc(#loc1)
%4175 = torch.aten.view %values_100, %3093 : !torch.vtensor<[2,1024,40,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,40,1],f16> loc(#loc1)
%4176 = torch.aten.broadcast_to %4175, %3090 : !torch.vtensor<[2,1024,40,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,40,16],f16> loc(#loc1)
%4177 = torch.aten.clone %4176, %int0 : !torch.vtensor<[2,1024,40,16],f16>, !torch.int -> !torch.vtensor<[2,1024,40,16],f16> loc(#loc1)
%4178 = torch.aten.view %4177, %3072 : !torch.vtensor<[2,1024,40,16],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%4179 = torch.aten.sub.Tensor %4172, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%4180 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%4181 = torch.aten.pow.Tensor_Tensor %4180, %4179 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4182 = torch.aten.neg %4181 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4183 = torch.aten.neg %4182 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4184 = torch.aten.div.Tensor %4178, %4183 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%4185 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4186 = torch.aten.detach %4185 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4187 = torch.aten.div.Tensor %4170, %4184 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%4188 = torch.aten.add.Tensor %4187, %4186, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%4189 = torch.aten.sub.Tensor %4172, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%4190 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%4191 = torch.aten.pow.Tensor_Tensor %4190, %4189 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4192 = torch.aten.neg %4191 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4193 = torch.aten.sub.Tensor %4172, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%4194 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%4195 = torch.aten.pow.Tensor_Tensor %4194, %4193 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4196 = torch.aten.sub.Tensor %4195, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%4197 = torch.aten.gt.Tensor %4188, %4196 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,1024,640],i1> loc(#loc1)
%4198 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%4199 = torch.aten.to.dtype %4198, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%4200 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%4201 = torch.aten.broadcast_to %4199, %4200 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%4202 = torch.valsem.aten.copy %4201, %4196, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%4203 = torch.aten.where.self %4197, %4202, %4188 : !torch.vtensor<[2,1024,640],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%4204 = torch.aten.lt.Tensor %4203, %4192 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,1024,640],i1> loc(#loc1)
%4205 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%4206 = torch.aten.to.dtype %4205, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%4207 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%4208 = torch.aten.broadcast_to %4206, %4207 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%4209 = torch.valsem.aten.copy %4208, %4192, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%4210 = torch.aten.where.self %4204, %4209, %4203 : !torch.vtensor<[2,1024,640],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%4211 = torch.aten.round %4210 : !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%4212 = torch.aten.sub.Tensor %4211, %4186, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%4213 = torch.aten.mul.Tensor %4212, %4184 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
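// Dequantize the si8 output-projection weight %184 (scales %183, bias %185) and apply it;
// the residual add with %3962 follows.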
%4214 = torch.aten.broadcast_to %183, %3133 : !torch.vtensor<[640,40,1],f16>, !torch.list<int> -> !torch.vtensor<[640,40,16],f16> loc(#loc1)
%4215 = torch.aten.clone %4214, %int0 : !torch.vtensor<[640,40,16],f16>, !torch.int -> !torch.vtensor<[640,40,16],f16> loc(#loc1)
%4216 = torch.aten.view %4215, %3136 : !torch.vtensor<[640,40,16],f16>, !torch.list<int> -> !torch.vtensor<[640,640],f16> loc(#loc1)
%4217 = torch.aten.mul.Tensor %184, %4216 : !torch.vtensor<[640,640],si8>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16> loc(#loc1)
%4218 = torch.aten.transpose.int %4217, %int0, %int1 : !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,640],f16> loc(#loc1)
%4219 = torch.aten.view %4213, %3140 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16> loc(#loc1)
%4220 = torch.aten.mm %4219, %4218 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16> loc(#loc1)
%4221 = torch.aten.mul.Scalar %185, %int1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640],f16> loc(#loc1)
%4222 = torch.aten.add.Tensor %4221, %4220, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2048,640],f16>, !torch.int -> !torch.vtensor<[2048,640],f16> loc(#loc1)
%4223 = torch.aten.view %4222, %3072 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%4224 = torch.aten.add.Tensor %4223, %3962, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
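// LayerNorm over the 640-channel dim, spelled out from primitives: mean, centered
// variance, rsqrt(var + 1e-5).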
%4225 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int> loc(#loc1)
%4226 = torch.aten.sum.dim_IntList %4224, %4225, %true, %none : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,1024,1],f16> loc(#loc1)
%4227 = torch.aten.div.Scalar %4226, %int640 : !torch.vtensor<[2,1024,1],f16>, !torch.int -> !torch.vtensor<[2,1024,1],f16> loc(#loc1)
%4228 = torch.prim.ListConstruct %int2, %int1024, %int640 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%4229 = torch.aten.broadcast_to %4227, %4228 : !torch.vtensor<[2,1024,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%4230 = torch.aten.sub.Tensor %4224, %4229, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%4231 = torch.aten.mul.Tensor %4230, %4230 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%4232 = torch.aten.sum.dim_IntList %4231, %4225, %true, %none : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,1024,1],f16> loc(#loc1)
%4233 = torch.aten.div.Scalar %4232, %int640 : !torch.vtensor<[2,1024,1],f16>, !torch.int -> !torch.vtensor<[2,1024,1],f16> loc(#loc1)
%4234 = torch.aten.add.Scalar %4233, %float1.000000e-05, %int1 : !torch.vtensor<[2,1024,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,1024,1],f16> loc(#loc1)
%4235 = torch.aten.rsqrt %4234 : !torch.vtensor<[2,1024,1],f16> -> !torch.vtensor<[2,1024,1],f16> loc(#loc1)
%4236 = torch.prim.ListConstruct %int2, %int1024, %int640 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%4237 = torch.aten.broadcast_to %4235, %4236 : !torch.vtensor<[2,1024,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%4238 = torch.aten.mul.Tensor %4230, %4237 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
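// Fake-quantize the normalized activations, then a quantized 640x640 linear
// (%186/%187, bias %188); %4291 feeds the cross-attention below as the queries.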
%4239 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4240 = torch.aten.detach %4239 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4241 = torch.aten.view %4238, %3090 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,40,16],f16> loc(#loc1)
%4242 = torch.aten.abs %4241 : !torch.vtensor<[2,1024,40,16],f16> -> !torch.vtensor<[2,1024,40,16],f16> loc(#loc1)
%values_102, %indices_103 = torch.aten.max.dim %4242, %int3, %true : !torch.vtensor<[2,1024,40,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,1024,40,1],f16>, !torch.vtensor<[2,1024,40,1],si64> loc(#loc1)
%4243 = torch.aten.view %values_102, %3093 : !torch.vtensor<[2,1024,40,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,40,1],f16> loc(#loc1)
%4244 = torch.aten.broadcast_to %4243, %3090 : !torch.vtensor<[2,1024,40,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,40,16],f16> loc(#loc1)
%4245 = torch.aten.clone %4244, %int0 : !torch.vtensor<[2,1024,40,16],f16>, !torch.int -> !torch.vtensor<[2,1024,40,16],f16> loc(#loc1)
%4246 = torch.aten.view %4245, %3072 : !torch.vtensor<[2,1024,40,16],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%4247 = torch.aten.sub.Tensor %4240, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%4248 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%4249 = torch.aten.pow.Tensor_Tensor %4248, %4247 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4250 = torch.aten.neg %4249 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4251 = torch.aten.neg %4250 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4252 = torch.aten.div.Tensor %4246, %4251 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%4253 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4254 = torch.aten.detach %4253 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4255 = torch.aten.div.Tensor %4238, %4252 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%4256 = torch.aten.add.Tensor %4255, %4254, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%4257 = torch.aten.sub.Tensor %4240, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%4258 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%4259 = torch.aten.pow.Tensor_Tensor %4258, %4257 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4260 = torch.aten.neg %4259 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4261 = torch.aten.sub.Tensor %4240, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%4262 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%4263 = torch.aten.pow.Tensor_Tensor %4262, %4261 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4264 = torch.aten.sub.Tensor %4263, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%4265 = torch.aten.gt.Tensor %4256, %4264 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,1024,640],i1> loc(#loc1)
%4266 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%4267 = torch.aten.to.dtype %4266, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%4268 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%4269 = torch.aten.broadcast_to %4267, %4268 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%4270 = torch.valsem.aten.copy %4269, %4264, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%4271 = torch.aten.where.self %4265, %4270, %4256 : !torch.vtensor<[2,1024,640],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%4272 = torch.aten.lt.Tensor %4271, %4260 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,1024,640],i1> loc(#loc1)
%4273 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%4274 = torch.aten.to.dtype %4273, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%4275 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%4276 = torch.aten.broadcast_to %4274, %4275 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%4277 = torch.valsem.aten.copy %4276, %4260, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%4278 = torch.aten.where.self %4272, %4277, %4271 : !torch.vtensor<[2,1024,640],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%4279 = torch.aten.round %4278 : !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%4280 = torch.aten.sub.Tensor %4279, %4254, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%4281 = torch.aten.mul.Tensor %4280, %4252 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%4282 = torch.aten.broadcast_to %186, %3133 : !torch.vtensor<[640,40,1],f16>, !torch.list<int> -> !torch.vtensor<[640,40,16],f16> loc(#loc1)
%4283 = torch.aten.clone %4282, %int0 : !torch.vtensor<[640,40,16],f16>, !torch.int -> !torch.vtensor<[640,40,16],f16> loc(#loc1)
%4284 = torch.aten.view %4283, %3136 : !torch.vtensor<[640,40,16],f16>, !torch.list<int> -> !torch.vtensor<[640,640],f16> loc(#loc1)
%4285 = torch.aten.mul.Tensor %187, %4284 : !torch.vtensor<[640,640],si8>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16> loc(#loc1)
%4286 = torch.aten.transpose.int %4285, %int0, %int1 : !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,640],f16> loc(#loc1)
%4287 = torch.aten.view %4281, %3140 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16> loc(#loc1)
%4288 = torch.aten.mm %4287, %4286 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16> loc(#loc1)
%4289 = torch.aten.mul.Scalar %188, %int1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640],f16> loc(#loc1)
%4290 = torch.aten.add.Tensor %4289, %4288, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2048,640],f16>, !torch.int -> !torch.vtensor<[2048,640],f16> loc(#loc1)
%4291 = torch.aten.view %4290, %3072 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
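// Quantize the encoder hidden states %arg2 ([2,77,768], presumably the text embeddings;
// grouped 48x16) and project 768 -> 640 (%189/%190). Note there is no bias add here,
// consistent with an attention key projection.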
%4292 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4293 = torch.aten.detach %4292 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4294 = torch.aten.view %arg2, %1542 : !torch.vtensor<[2,77,768],f16>, !torch.list<int> -> !torch.vtensor<[2,77,48,16],f16> loc(#loc1)
%4295 = torch.aten.abs %4294 : !torch.vtensor<[2,77,48,16],f16> -> !torch.vtensor<[2,77,48,16],f16> loc(#loc1)
%values_104, %indices_105 = torch.aten.max.dim %4295, %int3, %true : !torch.vtensor<[2,77,48,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,77,48,1],f16>, !torch.vtensor<[2,77,48,1],si64> loc(#loc1)
%4296 = torch.aten.view %values_104, %1545 : !torch.vtensor<[2,77,48,1],f16>, !torch.list<int> -> !torch.vtensor<[2,77,48,1],f16> loc(#loc1)
%4297 = torch.aten.broadcast_to %4296, %1542 : !torch.vtensor<[2,77,48,1],f16>, !torch.list<int> -> !torch.vtensor<[2,77,48,16],f16> loc(#loc1)
%4298 = torch.aten.clone %4297, %int0 : !torch.vtensor<[2,77,48,16],f16>, !torch.int -> !torch.vtensor<[2,77,48,16],f16> loc(#loc1)
%4299 = torch.aten.view %4298, %1549 : !torch.vtensor<[2,77,48,16],f16>, !torch.list<int> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%4300 = torch.aten.sub.Tensor %4293, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%4301 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%4302 = torch.aten.pow.Tensor_Tensor %4301, %4300 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4303 = torch.aten.neg %4302 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4304 = torch.aten.neg %4303 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4305 = torch.aten.div.Tensor %4299, %4304 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%4306 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4307 = torch.aten.detach %4306 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4308 = torch.aten.div.Tensor %arg2, %4305 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[2,77,768],f16> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%4309 = torch.aten.add.Tensor %4308, %4307, %int1 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%4310 = torch.aten.sub.Tensor %4293, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%4311 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%4312 = torch.aten.pow.Tensor_Tensor %4311, %4310 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4313 = torch.aten.neg %4312 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4314 = torch.aten.sub.Tensor %4293, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%4315 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%4316 = torch.aten.pow.Tensor_Tensor %4315, %4314 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4317 = torch.aten.sub.Tensor %4316, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%4318 = torch.aten.gt.Tensor %4309, %4317 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,77,768],i1> loc(#loc1)
%4319 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%4320 = torch.aten.to.dtype %4319, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%4321 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%4322 = torch.aten.broadcast_to %4320, %4321 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%4323 = torch.valsem.aten.copy %4322, %4317, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%4324 = torch.aten.where.self %4318, %4323, %4309 : !torch.vtensor<[2,77,768],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,77,768],f16> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%4325 = torch.aten.lt.Tensor %4324, %4313 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,77,768],i1> loc(#loc1)
%4326 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%4327 = torch.aten.to.dtype %4326, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%4328 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%4329 = torch.aten.broadcast_to %4327, %4328 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%4330 = torch.valsem.aten.copy %4329, %4313, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%4331 = torch.aten.where.self %4325, %4330, %4324 : !torch.vtensor<[2,77,768],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,77,768],f16> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%4332 = torch.aten.round %4331 : !torch.vtensor<[2,77,768],f16> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%4333 = torch.aten.sub.Tensor %4332, %4307, %int1 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%4334 = torch.aten.mul.Tensor %4333, %4305 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[2,77,768],f16> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%4335 = torch.aten.broadcast_to %189, %3456 : !torch.vtensor<[640,48,1],f16>, !torch.list<int> -> !torch.vtensor<[640,48,16],f16> loc(#loc1)
%4336 = torch.aten.clone %4335, %int0 : !torch.vtensor<[640,48,16],f16>, !torch.int -> !torch.vtensor<[640,48,16],f16> loc(#loc1)
%4337 = torch.aten.view %4336, %3459 : !torch.vtensor<[640,48,16],f16>, !torch.list<int> -> !torch.vtensor<[640,768],f16> loc(#loc1)
%4338 = torch.aten.mul.Tensor %190, %4337 : !torch.vtensor<[640,768],si8>, !torch.vtensor<[640,768],f16> -> !torch.vtensor<[640,768],f16> loc(#loc1)
%4339 = torch.aten.transpose.int %4338, %int0, %int1 : !torch.vtensor<[640,768],f16>, !torch.int, !torch.int -> !torch.vtensor<[768,640],f16> loc(#loc1)
%4340 = torch.aten.view %4334, %1593 : !torch.vtensor<[2,77,768],f16>, !torch.list<int> -> !torch.vtensor<[154,768],f16> loc(#loc1)
%4341 = torch.aten.mm %4340, %4339 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,640],f16> -> !torch.vtensor<[154,640],f16> loc(#loc1)
%4342 = torch.aten.view %4341, %3465 : !torch.vtensor<[154,640],f16>, !torch.list<int> -> !torch.vtensor<[2,77,640],f16> loc(#loc1)
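// The same quantization of %arg2 is recomputed for the value projection (%191/%192),
// again without bias.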
%4343 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4344 = torch.aten.detach %4343 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4345 = torch.aten.view %arg2, %1542 : !torch.vtensor<[2,77,768],f16>, !torch.list<int> -> !torch.vtensor<[2,77,48,16],f16> loc(#loc1)
%4346 = torch.aten.abs %4345 : !torch.vtensor<[2,77,48,16],f16> -> !torch.vtensor<[2,77,48,16],f16> loc(#loc1)
%values_106, %indices_107 = torch.aten.max.dim %4346, %int3, %true : !torch.vtensor<[2,77,48,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,77,48,1],f16>, !torch.vtensor<[2,77,48,1],si64> loc(#loc1)
%4347 = torch.aten.view %values_106, %1545 : !torch.vtensor<[2,77,48,1],f16>, !torch.list<int> -> !torch.vtensor<[2,77,48,1],f16> loc(#loc1)
%4348 = torch.aten.broadcast_to %4347, %1542 : !torch.vtensor<[2,77,48,1],f16>, !torch.list<int> -> !torch.vtensor<[2,77,48,16],f16> loc(#loc1)
%4349 = torch.aten.clone %4348, %int0 : !torch.vtensor<[2,77,48,16],f16>, !torch.int -> !torch.vtensor<[2,77,48,16],f16> loc(#loc1)
%4350 = torch.aten.view %4349, %1549 : !torch.vtensor<[2,77,48,16],f16>, !torch.list<int> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%4351 = torch.aten.sub.Tensor %4344, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%4352 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%4353 = torch.aten.pow.Tensor_Tensor %4352, %4351 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4354 = torch.aten.neg %4353 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4355 = torch.aten.neg %4354 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4356 = torch.aten.div.Tensor %4350, %4355 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%4357 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4358 = torch.aten.detach %4357 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4359 = torch.aten.div.Tensor %arg2, %4356 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[2,77,768],f16> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%4360 = torch.aten.add.Tensor %4359, %4358, %int1 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%4361 = torch.aten.sub.Tensor %4344, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%4362 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%4363 = torch.aten.pow.Tensor_Tensor %4362, %4361 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4364 = torch.aten.neg %4363 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4365 = torch.aten.sub.Tensor %4344, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%4366 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%4367 = torch.aten.pow.Tensor_Tensor %4366, %4365 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4368 = torch.aten.sub.Tensor %4367, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%4369 = torch.aten.gt.Tensor %4360, %4368 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,77,768],i1> loc(#loc1)
%4370 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%4371 = torch.aten.to.dtype %4370, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%4372 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%4373 = torch.aten.broadcast_to %4371, %4372 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%4374 = torch.valsem.aten.copy %4373, %4368, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%4375 = torch.aten.where.self %4369, %4374, %4360 : !torch.vtensor<[2,77,768],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,77,768],f16> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%4376 = torch.aten.lt.Tensor %4375, %4364 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,77,768],i1> loc(#loc1)
%4377 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%4378 = torch.aten.to.dtype %4377, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%4379 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%4380 = torch.aten.broadcast_to %4378, %4379 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%4381 = torch.valsem.aten.copy %4380, %4364, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%4382 = torch.aten.where.self %4376, %4381, %4375 : !torch.vtensor<[2,77,768],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,77,768],f16> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%4383 = torch.aten.round %4382 : !torch.vtensor<[2,77,768],f16> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%4384 = torch.aten.sub.Tensor %4383, %4358, %int1 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%4385 = torch.aten.mul.Tensor %4384, %4356 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[2,77,768],f16> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%4386 = torch.aten.broadcast_to %191, %3456 : !torch.vtensor<[640,48,1],f16>, !torch.list<int> -> !torch.vtensor<[640,48,16],f16> loc(#loc1)
%4387 = torch.aten.clone %4386, %int0 : !torch.vtensor<[640,48,16],f16>, !torch.int -> !torch.vtensor<[640,48,16],f16> loc(#loc1)
%4388 = torch.aten.view %4387, %3459 : !torch.vtensor<[640,48,16],f16>, !torch.list<int> -> !torch.vtensor<[640,768],f16> loc(#loc1)
%4389 = torch.aten.mul.Tensor %192, %4388 : !torch.vtensor<[640,768],si8>, !torch.vtensor<[640,768],f16> -> !torch.vtensor<[640,768],f16> loc(#loc1)
%4390 = torch.aten.transpose.int %4389, %int0, %int1 : !torch.vtensor<[640,768],f16>, !torch.int, !torch.int -> !torch.vtensor<[768,640],f16> loc(#loc1)
%4391 = torch.aten.view %4385, %1593 : !torch.vtensor<[2,77,768],f16>, !torch.list<int> -> !torch.vtensor<[154,768],f16> loc(#loc1)
%4392 = torch.aten.mm %4391, %4390 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,640],f16> -> !torch.vtensor<[154,640],f16> loc(#loc1)
%4393 = torch.aten.view %4392, %3465 : !torch.vtensor<[154,640],f16>, !torch.list<int> -> !torch.vtensor<[2,77,640],f16> loc(#loc1)
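// Cross-attention: the 1024 latent tokens attend over the 77 text tokens. Scores are
// [16,1024,77], scaled by %1 = 0.1118... = 1/sqrt(80), followed by the same
// max-subtracted softmax as above.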
%4394 = torch.aten.view %4291, %3252 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16> loc(#loc1)
%4395 = torch.aten.permute %4394, %1380 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16> loc(#loc1)
%4396 = torch.aten.clone %4395, %int0 : !torch.vtensor<[2,8,1024,80],f16>, !torch.int -> !torch.vtensor<[2,8,1024,80],f16> loc(#loc1)
%4397 = torch.aten.view %4396, %3256 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16> loc(#loc1)
%4398 = torch.aten.view %4342, %3522 : !torch.vtensor<[2,77,640],f16>, !torch.list<int> -> !torch.vtensor<[2,77,8,80],f16> loc(#loc1)
%4399 = torch.aten.permute %4398, %1380 : !torch.vtensor<[2,77,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,80],f16> loc(#loc1)
%4400 = torch.aten.clone %4399, %int0 : !torch.vtensor<[2,8,77,80],f16>, !torch.int -> !torch.vtensor<[2,8,77,80],f16> loc(#loc1)
%4401 = torch.aten.view %4400, %3526 : !torch.vtensor<[2,8,77,80],f16>, !torch.list<int> -> !torch.vtensor<[16,77,80],f16> loc(#loc1)
%4402 = torch.aten.view %4393, %3522 : !torch.vtensor<[2,77,640],f16>, !torch.list<int> -> !torch.vtensor<[2,77,8,80],f16> loc(#loc1)
%4403 = torch.aten.permute %4402, %1380 : !torch.vtensor<[2,77,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,80],f16> loc(#loc1)
%4404 = torch.aten.clone %4403, %int0 : !torch.vtensor<[2,8,77,80],f16>, !torch.int -> !torch.vtensor<[2,8,77,80],f16> loc(#loc1)
%4405 = torch.aten.view %4404, %3526 : !torch.vtensor<[2,8,77,80],f16>, !torch.list<int> -> !torch.vtensor<[16,77,80],f16> loc(#loc1)
%4406 = torch.aten.transpose.int %4401, %int-1, %int-2 : !torch.vtensor<[16,77,80],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,80,77],f16> loc(#loc1)
%4407 = torch.aten.broadcast_to %4397, %3256 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16> loc(#loc1)
%4408 = torch.aten.view %4407, %3256 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16> loc(#loc1)
%4409 = torch.aten.broadcast_to %4406, %3535 : !torch.vtensor<[16,80,77],f16>, !torch.list<int> -> !torch.vtensor<[16,80,77],f16> loc(#loc1)
%4410 = torch.aten.view %4409, %3535 : !torch.vtensor<[16,80,77],f16>, !torch.list<int> -> !torch.vtensor<[16,80,77],f16> loc(#loc1)
%4411 = torch.aten.bmm %4408, %4410 : !torch.vtensor<[16,1024,80],f16>, !torch.vtensor<[16,80,77],f16> -> !torch.vtensor<[16,1024,77],f16> loc(#loc1)
%4412 = torch.aten.view %4411, %3539 : !torch.vtensor<[16,1024,77],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,77],f16> loc(#loc1)
%4413 = torch.aten.mul.Tensor %4412, %1 : !torch.vtensor<[16,1024,77],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,1024,77],f16> loc(#loc1)
%values_108, %indices_109 = torch.aten.max.dim %4413, %int-1, %true : !torch.vtensor<[16,1024,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,1024,1],f16>, !torch.vtensor<[16,1024,1],si64> loc(#loc1)
%4414 = torch.aten.sub.Tensor %4413, %values_108, %float1.000000e00 : !torch.vtensor<[16,1024,77],f16>, !torch.vtensor<[16,1024,1],f16>, !torch.float -> !torch.vtensor<[16,1024,77],f16> loc(#loc1)
%4415 = torch.aten.exp %4414 : !torch.vtensor<[16,1024,77],f16> -> !torch.vtensor<[16,1024,77],f16> loc(#loc1)
%4416 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int> loc(#loc1)
%4417 = torch.aten.sum.dim_IntList %4415, %4416, %true, %none : !torch.vtensor<[16,1024,77],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[16,1024,1],f16> loc(#loc1)
%4418 = torch.aten.div.Tensor %4415, %4417 : !torch.vtensor<[16,1024,77],f16>, !torch.vtensor<[16,1024,1],f16> -> !torch.vtensor<[16,1024,77],f16> loc(#loc1)
%4419 = torch.aten.broadcast_to %4418, %3539 : !torch.vtensor<[16,1024,77],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,77],f16> loc(#loc1)
%4420 = torch.aten.view %4419, %3539 : !torch.vtensor<[16,1024,77],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,77],f16> loc(#loc1)
%4421 = torch.aten.broadcast_to %4405, %3526 : !torch.vtensor<[16,77,80],f16>, !torch.list<int> -> !torch.vtensor<[16,77,80],f16> loc(#loc1)
%4422 = torch.aten.view %4421, %3526 : !torch.vtensor<[16,77,80],f16>, !torch.list<int> -> !torch.vtensor<[16,77,80],f16> loc(#loc1)
%4423 = torch.aten.bmm %4420, %4422 : !torch.vtensor<[16,1024,77],f16>, !torch.vtensor<[16,77,80],f16> -> !torch.vtensor<[16,1024,80],f16> loc(#loc1)
%4424 = torch.aten.view %4423, %3256 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16> loc(#loc1)
%4425 = torch.aten.view %4424, %3287 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16> loc(#loc1)
%4426 = torch.aten.permute %4425, %1380 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16> loc(#loc1)
%4427 = torch.aten.clone %4426, %int0 : !torch.vtensor<[2,1024,8,80],f16>, !torch.int -> !torch.vtensor<[2,1024,8,80],f16> loc(#loc1)
%4428 = torch.aten.view %4427, %3072 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
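// Fake-quantize the cross-attention output and apply the quantized output projection
// (%193/%194, bias %195); the residual add with %4224 produces %4482.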
%4429 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4430 = torch.aten.detach %4429 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4431 = torch.aten.view %4428, %3090 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,40,16],f16> loc(#loc1)
%4432 = torch.aten.abs %4431 : !torch.vtensor<[2,1024,40,16],f16> -> !torch.vtensor<[2,1024,40,16],f16> loc(#loc1)
%values_110, %indices_111 = torch.aten.max.dim %4432, %int3, %true : !torch.vtensor<[2,1024,40,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,1024,40,1],f16>, !torch.vtensor<[2,1024,40,1],si64> loc(#loc1)
%4433 = torch.aten.view %values_110, %3093 : !torch.vtensor<[2,1024,40,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,40,1],f16> loc(#loc1)
%4434 = torch.aten.broadcast_to %4433, %3090 : !torch.vtensor<[2,1024,40,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,40,16],f16> loc(#loc1)
%4435 = torch.aten.clone %4434, %int0 : !torch.vtensor<[2,1024,40,16],f16>, !torch.int -> !torch.vtensor<[2,1024,40,16],f16> loc(#loc1)
%4436 = torch.aten.view %4435, %3072 : !torch.vtensor<[2,1024,40,16],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%4437 = torch.aten.sub.Tensor %4430, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%4438 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%4439 = torch.aten.pow.Tensor_Tensor %4438, %4437 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4440 = torch.aten.neg %4439 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4441 = torch.aten.neg %4440 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4442 = torch.aten.div.Tensor %4436, %4441 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%4443 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4444 = torch.aten.detach %4443 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4445 = torch.aten.div.Tensor %4428, %4442 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%4446 = torch.aten.add.Tensor %4445, %4444, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%4447 = torch.aten.sub.Tensor %4430, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%4448 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%4449 = torch.aten.pow.Tensor_Tensor %4448, %4447 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4450 = torch.aten.neg %4449 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4451 = torch.aten.sub.Tensor %4430, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%4452 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%4453 = torch.aten.pow.Tensor_Tensor %4452, %4451 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4454 = torch.aten.sub.Tensor %4453, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%4455 = torch.aten.gt.Tensor %4446, %4454 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,1024,640],i1> loc(#loc1)
%4456 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%4457 = torch.aten.to.dtype %4456, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%4458 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%4459 = torch.aten.broadcast_to %4457, %4458 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%4460 = torch.valsem.aten.copy %4459, %4454, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%4461 = torch.aten.where.self %4455, %4460, %4446 : !torch.vtensor<[2,1024,640],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%4462 = torch.aten.lt.Tensor %4461, %4450 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,1024,640],i1> loc(#loc1)
%4463 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%4464 = torch.aten.to.dtype %4463, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%4465 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%4466 = torch.aten.broadcast_to %4464, %4465 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%4467 = torch.valsem.aten.copy %4466, %4450, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%4468 = torch.aten.where.self %4462, %4467, %4461 : !torch.vtensor<[2,1024,640],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%4469 = torch.aten.round %4468 : !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%4470 = torch.aten.sub.Tensor %4469, %4444, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%4471 = torch.aten.mul.Tensor %4470, %4442 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%4472 = torch.aten.broadcast_to %193, %3133 : !torch.vtensor<[640,40,1],f16>, !torch.list<int> -> !torch.vtensor<[640,40,16],f16> loc(#loc1)
%4473 = torch.aten.clone %4472, %int0 : !torch.vtensor<[640,40,16],f16>, !torch.int -> !torch.vtensor<[640,40,16],f16> loc(#loc1)
%4474 = torch.aten.view %4473, %3136 : !torch.vtensor<[640,40,16],f16>, !torch.list<int> -> !torch.vtensor<[640,640],f16> loc(#loc1)
%4475 = torch.aten.mul.Tensor %194, %4474 : !torch.vtensor<[640,640],si8>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16> loc(#loc1)
%4476 = torch.aten.transpose.int %4475, %int0, %int1 : !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,640],f16> loc(#loc1)
%4477 = torch.aten.view %4471, %3140 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16> loc(#loc1)
%4478 = torch.aten.mm %4477, %4476 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16> loc(#loc1)
%4479 = torch.aten.mul.Scalar %195, %int1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640],f16> loc(#loc1)
%4480 = torch.aten.add.Tensor %4479, %4478, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2048,640],f16>, !torch.int -> !torch.vtensor<[2048,640],f16> loc(#loc1)
%4481 = torch.aten.view %4480, %3072 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%4482 = torch.aten.add.Tensor %4481, %4224, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
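// Another LayerNorm (same mean/var/rsqrt pattern) preparing the feed-forward input.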
%4483 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int> loc(#loc1)
%4484 = torch.aten.sum.dim_IntList %4482, %4483, %true, %none : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,1024,1],f16> loc(#loc1)
%4485 = torch.aten.div.Scalar %4484, %int640 : !torch.vtensor<[2,1024,1],f16>, !torch.int -> !torch.vtensor<[2,1024,1],f16> loc(#loc1)
%4486 = torch.prim.ListConstruct %int2, %int1024, %int640 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%4487 = torch.aten.broadcast_to %4485, %4486 : !torch.vtensor<[2,1024,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%4488 = torch.aten.sub.Tensor %4482, %4487, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%4489 = torch.aten.mul.Tensor %4488, %4488 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%4490 = torch.aten.sum.dim_IntList %4489, %4483, %true, %none : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,1024,1],f16> loc(#loc1)
%4491 = torch.aten.div.Scalar %4490, %int640 : !torch.vtensor<[2,1024,1],f16>, !torch.int -> !torch.vtensor<[2,1024,1],f16> loc(#loc1)
%4492 = torch.aten.add.Scalar %4491, %float1.000000e-05, %int1 : !torch.vtensor<[2,1024,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,1024,1],f16> loc(#loc1)
%4493 = torch.aten.rsqrt %4492 : !torch.vtensor<[2,1024,1],f16> -> !torch.vtensor<[2,1024,1],f16> loc(#loc1)
%4494 = torch.prim.ListConstruct %int2, %int1024, %int640 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%4495 = torch.aten.broadcast_to %4493, %4494 : !torch.vtensor<[2,1024,1],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%4496 = torch.aten.mul.Tensor %4488, %4495 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
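// GEGLU feed-forward: quantized 640 -> 5120 linear (%196/%197, bias %198), split into
// two 2560-wide halves; gelu of the second half gates the first.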
%4497 = torch.aten.broadcast_to %196, %3625 : !torch.vtensor<[5120,40,1],f16>, !torch.list<int> -> !torch.vtensor<[5120,40,16],f16> loc(#loc1)
%4498 = torch.aten.clone %4497, %int0 : !torch.vtensor<[5120,40,16],f16>, !torch.int -> !torch.vtensor<[5120,40,16],f16> loc(#loc1)
%4499 = torch.aten.view %4498, %3628 : !torch.vtensor<[5120,40,16],f16>, !torch.list<int> -> !torch.vtensor<[5120,640],f16> loc(#loc1)
%4500 = torch.aten.mul.Tensor %197, %4499 : !torch.vtensor<[5120,640],si8>, !torch.vtensor<[5120,640],f16> -> !torch.vtensor<[5120,640],f16> loc(#loc1)
%4501 = torch.aten.transpose.int %4500, %int0, %int1 : !torch.vtensor<[5120,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,5120],f16> loc(#loc1)
%4502 = torch.aten.view %4496, %3140 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16> loc(#loc1)
%4503 = torch.aten.mm %4502, %4501 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,5120],f16> -> !torch.vtensor<[2048,5120],f16> loc(#loc1)
%4504 = torch.aten.mul.Scalar %198, %int1 : !torch.vtensor<[5120],f16>, !torch.int -> !torch.vtensor<[5120],f16> loc(#loc1)
%4505 = torch.aten.add.Tensor %4504, %4503, %int1 : !torch.vtensor<[5120],f16>, !torch.vtensor<[2048,5120],f16>, !torch.int -> !torch.vtensor<[2048,5120],f16> loc(#loc1)
%4506 = torch.aten.view %4505, %3636 : !torch.vtensor<[2048,5120],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,5120],f16> loc(#loc1)
%4507 = torch.aten.slice.Tensor %4506, %int-1, %int0, %int2560, %int1 : !torch.vtensor<[2,1024,5120],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1024,2560],f16> loc(#loc1)
%4508 = torch.aten.slice.Tensor %4506, %int-1, %int2560, %int5120, %int1 : !torch.vtensor<[2,1024,5120],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1024,2560],f16> loc(#loc1)
%4509 = torch.aten.gelu %4508, %str : !torch.vtensor<[2,1024,2560],f16>, !torch.str -> !torch.vtensor<[2,1024,2560],f16> loc(#loc1)
%4510 = torch.aten.mul.Tensor %4507, %4509 : !torch.vtensor<[2,1024,2560],f16>, !torch.vtensor<[2,1024,2560],f16> -> !torch.vtensor<[2,1024,2560],f16> loc(#loc1)
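// Project the gated activations back to 640 with another quantized linear
// (%199/%200, bias %201), then add the residual %4482.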
%4511 = torch.aten.broadcast_to %199, %3642 : !torch.vtensor<[640,160,1],f16>, !torch.list<int> -> !torch.vtensor<[640,160,16],f16> loc(#loc1)
%4512 = torch.aten.clone %4511, %int0 : !torch.vtensor<[640,160,16],f16>, !torch.int -> !torch.vtensor<[640,160,16],f16> loc(#loc1)
%4513 = torch.aten.view %4512, %3645 : !torch.vtensor<[640,160,16],f16>, !torch.list<int> -> !torch.vtensor<[640,2560],f16> loc(#loc1)
%4514 = torch.aten.mul.Tensor %200, %4513 : !torch.vtensor<[640,2560],si8>, !torch.vtensor<[640,2560],f16> -> !torch.vtensor<[640,2560],f16> loc(#loc1)
%4515 = torch.aten.transpose.int %4514, %int0, %int1 : !torch.vtensor<[640,2560],f16>, !torch.int, !torch.int -> !torch.vtensor<[2560,640],f16> loc(#loc1)
%4516 = torch.aten.view %4510, %3649 : !torch.vtensor<[2,1024,2560],f16>, !torch.list<int> -> !torch.vtensor<[2048,2560],f16> loc(#loc1)
%4517 = torch.aten.mm %4516, %4515 : !torch.vtensor<[2048,2560],f16>, !torch.vtensor<[2560,640],f16> -> !torch.vtensor<[2048,640],f16> loc(#loc1)
%4518 = torch.aten.mul.Scalar %201, %int1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640],f16> loc(#loc1)
%4519 = torch.aten.add.Tensor %4518, %4517, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2048,640],f16>, !torch.int -> !torch.vtensor<[2048,640],f16> loc(#loc1)
%4520 = torch.aten.view %4519, %3072 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
%4521 = torch.aten.add.Tensor %4520, %4482, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16> loc(#loc1)
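    // The residual add closes the feed-forward; the token sequence [2,1024,640]
    // is then reshaped and permuted back to the spatial layout [2,640,32,32].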
%4522 = torch.aten.view %4521, %3656 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2,32,32,640],f16> loc(#loc1)
%4523 = torch.aten.permute %4522, %1789 : !torch.vtensor<[2,32,32,640],f16>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
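    // Activation fake-quantization (quantize/dequantize), a pattern repeated
    // throughout this section: per-group absmax over blocks of 16 channels, then,
    // reading %880 as the bit-width n, %881 as the zero-point zp, and %882 as the
    // base 2 (an assumption; those constants are defined upstream of this excerpt):
    //   scale = absmax / 2^(n-1)
    //   q     = round(clamp(x / scale + zp, -2^(n-1), 2^(n-1) - 1))
    //   x_fq  = (q - zp) * scale
    // The clamp is spelled out below as gt/lt masks feeding torch.aten.where.self.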
%4524 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4525 = torch.aten.detach %4524 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4526 = torch.aten.view %4523, %2892 : !torch.vtensor<[2,640,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,40,16,32,32],f16> loc(#loc1)
%4527 = torch.aten.abs %4526 : !torch.vtensor<[2,40,16,32,32],f16> -> !torch.vtensor<[2,40,16,32,32],f16> loc(#loc1)
%values_112, %indices_113 = torch.aten.max.dim %4527, %int2, %true : !torch.vtensor<[2,40,16,32,32],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,40,1,32,32],f16>, !torch.vtensor<[2,40,1,32,32],si64> loc(#loc1)
%4528 = torch.aten.view %values_112, %2895 : !torch.vtensor<[2,40,1,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,40,1,32,32],f16> loc(#loc1)
%4529 = torch.aten.broadcast_to %4528, %2892 : !torch.vtensor<[2,40,1,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,40,16,32,32],f16> loc(#loc1)
%4530 = torch.aten.clone %4529, %int0 : !torch.vtensor<[2,40,16,32,32],f16>, !torch.int -> !torch.vtensor<[2,40,16,32,32],f16> loc(#loc1)
%4531 = torch.aten.view %4530, %2875 : !torch.vtensor<[2,40,16,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%4532 = torch.aten.sub.Tensor %4525, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%4533 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%4534 = torch.aten.pow.Tensor_Tensor %4533, %4532 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4535 = torch.aten.neg %4534 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4536 = torch.aten.neg %4535 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4537 = torch.aten.div.Tensor %4531, %4536 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%4538 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4539 = torch.aten.detach %4538 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4540 = torch.aten.div.Tensor %4523, %4537 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%4541 = torch.aten.add.Tensor %4540, %4539, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%4542 = torch.aten.sub.Tensor %4525, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%4543 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%4544 = torch.aten.pow.Tensor_Tensor %4543, %4542 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4545 = torch.aten.neg %4544 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4546 = torch.aten.sub.Tensor %4525, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%4547 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%4548 = torch.aten.pow.Tensor_Tensor %4547, %4546 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4549 = torch.aten.sub.Tensor %4548, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%4550 = torch.aten.gt.Tensor %4541, %4549 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,640,32,32],i1> loc(#loc1)
%4551 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%4552 = torch.aten.to.dtype %4551, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%4553 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%4554 = torch.aten.broadcast_to %4552, %4553 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%4555 = torch.valsem.aten.copy %4554, %4549, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%4556 = torch.aten.where.self %4550, %4555, %4541 : !torch.vtensor<[2,640,32,32],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%4557 = torch.aten.lt.Tensor %4556, %4545 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,640,32,32],i1> loc(#loc1)
%4558 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%4559 = torch.aten.to.dtype %4558, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%4560 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%4561 = torch.aten.broadcast_to %4559, %4560 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%4562 = torch.valsem.aten.copy %4561, %4545, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%4563 = torch.aten.where.self %4557, %4562, %4556 : !torch.vtensor<[2,640,32,32],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%4564 = torch.aten.round %4563 : !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%4565 = torch.aten.sub.Tensor %4564, %4539, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%4566 = torch.aten.mul.Tensor %4565, %4537 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
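    // Output projection of the transformer block: dequantize the grouped int8
    // 1x1 conv weight (%202/%203), convolve, and add the result back onto what
    // appears to be the pre-transformer residual %3885.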
%4567 = torch.aten.broadcast_to %202, %3064 : !torch.vtensor<[640,40,1,1,1],f16>, !torch.list<int> -> !torch.vtensor<[640,40,16,1,1],f16> loc(#loc1)
%4568 = torch.aten.clone %4567, %int0 : !torch.vtensor<[640,40,16,1,1],f16>, !torch.int -> !torch.vtensor<[640,40,16,1,1],f16> loc(#loc1)
%4569 = torch.aten.view %4568, %3067 : !torch.vtensor<[640,40,16,1,1],f16>, !torch.list<int> -> !torch.vtensor<[640,640,1,1],f16> loc(#loc1)
%4570 = torch.aten.mul.Tensor %203, %4569 : !torch.vtensor<[640,640,1,1],si8>, !torch.vtensor<[640,640,1,1],f16> -> !torch.vtensor<[640,640,1,1],f16> loc(#loc1)
%4571 = torch.aten.convolution %4566, %4570, %204, %933, %934, %933, %false, %934, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[640,640,1,1],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%4572 = torch.aten.add.Tensor %4571, %3885, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
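    // Second fake-quant round (same pattern as above) on the block output,
    // preparing the input of the strided downsample convolution.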
%4573 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4574 = torch.aten.detach %4573 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4575 = torch.aten.view %4572, %2892 : !torch.vtensor<[2,640,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,40,16,32,32],f16> loc(#loc1)
%4576 = torch.aten.abs %4575 : !torch.vtensor<[2,40,16,32,32],f16> -> !torch.vtensor<[2,40,16,32,32],f16> loc(#loc1)
%values_114, %indices_115 = torch.aten.max.dim %4576, %int2, %true : !torch.vtensor<[2,40,16,32,32],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,40,1,32,32],f16>, !torch.vtensor<[2,40,1,32,32],si64> loc(#loc1)
%4577 = torch.aten.view %values_114, %2895 : !torch.vtensor<[2,40,1,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,40,1,32,32],f16> loc(#loc1)
%4578 = torch.aten.broadcast_to %4577, %2892 : !torch.vtensor<[2,40,1,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,40,16,32,32],f16> loc(#loc1)
%4579 = torch.aten.clone %4578, %int0 : !torch.vtensor<[2,40,16,32,32],f16>, !torch.int -> !torch.vtensor<[2,40,16,32,32],f16> loc(#loc1)
%4580 = torch.aten.view %4579, %2875 : !torch.vtensor<[2,40,16,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%4581 = torch.aten.sub.Tensor %4574, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%4582 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%4583 = torch.aten.pow.Tensor_Tensor %4582, %4581 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4584 = torch.aten.neg %4583 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4585 = torch.aten.neg %4584 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4586 = torch.aten.div.Tensor %4580, %4585 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%4587 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4588 = torch.aten.detach %4587 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4589 = torch.aten.div.Tensor %4572, %4586 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%4590 = torch.aten.add.Tensor %4589, %4588, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%4591 = torch.aten.sub.Tensor %4574, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%4592 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%4593 = torch.aten.pow.Tensor_Tensor %4592, %4591 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4594 = torch.aten.neg %4593 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4595 = torch.aten.sub.Tensor %4574, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%4596 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%4597 = torch.aten.pow.Tensor_Tensor %4596, %4595 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4598 = torch.aten.sub.Tensor %4597, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%4599 = torch.aten.gt.Tensor %4590, %4598 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,640,32,32],i1> loc(#loc1)
%4600 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%4601 = torch.aten.to.dtype %4600, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%4602 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%4603 = torch.aten.broadcast_to %4601, %4602 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%4604 = torch.valsem.aten.copy %4603, %4598, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%4605 = torch.aten.where.self %4599, %4604, %4590 : !torch.vtensor<[2,640,32,32],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%4606 = torch.aten.lt.Tensor %4605, %4594 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,640,32,32],i1> loc(#loc1)
%4607 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%4608 = torch.aten.to.dtype %4607, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%4609 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%4610 = torch.aten.broadcast_to %4608, %4609 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%4611 = torch.valsem.aten.copy %4610, %4594, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%4612 = torch.aten.where.self %4606, %4611, %4605 : !torch.vtensor<[2,640,32,32],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%4613 = torch.aten.round %4612 : !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%4614 = torch.aten.sub.Tensor %4613, %4588, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
%4615 = torch.aten.mul.Tensor %4614, %4586 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16> loc(#loc1)
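    // Downsample: dequantized 3x3 convolution (%205/%206, bias %207) with stride
    // %2752 (presumably [2,2], given the 32x32 -> 16x16 output) and padding %933.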
%4616 = torch.aten.broadcast_to %205, %2935 : !torch.vtensor<[640,40,1,3,3],f16>, !torch.list<int> -> !torch.vtensor<[640,40,16,3,3],f16> loc(#loc1)
%4617 = torch.aten.clone %4616, %int0 : !torch.vtensor<[640,40,16,3,3],f16>, !torch.int -> !torch.vtensor<[640,40,16,3,3],f16> loc(#loc1)
%4618 = torch.aten.view %4617, %2938 : !torch.vtensor<[640,40,16,3,3],f16>, !torch.list<int> -> !torch.vtensor<[640,640,3,3],f16> loc(#loc1)
%4619 = torch.aten.mul.Tensor %206, %4618 : !torch.vtensor<[640,640,3,3],si8>, !torch.vtensor<[640,640,3,3],f16> -> !torch.vtensor<[640,640,3,3],f16> loc(#loc1)
%4620 = torch.aten.convolution %4615, %4619, %207, %2752, %933, %933, %false, %934, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[640,640,3,3],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,640,16,16],f16> loc(#loc1)
%4621 = torch.aten.clone %4620, %int0 : !torch.vtensor<[2,640,16,16],f16>, !torch.int -> !torch.vtensor<[2,640,16,16],f16> loc(#loc1)
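    // GroupNorm over 32 groups: [2,640,16,16] is viewed as [2,32,20,256] and the
    // statistics are accumulated in f64 (sums divided by 20*256 = 5120 elements
    // per group), eps %5 (1e-5). A per-channel affine (%208/%209) and a SiLU
    // (sigmoid(x) * x) follow.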
%4622 = torch.prim.ListConstruct %int2, %int32, %int20, %int256 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc3)
%4623 = torch.aten.view %4621, %4622 : !torch.vtensor<[2,640,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,32,20,256],f16> loc(#loc1)
%4624 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%4625 = torch.aten.to.dtype %4624, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%4626 = torch.prim.ListConstruct %int2, %int32, %int20, %int256 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%4627 = torch.aten.broadcast_to %4625, %4626 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,20,256],f32> loc(#loc1)
%4628 = torch.valsem.aten.copy %4627, %4623, %false : !torch.vtensor<[2,32,20,256],f32>, !torch.vtensor<[2,32,20,256],f16>, !torch.bool -> !torch.vtensor<[2,32,20,256],f32> loc(#loc1)
%4629 = torch.aten.to.dtype %4628, %int7, %false, %false, %none : !torch.vtensor<[2,32,20,256],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,20,256],f64> loc(#loc1)
%4630 = torch.aten.sum.dim_IntList %4629, %943, %true, %none : !torch.vtensor<[2,32,20,256],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%4631 = torch.aten.div.Scalar %4630, %int5120 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%4632 = torch.aten.sub.Tensor %4629, %4631, %float1.000000e00 : !torch.vtensor<[2,32,20,256],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,20,256],f64> loc(#loc1)
%4633 = torch.aten.mul.Tensor %4632, %4632 : !torch.vtensor<[2,32,20,256],f64>, !torch.vtensor<[2,32,20,256],f64> -> !torch.vtensor<[2,32,20,256],f64> loc(#loc1)
%4634 = torch.aten.sum.dim_IntList %4633, %943, %true, %none : !torch.vtensor<[2,32,20,256],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%4635 = torch.aten.div.Scalar %4634, %int5120 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%4636 = torch.aten.to.dtype %4635, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%4637 = torch.aten.sum.dim_IntList %4628, %943, %true, %none : !torch.vtensor<[2,32,20,256],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%4638 = torch.aten.div.Scalar %4637, %int5120 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%4639 = torch.aten.add.Tensor %4636, %5, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%4640 = torch.aten.rsqrt %4639 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%4641 = torch.aten.sub.Tensor %4623, %4638, %int1 : !torch.vtensor<[2,32,20,256],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,20,256],f32> loc(#loc1)
%4642 = torch.aten.mul.Tensor %4641, %4640 : !torch.vtensor<[2,32,20,256],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,20,256],f32> loc(#loc1)
%4643 = torch.prim.ListConstruct %int2, %int640, %int16, %int16 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc3)
%4644 = torch.aten.view %4642, %4643 : !torch.vtensor<[2,32,20,256],f32>, !torch.list<int> -> !torch.vtensor<[2,640,16,16],f32> loc(#loc1)
%4645 = torch.aten.unsqueeze %208, %int-1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640,1],f16> loc(#loc1)
%4646 = torch.aten.unsqueeze %4645, %int-1 : !torch.vtensor<[640,1],f16>, !torch.int -> !torch.vtensor<[640,1,1],f16> loc(#loc1)
%4647 = torch.aten.mul.Tensor %4644, %4646 : !torch.vtensor<[2,640,16,16],f32>, !torch.vtensor<[640,1,1],f16> -> !torch.vtensor<[2,640,16,16],f32> loc(#loc1)
%4648 = torch.aten.unsqueeze %209, %int-1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640,1],f16> loc(#loc1)
%4649 = torch.aten.unsqueeze %4648, %int-1 : !torch.vtensor<[640,1],f16>, !torch.int -> !torch.vtensor<[640,1,1],f16> loc(#loc1)
%4650 = torch.aten.add.Tensor %4647, %4649, %int1 : !torch.vtensor<[2,640,16,16],f32>, !torch.vtensor<[640,1,1],f16>, !torch.int -> !torch.vtensor<[2,640,16,16],f32> loc(#loc1)
%4651 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%4652 = torch.aten.to.dtype %4651, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%4653 = torch.prim.ListConstruct %int2, %int640, %int16, %int16 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%4654 = torch.aten.broadcast_to %4652, %4653 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,640,16,16],f16> loc(#loc1)
%4655 = torch.valsem.aten.copy %4654, %4650, %false : !torch.vtensor<[2,640,16,16],f16>, !torch.vtensor<[2,640,16,16],f32>, !torch.bool -> !torch.vtensor<[2,640,16,16],f16> loc(#loc1)
%4656 = torch.aten.sigmoid %4655 : !torch.vtensor<[2,640,16,16],f16> -> !torch.vtensor<[2,640,16,16],f16> loc(#loc1)
%4657 = torch.aten.mul.Tensor %4656, %4655 : !torch.vtensor<[2,640,16,16],f16>, !torch.vtensor<[2,640,16,16],f16> -> !torch.vtensor<[2,640,16,16],f16> loc(#loc1)
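    // Fake-quant of the normalized activations ahead of the first 3x3
    // convolution of this ResNet block.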
%4658 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4659 = torch.aten.detach %4658 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4660 = torch.prim.ListConstruct %int2, %int40, %int16, %int16, %int16 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc3)
%4661 = torch.aten.view %4657, %4660 : !torch.vtensor<[2,640,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,40,16,16,16],f16> loc(#loc1)
%4662 = torch.aten.abs %4661 : !torch.vtensor<[2,40,16,16,16],f16> -> !torch.vtensor<[2,40,16,16,16],f16> loc(#loc1)
%values_116, %indices_117 = torch.aten.max.dim %4662, %int2, %true : !torch.vtensor<[2,40,16,16,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,40,1,16,16],f16>, !torch.vtensor<[2,40,1,16,16],si64> loc(#loc1)
%4663 = torch.prim.ListConstruct %int2, %int40, %int1, %int16, %int16 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc3)
%4664 = torch.aten.view %values_116, %4663 : !torch.vtensor<[2,40,1,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,40,1,16,16],f16> loc(#loc1)
%4665 = torch.aten.broadcast_to %4664, %4660 : !torch.vtensor<[2,40,1,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,40,16,16,16],f16> loc(#loc1)
%4666 = torch.aten.clone %4665, %int0 : !torch.vtensor<[2,40,16,16,16],f16>, !torch.int -> !torch.vtensor<[2,40,16,16,16],f16> loc(#loc1)
%4667 = torch.aten.view %4666, %4643 : !torch.vtensor<[2,40,16,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,640,16,16],f16> loc(#loc1)
%4668 = torch.aten.sub.Tensor %4659, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%4669 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%4670 = torch.aten.pow.Tensor_Tensor %4669, %4668 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4671 = torch.aten.neg %4670 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4672 = torch.aten.neg %4671 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4673 = torch.aten.div.Tensor %4667, %4672 : !torch.vtensor<[2,640,16,16],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,640,16,16],f16> loc(#loc1)
%4674 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4675 = torch.aten.detach %4674 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4676 = torch.aten.div.Tensor %4657, %4673 : !torch.vtensor<[2,640,16,16],f16>, !torch.vtensor<[2,640,16,16],f16> -> !torch.vtensor<[2,640,16,16],f16> loc(#loc1)
%4677 = torch.aten.add.Tensor %4676, %4675, %int1 : !torch.vtensor<[2,640,16,16],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,640,16,16],f16> loc(#loc1)
%4678 = torch.aten.sub.Tensor %4659, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%4679 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%4680 = torch.aten.pow.Tensor_Tensor %4679, %4678 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4681 = torch.aten.neg %4680 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4682 = torch.aten.sub.Tensor %4659, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%4683 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%4684 = torch.aten.pow.Tensor_Tensor %4683, %4682 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4685 = torch.aten.sub.Tensor %4684, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%4686 = torch.aten.gt.Tensor %4677, %4685 : !torch.vtensor<[2,640,16,16],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,640,16,16],i1> loc(#loc1)
%4687 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%4688 = torch.aten.to.dtype %4687, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%4689 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%4690 = torch.aten.broadcast_to %4688, %4689 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%4691 = torch.valsem.aten.copy %4690, %4685, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%4692 = torch.aten.where.self %4686, %4691, %4677 : !torch.vtensor<[2,640,16,16],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,640,16,16],f16> -> !torch.vtensor<[2,640,16,16],f16> loc(#loc1)
%4693 = torch.aten.lt.Tensor %4692, %4681 : !torch.vtensor<[2,640,16,16],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,640,16,16],i1> loc(#loc1)
%4694 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%4695 = torch.aten.to.dtype %4694, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%4696 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%4697 = torch.aten.broadcast_to %4695, %4696 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%4698 = torch.valsem.aten.copy %4697, %4681, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%4699 = torch.aten.where.self %4693, %4698, %4692 : !torch.vtensor<[2,640,16,16],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,640,16,16],f16> -> !torch.vtensor<[2,640,16,16],f16> loc(#loc1)
%4700 = torch.aten.round %4699 : !torch.vtensor<[2,640,16,16],f16> -> !torch.vtensor<[2,640,16,16],f16> loc(#loc1)
%4701 = torch.aten.sub.Tensor %4700, %4675, %int1 : !torch.vtensor<[2,640,16,16],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,640,16,16],f16> loc(#loc1)
%4702 = torch.aten.mul.Tensor %4701, %4673 : !torch.vtensor<[2,640,16,16],f16>, !torch.vtensor<[2,640,16,16],f16> -> !torch.vtensor<[2,640,16,16],f16> loc(#loc1)
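    // First ResNet convolution: grouped int8 weight dequantization (%210/%211)
    // followed by a 3x3 convolution that widens the channels from 640 to 1280.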
%4703 = torch.prim.ListConstruct %int1280, %int40, %int16, %int3, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc2)
%4704 = torch.aten.broadcast_to %210, %4703 : !torch.vtensor<[1280,40,1,3,3],f16>, !torch.list<int> -> !torch.vtensor<[1280,40,16,3,3],f16> loc(#loc1)
%4705 = torch.aten.clone %4704, %int0 : !torch.vtensor<[1280,40,16,3,3],f16>, !torch.int -> !torch.vtensor<[1280,40,16,3,3],f16> loc(#loc1)
%4706 = torch.prim.ListConstruct %int1280, %int640, %int3, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc4)
%4707 = torch.aten.view %4705, %4706 : !torch.vtensor<[1280,40,16,3,3],f16>, !torch.list<int> -> !torch.vtensor<[1280,640,3,3],f16> loc(#loc1)
%4708 = torch.aten.mul.Tensor %211, %4707 : !torch.vtensor<[1280,640,3,3],si8>, !torch.vtensor<[1280,640,3,3],f16> -> !torch.vtensor<[1280,640,3,3],f16> loc(#loc1)
%4709 = torch.aten.convolution %4702, %4708, %212, %933, %933, %933, %false, %934, %int1 : !torch.vtensor<[2,640,16,16],f16>, !torch.vtensor<[1280,640,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
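    // What appears to be the timestep-embedding injection: SiLU on the [2,1280]
    // embedding %932, a linear projection (%213/%214), and a broadcast add over
    // the spatial dims via two unsqueezes. The full-range slice.Tensor ops are
    // tracing artifacts and do not change the tensor.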
%4710 = torch.aten.sigmoid %932 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16> loc(#loc1)
%4711 = torch.aten.mul.Tensor %4710, %932 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16> loc(#loc1)
%4712 = torch.aten.transpose.int %213, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16> loc(#loc1)
%4713 = torch.aten.mm %4711, %4712 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[2,1280],f16> loc(#loc1)
%4714 = torch.aten.mul.Scalar %214, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16> loc(#loc1)
%4715 = torch.aten.add.Tensor %4714, %4713, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[2,1280],f16>, !torch.int -> !torch.vtensor<[2,1280],f16> loc(#loc1)
%4716 = torch.aten.slice.Tensor %4715, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,1280],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16> loc(#loc1)
%4717 = torch.aten.slice.Tensor %4716, %int1, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,1280],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16> loc(#loc1)
%4718 = torch.aten.unsqueeze %4717, %int2 : !torch.vtensor<[2,1280],f16>, !torch.int -> !torch.vtensor<[2,1280,1],f16> loc(#loc1)
%4719 = torch.aten.unsqueeze %4718, %int3 : !torch.vtensor<[2,1280,1],f16>, !torch.int -> !torch.vtensor<[2,1280,1,1],f16> loc(#loc1)
%4720 = torch.aten.add.Tensor %4709, %4719, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
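    // Second GroupNorm (viewed as [2,32,40,256], divisor 40*256 = 10240, eps %5),
    // per-channel affine %215/%216, then SiLU.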
%4721 = torch.prim.ListConstruct %int2, %int32, %int40, %int256 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc3)
%4722 = torch.aten.view %4720, %4721 : !torch.vtensor<[2,1280,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,32,40,256],f16> loc(#loc1)
%4723 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%4724 = torch.aten.to.dtype %4723, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%4725 = torch.prim.ListConstruct %int2, %int32, %int40, %int256 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%4726 = torch.aten.broadcast_to %4724, %4725 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,40,256],f32> loc(#loc1)
%4727 = torch.valsem.aten.copy %4726, %4722, %false : !torch.vtensor<[2,32,40,256],f32>, !torch.vtensor<[2,32,40,256],f16>, !torch.bool -> !torch.vtensor<[2,32,40,256],f32> loc(#loc1)
%4728 = torch.aten.to.dtype %4727, %int7, %false, %false, %none : !torch.vtensor<[2,32,40,256],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,40,256],f64> loc(#loc1)
%4729 = torch.aten.sum.dim_IntList %4728, %943, %true, %none : !torch.vtensor<[2,32,40,256],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%4730 = torch.aten.div.Scalar %4729, %int10240 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%4731 = torch.aten.sub.Tensor %4728, %4730, %float1.000000e00 : !torch.vtensor<[2,32,40,256],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,40,256],f64> loc(#loc1)
%4732 = torch.aten.mul.Tensor %4731, %4731 : !torch.vtensor<[2,32,40,256],f64>, !torch.vtensor<[2,32,40,256],f64> -> !torch.vtensor<[2,32,40,256],f64> loc(#loc1)
%4733 = torch.aten.sum.dim_IntList %4732, %943, %true, %none : !torch.vtensor<[2,32,40,256],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%4734 = torch.aten.div.Scalar %4733, %int10240 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%4735 = torch.aten.to.dtype %4734, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%4736 = torch.aten.sum.dim_IntList %4727, %943, %true, %none : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%4737 = torch.aten.div.Scalar %4736, %int10240 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%4738 = torch.aten.add.Tensor %4735, %5, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%4739 = torch.aten.rsqrt %4738 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%4740 = torch.aten.sub.Tensor %4722, %4737, %int1 : !torch.vtensor<[2,32,40,256],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,256],f32> loc(#loc1)
%4741 = torch.aten.mul.Tensor %4740, %4739 : !torch.vtensor<[2,32,40,256],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,256],f32> loc(#loc1)
%4742 = torch.prim.ListConstruct %int2, %int1280, %int16, %int16 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc3)
%4743 = torch.aten.view %4741, %4742 : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f32> loc(#loc1)
%4744 = torch.aten.unsqueeze %215, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16> loc(#loc1)
%4745 = torch.aten.unsqueeze %4744, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16> loc(#loc1)
%4746 = torch.aten.mul.Tensor %4743, %4745 : !torch.vtensor<[2,1280,16,16],f32>, !torch.vtensor<[1280,1,1],f16> -> !torch.vtensor<[2,1280,16,16],f32> loc(#loc1)
%4747 = torch.aten.unsqueeze %216, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16> loc(#loc1)
%4748 = torch.aten.unsqueeze %4747, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16> loc(#loc1)
%4749 = torch.aten.add.Tensor %4746, %4748, %int1 : !torch.vtensor<[2,1280,16,16],f32>, !torch.vtensor<[1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f32> loc(#loc1)
%4750 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%4751 = torch.aten.to.dtype %4750, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%4752 = torch.prim.ListConstruct %int2, %int1280, %int16, %int16 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%4753 = torch.aten.broadcast_to %4751, %4752 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%4754 = torch.valsem.aten.copy %4753, %4749, %false : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f32>, !torch.bool -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%4755 = torch.aten.sigmoid %4754 : !torch.vtensor<[2,1280,16,16],f16> -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%4756 = torch.aten.mul.Tensor %4755, %4754 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f16> -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
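    // Fake-quant ahead of the second ResNet convolution.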
%4757 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4758 = torch.aten.detach %4757 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4759 = torch.prim.ListConstruct %int2, %int80, %int16, %int16, %int16 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc3)
%4760 = torch.aten.view %4756, %4759 : !torch.vtensor<[2,1280,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,80,16,16,16],f16> loc(#loc1)
%4761 = torch.aten.abs %4760 : !torch.vtensor<[2,80,16,16,16],f16> -> !torch.vtensor<[2,80,16,16,16],f16> loc(#loc1)
%values_118, %indices_119 = torch.aten.max.dim %4761, %int2, %true : !torch.vtensor<[2,80,16,16,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,80,1,16,16],f16>, !torch.vtensor<[2,80,1,16,16],si64> loc(#loc1)
%4762 = torch.prim.ListConstruct %int2, %int80, %int1, %int16, %int16 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc3)
%4763 = torch.aten.view %values_118, %4762 : !torch.vtensor<[2,80,1,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,80,1,16,16],f16> loc(#loc1)
%4764 = torch.aten.broadcast_to %4763, %4759 : !torch.vtensor<[2,80,1,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,80,16,16,16],f16> loc(#loc1)
%4765 = torch.aten.clone %4764, %int0 : !torch.vtensor<[2,80,16,16,16],f16>, !torch.int -> !torch.vtensor<[2,80,16,16,16],f16> loc(#loc1)
%4766 = torch.aten.view %4765, %4742 : !torch.vtensor<[2,80,16,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%4767 = torch.aten.sub.Tensor %4758, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%4768 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%4769 = torch.aten.pow.Tensor_Tensor %4768, %4767 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4770 = torch.aten.neg %4769 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4771 = torch.aten.neg %4770 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4772 = torch.aten.div.Tensor %4766, %4771 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%4773 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4774 = torch.aten.detach %4773 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4775 = torch.aten.div.Tensor %4756, %4772 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f16> -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%4776 = torch.aten.add.Tensor %4775, %4774, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%4777 = torch.aten.sub.Tensor %4758, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%4778 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%4779 = torch.aten.pow.Tensor_Tensor %4778, %4777 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4780 = torch.aten.neg %4779 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4781 = torch.aten.sub.Tensor %4758, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%4782 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%4783 = torch.aten.pow.Tensor_Tensor %4782, %4781 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4784 = torch.aten.sub.Tensor %4783, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%4785 = torch.aten.gt.Tensor %4776, %4784 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,1280,16,16],i1> loc(#loc1)
%4786 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%4787 = torch.aten.to.dtype %4786, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%4788 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%4789 = torch.aten.broadcast_to %4787, %4788 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%4790 = torch.valsem.aten.copy %4789, %4784, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%4791 = torch.aten.where.self %4785, %4790, %4776 : !torch.vtensor<[2,1280,16,16],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,1280,16,16],f16> -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%4792 = torch.aten.lt.Tensor %4791, %4780 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,1280,16,16],i1> loc(#loc1)
%4793 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%4794 = torch.aten.to.dtype %4793, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%4795 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%4796 = torch.aten.broadcast_to %4794, %4795 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%4797 = torch.valsem.aten.copy %4796, %4780, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%4798 = torch.aten.where.self %4792, %4797, %4791 : !torch.vtensor<[2,1280,16,16],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,1280,16,16],f16> -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%4799 = torch.aten.round %4798 : !torch.vtensor<[2,1280,16,16],f16> -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%4800 = torch.aten.sub.Tensor %4799, %4774, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%4801 = torch.aten.mul.Tensor %4800, %4772 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f16> -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
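    // Second ResNet convolution (1280 -> 1280, 3x3, stride %933, padding %933),
    // again with grouped int8 weight dequantization (%217/%218).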
%4802 = torch.prim.ListConstruct %int1280, %int80, %int16, %int3, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc2)
%4803 = torch.aten.broadcast_to %217, %4802 : !torch.vtensor<[1280,80,1,3,3],f16>, !torch.list<int> -> !torch.vtensor<[1280,80,16,3,3],f16> loc(#loc1)
%4804 = torch.aten.clone %4803, %int0 : !torch.vtensor<[1280,80,16,3,3],f16>, !torch.int -> !torch.vtensor<[1280,80,16,3,3],f16> loc(#loc1)
%4805 = torch.prim.ListConstruct %int1280, %int1280, %int3, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc4)
%4806 = torch.aten.view %4804, %4805 : !torch.vtensor<[1280,80,16,3,3],f16>, !torch.list<int> -> !torch.vtensor<[1280,1280,3,3],f16> loc(#loc1)
%4807 = torch.aten.mul.Tensor %218, %4806 : !torch.vtensor<[1280,1280,3,3],si8>, !torch.vtensor<[1280,1280,3,3],f16> -> !torch.vtensor<[1280,1280,3,3],f16> loc(#loc1)
%4808 = torch.aten.convolution %4801, %4807, %219, %933, %933, %933, %false, %934, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
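    // Shortcut path: the downsampled input %4620 is fake-quantized separately
    // before the 1x1 shortcut convolution that raises its channels to 1280.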
%4809 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4810 = torch.aten.detach %4809 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4811 = torch.aten.view %4620, %4660 : !torch.vtensor<[2,640,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,40,16,16,16],f16> loc(#loc1)
%4812 = torch.aten.abs %4811 : !torch.vtensor<[2,40,16,16,16],f16> -> !torch.vtensor<[2,40,16,16,16],f16> loc(#loc1)
%values_120, %indices_121 = torch.aten.max.dim %4812, %int2, %true : !torch.vtensor<[2,40,16,16,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,40,1,16,16],f16>, !torch.vtensor<[2,40,1,16,16],si64> loc(#loc1)
%4813 = torch.aten.view %values_120, %4663 : !torch.vtensor<[2,40,1,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,40,1,16,16],f16> loc(#loc1)
%4814 = torch.aten.broadcast_to %4813, %4660 : !torch.vtensor<[2,40,1,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,40,16,16,16],f16> loc(#loc1)
%4815 = torch.aten.clone %4814, %int0 : !torch.vtensor<[2,40,16,16,16],f16>, !torch.int -> !torch.vtensor<[2,40,16,16,16],f16> loc(#loc1)
%4816 = torch.aten.view %4815, %4643 : !torch.vtensor<[2,40,16,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,640,16,16],f16> loc(#loc1)
%4817 = torch.aten.sub.Tensor %4810, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%4818 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%4819 = torch.aten.pow.Tensor_Tensor %4818, %4817 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4820 = torch.aten.neg %4819 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4821 = torch.aten.neg %4820 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4822 = torch.aten.div.Tensor %4816, %4821 : !torch.vtensor<[2,640,16,16],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,640,16,16],f16> loc(#loc1)
%4823 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4824 = torch.aten.detach %4823 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4825 = torch.aten.div.Tensor %4620, %4822 : !torch.vtensor<[2,640,16,16],f16>, !torch.vtensor<[2,640,16,16],f16> -> !torch.vtensor<[2,640,16,16],f16> loc(#loc1)
%4826 = torch.aten.add.Tensor %4825, %4824, %int1 : !torch.vtensor<[2,640,16,16],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,640,16,16],f16> loc(#loc1)
%4827 = torch.aten.sub.Tensor %4810, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%4828 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%4829 = torch.aten.pow.Tensor_Tensor %4828, %4827 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4830 = torch.aten.neg %4829 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4831 = torch.aten.sub.Tensor %4810, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%4832 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%4833 = torch.aten.pow.Tensor_Tensor %4832, %4831 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4834 = torch.aten.sub.Tensor %4833, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%4835 = torch.aten.gt.Tensor %4826, %4834 : !torch.vtensor<[2,640,16,16],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,640,16,16],i1> loc(#loc1)
%4836 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%4837 = torch.aten.to.dtype %4836, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%4838 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%4839 = torch.aten.broadcast_to %4837, %4838 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%4840 = torch.valsem.aten.copy %4839, %4834, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%4841 = torch.aten.where.self %4835, %4840, %4826 : !torch.vtensor<[2,640,16,16],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,640,16,16],f16> -> !torch.vtensor<[2,640,16,16],f16> loc(#loc1)
%4842 = torch.aten.lt.Tensor %4841, %4830 : !torch.vtensor<[2,640,16,16],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,640,16,16],i1> loc(#loc1)
%4843 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%4844 = torch.aten.to.dtype %4843, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%4845 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%4846 = torch.aten.broadcast_to %4844, %4845 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%4847 = torch.valsem.aten.copy %4846, %4830, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%4848 = torch.aten.where.self %4842, %4847, %4841 : !torch.vtensor<[2,640,16,16],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,640,16,16],f16> -> !torch.vtensor<[2,640,16,16],f16> loc(#loc1)
%4849 = torch.aten.round %4848 : !torch.vtensor<[2,640,16,16],f16> -> !torch.vtensor<[2,640,16,16],f16> loc(#loc1)
%4850 = torch.aten.sub.Tensor %4849, %4824, %int1 : !torch.vtensor<[2,640,16,16],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,640,16,16],f16> loc(#loc1)
%4851 = torch.aten.mul.Tensor %4850, %4822 : !torch.vtensor<[2,640,16,16],f16>, !torch.vtensor<[2,640,16,16],f16> -> !torch.vtensor<[2,640,16,16],f16> loc(#loc1)
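    // 1x1 shortcut convolution (640 -> 1280) on the dequantized weight
    // (%220/%221), then the residual add with the main path; the divide by %4
    // (a 1.0 constant, the usual ResNet output scale factor) is numerically a
    // no-op here.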
%4852 = torch.prim.ListConstruct %int1280, %int40, %int16, %int1, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc2)
%4853 = torch.aten.broadcast_to %220, %4852 : !torch.vtensor<[1280,40,1,1,1],f16>, !torch.list<int> -> !torch.vtensor<[1280,40,16,1,1],f16> loc(#loc1)
%4854 = torch.aten.clone %4853, %int0 : !torch.vtensor<[1280,40,16,1,1],f16>, !torch.int -> !torch.vtensor<[1280,40,16,1,1],f16> loc(#loc1)
%4855 = torch.prim.ListConstruct %int1280, %int640, %int1, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc4)
%4856 = torch.aten.view %4854, %4855 : !torch.vtensor<[1280,40,16,1,1],f16>, !torch.list<int> -> !torch.vtensor<[1280,640,1,1],f16> loc(#loc1)
%4857 = torch.aten.mul.Tensor %221, %4856 : !torch.vtensor<[1280,640,1,1],si8>, !torch.vtensor<[1280,640,1,1],f16> -> !torch.vtensor<[1280,640,1,1],f16> loc(#loc1)
%4858 = torch.aten.convolution %4851, %4857, %222, %933, %934, %933, %false, %934, %int1 : !torch.vtensor<[2,640,16,16],f16>, !torch.vtensor<[1280,640,1,1],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%4859 = torch.aten.add.Tensor %4858, %4808, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%4860 = torch.aten.div.Tensor %4859, %4 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
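    // GroupNorm with eps %3 (~1e-6), consistent with the normalization that
    // precedes a transformer block's input projection; its output is
    // fake-quantized below.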
%4861 = torch.aten.clone %4860, %int0 : !torch.vtensor<[2,1280,16,16],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%4862 = torch.aten.view %4861, %4721 : !torch.vtensor<[2,1280,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,32,40,256],f16> loc(#loc1)
%4863 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%4864 = torch.aten.to.dtype %4863, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%4865 = torch.prim.ListConstruct %int2, %int32, %int40, %int256 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%4866 = torch.aten.broadcast_to %4864, %4865 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,40,256],f32> loc(#loc1)
%4867 = torch.valsem.aten.copy %4866, %4862, %false : !torch.vtensor<[2,32,40,256],f32>, !torch.vtensor<[2,32,40,256],f16>, !torch.bool -> !torch.vtensor<[2,32,40,256],f32> loc(#loc1)
%4868 = torch.aten.to.dtype %4867, %int7, %false, %false, %none : !torch.vtensor<[2,32,40,256],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,40,256],f64> loc(#loc1)
%4869 = torch.aten.sum.dim_IntList %4868, %943, %true, %none : !torch.vtensor<[2,32,40,256],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%4870 = torch.aten.div.Scalar %4869, %int10240 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%4871 = torch.aten.sub.Tensor %4868, %4870, %float1.000000e00 : !torch.vtensor<[2,32,40,256],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,40,256],f64> loc(#loc1)
%4872 = torch.aten.mul.Tensor %4871, %4871 : !torch.vtensor<[2,32,40,256],f64>, !torch.vtensor<[2,32,40,256],f64> -> !torch.vtensor<[2,32,40,256],f64> loc(#loc1)
%4873 = torch.aten.sum.dim_IntList %4872, %943, %true, %none : !torch.vtensor<[2,32,40,256],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%4874 = torch.aten.div.Scalar %4873, %int10240 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%4875 = torch.aten.to.dtype %4874, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%4876 = torch.aten.sum.dim_IntList %4867, %943, %true, %none : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%4877 = torch.aten.div.Scalar %4876, %int10240 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%4878 = torch.aten.add.Tensor %4875, %3, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%4879 = torch.aten.rsqrt %4878 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%4880 = torch.aten.sub.Tensor %4862, %4877, %int1 : !torch.vtensor<[2,32,40,256],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,256],f32> loc(#loc1)
%4881 = torch.aten.mul.Tensor %4880, %4879 : !torch.vtensor<[2,32,40,256],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,256],f32> loc(#loc1)
%4882 = torch.aten.view %4881, %4742 : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f32> loc(#loc1)
%4883 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%4884 = torch.aten.to.dtype %4883, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%4885 = torch.prim.ListConstruct %int2, %int1280, %int16, %int16 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%4886 = torch.aten.broadcast_to %4884, %4885 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%4887 = torch.valsem.aten.copy %4886, %4882, %false : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f32>, !torch.bool -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%4888 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4889 = torch.aten.detach %4888 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4890 = torch.aten.view %4887, %4759 : !torch.vtensor<[2,1280,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,80,16,16,16],f16> loc(#loc1)
%4891 = torch.aten.abs %4890 : !torch.vtensor<[2,80,16,16,16],f16> -> !torch.vtensor<[2,80,16,16,16],f16> loc(#loc1)
%values_122, %indices_123 = torch.aten.max.dim %4891, %int2, %true : !torch.vtensor<[2,80,16,16,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,80,1,16,16],f16>, !torch.vtensor<[2,80,1,16,16],si64> loc(#loc1)
%4892 = torch.aten.view %values_122, %4762 : !torch.vtensor<[2,80,1,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,80,1,16,16],f16> loc(#loc1)
%4893 = torch.aten.broadcast_to %4892, %4759 : !torch.vtensor<[2,80,1,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,80,16,16,16],f16> loc(#loc1)
%4894 = torch.aten.clone %4893, %int0 : !torch.vtensor<[2,80,16,16,16],f16>, !torch.int -> !torch.vtensor<[2,80,16,16,16],f16> loc(#loc1)
%4895 = torch.aten.view %4894, %4742 : !torch.vtensor<[2,80,16,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%4896 = torch.aten.sub.Tensor %4889, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%4897 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%4898 = torch.aten.pow.Tensor_Tensor %4897, %4896 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4899 = torch.aten.neg %4898 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4900 = torch.aten.neg %4899 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4901 = torch.aten.div.Tensor %4895, %4900 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%4902 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4903 = torch.aten.detach %4902 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4904 = torch.aten.div.Tensor %4887, %4901 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f16> -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%4905 = torch.aten.add.Tensor %4904, %4903, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%4906 = torch.aten.sub.Tensor %4889, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%4907 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%4908 = torch.aten.pow.Tensor_Tensor %4907, %4906 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4909 = torch.aten.neg %4908 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4910 = torch.aten.sub.Tensor %4889, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%4911 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%4912 = torch.aten.pow.Tensor_Tensor %4911, %4910 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4913 = torch.aten.sub.Tensor %4912, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%4914 = torch.aten.gt.Tensor %4905, %4913 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,1280,16,16],i1> loc(#loc1)
%4915 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%4916 = torch.aten.to.dtype %4915, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%4917 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%4918 = torch.aten.broadcast_to %4916, %4917 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%4919 = torch.valsem.aten.copy %4918, %4913, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%4920 = torch.aten.where.self %4914, %4919, %4905 : !torch.vtensor<[2,1280,16,16],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,1280,16,16],f16> -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%4921 = torch.aten.lt.Tensor %4920, %4909 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,1280,16,16],i1> loc(#loc1)
%4922 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%4923 = torch.aten.to.dtype %4922, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%4924 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%4925 = torch.aten.broadcast_to %4923, %4924 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%4926 = torch.valsem.aten.copy %4925, %4909, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%4927 = torch.aten.where.self %4921, %4926, %4920 : !torch.vtensor<[2,1280,16,16],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,1280,16,16],f16> -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%4928 = torch.aten.round %4927 : !torch.vtensor<[2,1280,16,16],f16> -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%4929 = torch.aten.sub.Tensor %4928, %4903, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%4930 = torch.aten.mul.Tensor %4929, %4901 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f16> -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
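// Weight dequantization: per-group f16 scales (%223) are broadcast over blocks of
// 16 input channels and multiplied into the si8 weight (%224), then used in a
// 1x1 convolution with bias %225.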
%4931 = torch.prim.ListConstruct %int1280, %int80, %int16, %int1, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc2)
%4932 = torch.aten.broadcast_to %223, %4931 : !torch.vtensor<[1280,80,1,1,1],f16>, !torch.list<int> -> !torch.vtensor<[1280,80,16,1,1],f16> loc(#loc1)
%4933 = torch.aten.clone %4932, %int0 : !torch.vtensor<[1280,80,16,1,1],f16>, !torch.int -> !torch.vtensor<[1280,80,16,1,1],f16> loc(#loc1)
%4934 = torch.prim.ListConstruct %int1280, %int1280, %int1, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc4)
%4935 = torch.aten.view %4933, %4934 : !torch.vtensor<[1280,80,16,1,1],f16>, !torch.list<int> -> !torch.vtensor<[1280,1280,1,1],f16> loc(#loc1)
%4936 = torch.aten.mul.Tensor %224, %4935 : !torch.vtensor<[1280,1280,1,1],si8>, !torch.vtensor<[1280,1280,1,1],f16> -> !torch.vtensor<[1280,1280,1,1],f16> loc(#loc1)
%4937 = torch.aten.convolution %4930, %4936, %225, %933, %934, %933, %false, %934, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[1280,1280,1,1],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%4938 = torch.aten.permute %4937, %1196 : !torch.vtensor<[2,1280,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,16,16,1280],f16> loc(#loc1)
%4939 = torch.prim.ListConstruct %int2, %int256, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc5)
%4940 = torch.aten.view %4938, %4939 : !torch.vtensor<[2,16,16,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%4941 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int> loc(#loc1)
%4942 = torch.aten.sum.dim_IntList %4940, %4941, %true, %none : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,256,1],f16> loc(#loc1)
%4943 = torch.aten.div.Scalar %4942, %int1280 : !torch.vtensor<[2,256,1],f16>, !torch.int -> !torch.vtensor<[2,256,1],f16> loc(#loc1)
%4944 = torch.prim.ListConstruct %int2, %int256, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%4945 = torch.aten.broadcast_to %4943, %4944 : !torch.vtensor<[2,256,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%4946 = torch.aten.sub.Tensor %4940, %4945, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%4947 = torch.aten.mul.Tensor %4946, %4946 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%4948 = torch.aten.sum.dim_IntList %4947, %4941, %true, %none : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,256,1],f16> loc(#loc1)
%4949 = torch.aten.div.Scalar %4948, %int1280 : !torch.vtensor<[2,256,1],f16>, !torch.int -> !torch.vtensor<[2,256,1],f16> loc(#loc1)
%4950 = torch.aten.add.Scalar %4949, %float1.000000e-05, %int1 : !torch.vtensor<[2,256,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,256,1],f16> loc(#loc1)
%4951 = torch.aten.rsqrt %4950 : !torch.vtensor<[2,256,1],f16> -> !torch.vtensor<[2,256,1],f16> loc(#loc1)
%4952 = torch.prim.ListConstruct %int2, %int256, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%4953 = torch.aten.broadcast_to %4951, %4952 : !torch.vtensor<[2,256,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%4954 = torch.aten.mul.Tensor %4946, %4953 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%4955 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4956 = torch.aten.detach %4955 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4957 = torch.prim.ListConstruct %int2, %int256, %int80, %int16 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc3)
%4958 = torch.aten.view %4954, %4957 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,80,16],f16> loc(#loc1)
%4959 = torch.aten.abs %4958 : !torch.vtensor<[2,256,80,16],f16> -> !torch.vtensor<[2,256,80,16],f16> loc(#loc1)
%values_124, %indices_125 = torch.aten.max.dim %4959, %int3, %true : !torch.vtensor<[2,256,80,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,256,80,1],f16>, !torch.vtensor<[2,256,80,1],si64> loc(#loc1)
%4960 = torch.prim.ListConstruct %int2, %int256, %int80, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc3)
%4961 = torch.aten.view %values_124, %4960 : !torch.vtensor<[2,256,80,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,80,1],f16> loc(#loc1)
%4962 = torch.aten.broadcast_to %4961, %4957 : !torch.vtensor<[2,256,80,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,80,16],f16> loc(#loc1)
%4963 = torch.aten.clone %4962, %int0 : !torch.vtensor<[2,256,80,16],f16>, !torch.int -> !torch.vtensor<[2,256,80,16],f16> loc(#loc1)
%4964 = torch.aten.view %4963, %4939 : !torch.vtensor<[2,256,80,16],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%4965 = torch.aten.sub.Tensor %4956, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%4966 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%4967 = torch.aten.pow.Tensor_Tensor %4966, %4965 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4968 = torch.aten.neg %4967 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4969 = torch.aten.neg %4968 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4970 = torch.aten.div.Tensor %4964, %4969 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%4971 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4972 = torch.aten.detach %4971 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4973 = torch.aten.div.Tensor %4954, %4970 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%4974 = torch.aten.add.Tensor %4973, %4972, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%4975 = torch.aten.sub.Tensor %4956, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%4976 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%4977 = torch.aten.pow.Tensor_Tensor %4976, %4975 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4978 = torch.aten.neg %4977 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4979 = torch.aten.sub.Tensor %4956, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%4980 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%4981 = torch.aten.pow.Tensor_Tensor %4980, %4979 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%4982 = torch.aten.sub.Tensor %4981, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%4983 = torch.aten.gt.Tensor %4974, %4982 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,256,1280],i1> loc(#loc1)
%4984 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%4985 = torch.aten.to.dtype %4984, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%4986 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%4987 = torch.aten.broadcast_to %4985, %4986 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%4988 = torch.valsem.aten.copy %4987, %4982, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%4989 = torch.aten.where.self %4983, %4988, %4974 : !torch.vtensor<[2,256,1280],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%4990 = torch.aten.lt.Tensor %4989, %4978 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,256,1280],i1> loc(#loc1)
%4991 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%4992 = torch.aten.to.dtype %4991, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%4993 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%4994 = torch.aten.broadcast_to %4992, %4993 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%4995 = torch.valsem.aten.copy %4994, %4978, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%4996 = torch.aten.where.self %4990, %4995, %4989 : !torch.vtensor<[2,256,1280],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%4997 = torch.aten.round %4996 : !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%4998 = torch.aten.sub.Tensor %4997, %4972, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%4999 = torch.aten.mul.Tensor %4998, %4970 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5000 = torch.prim.ListConstruct %int1280, %int80, %int16 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc2)
%5001 = torch.aten.broadcast_to %226, %5000 : !torch.vtensor<[1280,80,1],f16>, !torch.list<int> -> !torch.vtensor<[1280,80,16],f16> loc(#loc1)
%5002 = torch.aten.clone %5001, %int0 : !torch.vtensor<[1280,80,16],f16>, !torch.int -> !torch.vtensor<[1280,80,16],f16> loc(#loc1)
%5003 = torch.prim.ListConstruct %int1280, %int1280 : (!torch.int, !torch.int) -> !torch.list<int> loc(#loc4)
%5004 = torch.aten.view %5002, %5003 : !torch.vtensor<[1280,80,16],f16>, !torch.list<int> -> !torch.vtensor<[1280,1280],f16> loc(#loc1)
%5005 = torch.aten.mul.Tensor %227, %5004 : !torch.vtensor<[1280,1280],si8>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16> loc(#loc1)
%5006 = torch.aten.transpose.int %5005, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16> loc(#loc1)
%5007 = torch.prim.ListConstruct %int512, %int1280 : (!torch.int, !torch.int) -> !torch.list<int> loc(#loc3)
%5008 = torch.aten.view %4999, %5007 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16> loc(#loc1)
%5009 = torch.aten.mm %5008, %5006 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16> loc(#loc1)
%5010 = torch.aten.mul.Scalar %228, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16> loc(#loc1)
%5011 = torch.aten.add.Tensor %5010, %5009, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[512,1280],f16>, !torch.int -> !torch.vtensor<[512,1280],f16> loc(#loc1)
%5012 = torch.aten.view %5011, %4939 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5013 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5014 = torch.aten.detach %5013 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5015 = torch.aten.view %4954, %4957 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,80,16],f16> loc(#loc1)
%5016 = torch.aten.abs %5015 : !torch.vtensor<[2,256,80,16],f16> -> !torch.vtensor<[2,256,80,16],f16> loc(#loc1)
%values_126, %indices_127 = torch.aten.max.dim %5016, %int3, %true : !torch.vtensor<[2,256,80,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,256,80,1],f16>, !torch.vtensor<[2,256,80,1],si64> loc(#loc1)
%5017 = torch.aten.view %values_126, %4960 : !torch.vtensor<[2,256,80,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,80,1],f16> loc(#loc1)
%5018 = torch.aten.broadcast_to %5017, %4957 : !torch.vtensor<[2,256,80,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,80,16],f16> loc(#loc1)
%5019 = torch.aten.clone %5018, %int0 : !torch.vtensor<[2,256,80,16],f16>, !torch.int -> !torch.vtensor<[2,256,80,16],f16> loc(#loc1)
%5020 = torch.aten.view %5019, %4939 : !torch.vtensor<[2,256,80,16],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5021 = torch.aten.sub.Tensor %5014, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%5022 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%5023 = torch.aten.pow.Tensor_Tensor %5022, %5021 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5024 = torch.aten.neg %5023 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5025 = torch.aten.neg %5024 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5026 = torch.aten.div.Tensor %5020, %5025 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5027 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5028 = torch.aten.detach %5027 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5029 = torch.aten.div.Tensor %4954, %5026 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5030 = torch.aten.add.Tensor %5029, %5028, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5031 = torch.aten.sub.Tensor %5014, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%5032 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%5033 = torch.aten.pow.Tensor_Tensor %5032, %5031 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5034 = torch.aten.neg %5033 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5035 = torch.aten.sub.Tensor %5014, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%5036 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%5037 = torch.aten.pow.Tensor_Tensor %5036, %5035 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5038 = torch.aten.sub.Tensor %5037, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%5039 = torch.aten.gt.Tensor %5030, %5038 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,256,1280],i1> loc(#loc1)
%5040 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%5041 = torch.aten.to.dtype %5040, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%5042 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%5043 = torch.aten.broadcast_to %5041, %5042 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%5044 = torch.valsem.aten.copy %5043, %5038, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%5045 = torch.aten.where.self %5039, %5044, %5030 : !torch.vtensor<[2,256,1280],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5046 = torch.aten.lt.Tensor %5045, %5034 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,256,1280],i1> loc(#loc1)
%5047 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%5048 = torch.aten.to.dtype %5047, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%5049 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%5050 = torch.aten.broadcast_to %5048, %5049 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%5051 = torch.valsem.aten.copy %5050, %5034, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%5052 = torch.aten.where.self %5046, %5051, %5045 : !torch.vtensor<[2,256,1280],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5053 = torch.aten.round %5052 : !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5054 = torch.aten.sub.Tensor %5053, %5028, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5055 = torch.aten.mul.Tensor %5054, %5026 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5056 = torch.aten.broadcast_to %229, %5000 : !torch.vtensor<[1280,80,1],f16>, !torch.list<int> -> !torch.vtensor<[1280,80,16],f16> loc(#loc1)
%5057 = torch.aten.clone %5056, %int0 : !torch.vtensor<[1280,80,16],f16>, !torch.int -> !torch.vtensor<[1280,80,16],f16> loc(#loc1)
%5058 = torch.aten.view %5057, %5003 : !torch.vtensor<[1280,80,16],f16>, !torch.list<int> -> !torch.vtensor<[1280,1280],f16> loc(#loc1)
%5059 = torch.aten.mul.Tensor %230, %5058 : !torch.vtensor<[1280,1280],si8>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16> loc(#loc1)
%5060 = torch.aten.transpose.int %5059, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16> loc(#loc1)
%5061 = torch.aten.view %5055, %5007 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16> loc(#loc1)
%5062 = torch.aten.mm %5061, %5060 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16> loc(#loc1)
%5063 = torch.aten.mul.Scalar %231, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16> loc(#loc1)
%5064 = torch.aten.add.Tensor %5063, %5062, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[512,1280],f16>, !torch.int -> !torch.vtensor<[512,1280],f16> loc(#loc1)
%5065 = torch.aten.view %5064, %4939 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5066 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5067 = torch.aten.detach %5066 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5068 = torch.aten.view %4954, %4957 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,80,16],f16> loc(#loc1)
%5069 = torch.aten.abs %5068 : !torch.vtensor<[2,256,80,16],f16> -> !torch.vtensor<[2,256,80,16],f16> loc(#loc1)
%values_128, %indices_129 = torch.aten.max.dim %5069, %int3, %true : !torch.vtensor<[2,256,80,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,256,80,1],f16>, !torch.vtensor<[2,256,80,1],si64> loc(#loc1)
%5070 = torch.aten.view %values_128, %4960 : !torch.vtensor<[2,256,80,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,80,1],f16> loc(#loc1)
%5071 = torch.aten.broadcast_to %5070, %4957 : !torch.vtensor<[2,256,80,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,80,16],f16> loc(#loc1)
%5072 = torch.aten.clone %5071, %int0 : !torch.vtensor<[2,256,80,16],f16>, !torch.int -> !torch.vtensor<[2,256,80,16],f16> loc(#loc1)
%5073 = torch.aten.view %5072, %4939 : !torch.vtensor<[2,256,80,16],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5074 = torch.aten.sub.Tensor %5067, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%5075 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%5076 = torch.aten.pow.Tensor_Tensor %5075, %5074 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5077 = torch.aten.neg %5076 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5078 = torch.aten.neg %5077 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5079 = torch.aten.div.Tensor %5073, %5078 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5080 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5081 = torch.aten.detach %5080 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5082 = torch.aten.div.Tensor %4954, %5079 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5083 = torch.aten.add.Tensor %5082, %5081, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5084 = torch.aten.sub.Tensor %5067, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%5085 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%5086 = torch.aten.pow.Tensor_Tensor %5085, %5084 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5087 = torch.aten.neg %5086 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5088 = torch.aten.sub.Tensor %5067, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%5089 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%5090 = torch.aten.pow.Tensor_Tensor %5089, %5088 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5091 = torch.aten.sub.Tensor %5090, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%5092 = torch.aten.gt.Tensor %5083, %5091 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,256,1280],i1> loc(#loc1)
%5093 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%5094 = torch.aten.to.dtype %5093, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%5095 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%5096 = torch.aten.broadcast_to %5094, %5095 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%5097 = torch.valsem.aten.copy %5096, %5091, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%5098 = torch.aten.where.self %5092, %5097, %5083 : !torch.vtensor<[2,256,1280],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5099 = torch.aten.lt.Tensor %5098, %5087 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,256,1280],i1> loc(#loc1)
%5100 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%5101 = torch.aten.to.dtype %5100, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%5102 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%5103 = torch.aten.broadcast_to %5101, %5102 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%5104 = torch.valsem.aten.copy %5103, %5087, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%5105 = torch.aten.where.self %5099, %5104, %5098 : !torch.vtensor<[2,256,1280],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5106 = torch.aten.round %5105 : !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5107 = torch.aten.sub.Tensor %5106, %5081, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5108 = torch.aten.mul.Tensor %5107, %5079 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5109 = torch.aten.broadcast_to %232, %5000 : !torch.vtensor<[1280,80,1],f16>, !torch.list<int> -> !torch.vtensor<[1280,80,16],f16> loc(#loc1)
%5110 = torch.aten.clone %5109, %int0 : !torch.vtensor<[1280,80,16],f16>, !torch.int -> !torch.vtensor<[1280,80,16],f16> loc(#loc1)
%5111 = torch.aten.view %5110, %5003 : !torch.vtensor<[1280,80,16],f16>, !torch.list<int> -> !torch.vtensor<[1280,1280],f16> loc(#loc1)
%5112 = torch.aten.mul.Tensor %233, %5111 : !torch.vtensor<[1280,1280],si8>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16> loc(#loc1)
%5113 = torch.aten.transpose.int %5112, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16> loc(#loc1)
%5114 = torch.aten.view %5108, %5007 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16> loc(#loc1)
%5115 = torch.aten.mm %5114, %5113 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16> loc(#loc1)
%5116 = torch.aten.mul.Scalar %234, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16> loc(#loc1)
%5117 = torch.aten.add.Tensor %5116, %5115, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[512,1280],f16>, !torch.int -> !torch.vtensor<[512,1280],f16> loc(#loc1)
%5118 = torch.aten.view %5117, %4939 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5119 = torch.prim.ListConstruct %int2, %int256, %int8, %int160 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc5)
%5120 = torch.aten.view %5012, %5119 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16> loc(#loc1)
%5121 = torch.aten.permute %5120, %1380 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16> loc(#loc1)
%5122 = torch.aten.clone %5121, %int0 : !torch.vtensor<[2,8,256,160],f16>, !torch.int -> !torch.vtensor<[2,8,256,160],f16> loc(#loc1)
%5123 = torch.prim.ListConstruct %int16, %int256, %int160 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc2)
%5124 = torch.aten.view %5122, %5123 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16> loc(#loc1)
%5125 = torch.aten.view %5065, %5119 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16> loc(#loc1)
%5126 = torch.aten.permute %5125, %1380 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16> loc(#loc1)
%5127 = torch.aten.clone %5126, %int0 : !torch.vtensor<[2,8,256,160],f16>, !torch.int -> !torch.vtensor<[2,8,256,160],f16> loc(#loc1)
%5128 = torch.aten.view %5127, %5123 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16> loc(#loc1)
%5129 = torch.aten.view %5118, %5119 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16> loc(#loc1)
%5130 = torch.aten.permute %5129, %1380 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16> loc(#loc1)
%5131 = torch.aten.clone %5130, %int0 : !torch.vtensor<[2,8,256,160],f16>, !torch.int -> !torch.vtensor<[2,8,256,160],f16> loc(#loc1)
%5132 = torch.aten.view %5131, %5123 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16> loc(#loc1)
%5133 = torch.aten.transpose.int %5128, %int-1, %int-2 : !torch.vtensor<[16,256,160],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,160,256],f16> loc(#loc1)
%5134 = torch.aten.broadcast_to %5124, %5123 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16> loc(#loc1)
%5135 = torch.aten.view %5134, %5123 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16> loc(#loc1)
%5136 = torch.prim.ListConstruct %int16, %int160, %int256 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc2)
%5137 = torch.aten.broadcast_to %5133, %5136 : !torch.vtensor<[16,160,256],f16>, !torch.list<int> -> !torch.vtensor<[16,160,256],f16> loc(#loc1)
%5138 = torch.aten.view %5137, %5136 : !torch.vtensor<[16,160,256],f16>, !torch.list<int> -> !torch.vtensor<[16,160,256],f16> loc(#loc1)
%5139 = torch.aten.bmm %5135, %5138 : !torch.vtensor<[16,256,160],f16>, !torch.vtensor<[16,160,256],f16> -> !torch.vtensor<[16,256,256],f16> loc(#loc1)
%5140 = torch.prim.ListConstruct %int16, %int256, %int256 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc4)
%5141 = torch.aten.view %5139, %5140 : !torch.vtensor<[16,256,256],f16>, !torch.list<int> -> !torch.vtensor<[16,256,256],f16> loc(#loc1)
%5142 = torch.aten.mul.Tensor %5141, %0 : !torch.vtensor<[16,256,256],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,256,256],f16> loc(#loc1)
%values_130, %indices_131 = torch.aten.max.dim %5142, %int-1, %true : !torch.vtensor<[16,256,256],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,256,1],f16>, !torch.vtensor<[16,256,1],si64> loc(#loc1)
%5143 = torch.aten.sub.Tensor %5142, %values_130, %float1.000000e00 : !torch.vtensor<[16,256,256],f16>, !torch.vtensor<[16,256,1],f16>, !torch.float -> !torch.vtensor<[16,256,256],f16> loc(#loc1)
%5144 = torch.aten.exp %5143 : !torch.vtensor<[16,256,256],f16> -> !torch.vtensor<[16,256,256],f16> loc(#loc1)
%5145 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int> loc(#loc1)
%5146 = torch.aten.sum.dim_IntList %5144, %5145, %true, %none : !torch.vtensor<[16,256,256],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[16,256,1],f16> loc(#loc1)
%5147 = torch.aten.div.Tensor %5144, %5146 : !torch.vtensor<[16,256,256],f16>, !torch.vtensor<[16,256,1],f16> -> !torch.vtensor<[16,256,256],f16> loc(#loc1)
%5148 = torch.aten.broadcast_to %5147, %5140 : !torch.vtensor<[16,256,256],f16>, !torch.list<int> -> !torch.vtensor<[16,256,256],f16> loc(#loc1)
%5149 = torch.aten.view %5148, %5140 : !torch.vtensor<[16,256,256],f16>, !torch.list<int> -> !torch.vtensor<[16,256,256],f16> loc(#loc1)
%5150 = torch.aten.broadcast_to %5132, %5123 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16> loc(#loc1)
%5151 = torch.aten.view %5150, %5123 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16> loc(#loc1)
%5152 = torch.aten.bmm %5149, %5151 : !torch.vtensor<[16,256,256],f16>, !torch.vtensor<[16,256,160],f16> -> !torch.vtensor<[16,256,160],f16> loc(#loc1)
%5153 = torch.aten.view %5152, %5123 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16> loc(#loc1)
%5154 = torch.prim.ListConstruct %int2, %int8, %int256, %int160 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc5)
%5155 = torch.aten.view %5153, %5154 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16> loc(#loc1)
%5156 = torch.aten.permute %5155, %1380 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16> loc(#loc1)
%5157 = torch.aten.clone %5156, %int0 : !torch.vtensor<[2,256,8,160],f16>, !torch.int -> !torch.vtensor<[2,256,8,160],f16> loc(#loc1)
%5158 = torch.aten.view %5157, %4939 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5159 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5160 = torch.aten.detach %5159 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5161 = torch.aten.view %5158, %4957 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,80,16],f16> loc(#loc1)
%5162 = torch.aten.abs %5161 : !torch.vtensor<[2,256,80,16],f16> -> !torch.vtensor<[2,256,80,16],f16> loc(#loc1)
%values_132, %indices_133 = torch.aten.max.dim %5162, %int3, %true : !torch.vtensor<[2,256,80,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,256,80,1],f16>, !torch.vtensor<[2,256,80,1],si64> loc(#loc1)
%5163 = torch.aten.view %values_132, %4960 : !torch.vtensor<[2,256,80,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,80,1],f16> loc(#loc1)
%5164 = torch.aten.broadcast_to %5163, %4957 : !torch.vtensor<[2,256,80,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,80,16],f16> loc(#loc1)
%5165 = torch.aten.clone %5164, %int0 : !torch.vtensor<[2,256,80,16],f16>, !torch.int -> !torch.vtensor<[2,256,80,16],f16> loc(#loc1)
%5166 = torch.aten.view %5165, %4939 : !torch.vtensor<[2,256,80,16],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5167 = torch.aten.sub.Tensor %5160, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%5168 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%5169 = torch.aten.pow.Tensor_Tensor %5168, %5167 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5170 = torch.aten.neg %5169 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5171 = torch.aten.neg %5170 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5172 = torch.aten.div.Tensor %5166, %5171 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5173 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5174 = torch.aten.detach %5173 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5175 = torch.aten.div.Tensor %5158, %5172 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5176 = torch.aten.add.Tensor %5175, %5174, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5177 = torch.aten.sub.Tensor %5160, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%5178 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%5179 = torch.aten.pow.Tensor_Tensor %5178, %5177 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5180 = torch.aten.neg %5179 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5181 = torch.aten.sub.Tensor %5160, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%5182 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%5183 = torch.aten.pow.Tensor_Tensor %5182, %5181 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5184 = torch.aten.sub.Tensor %5183, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%5185 = torch.aten.gt.Tensor %5176, %5184 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,256,1280],i1> loc(#loc1)
%5186 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%5187 = torch.aten.to.dtype %5186, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%5188 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%5189 = torch.aten.broadcast_to %5187, %5188 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%5190 = torch.valsem.aten.copy %5189, %5184, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%5191 = torch.aten.where.self %5185, %5190, %5176 : !torch.vtensor<[2,256,1280],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5192 = torch.aten.lt.Tensor %5191, %5180 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,256,1280],i1> loc(#loc1)
%5193 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%5194 = torch.aten.to.dtype %5193, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%5195 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%5196 = torch.aten.broadcast_to %5194, %5195 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%5197 = torch.valsem.aten.copy %5196, %5180, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%5198 = torch.aten.where.self %5192, %5197, %5191 : !torch.vtensor<[2,256,1280],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5199 = torch.aten.round %5198 : !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5200 = torch.aten.sub.Tensor %5199, %5174, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5201 = torch.aten.mul.Tensor %5200, %5172 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5202 = torch.aten.broadcast_to %235, %5000 : !torch.vtensor<[1280,80,1],f16>, !torch.list<int> -> !torch.vtensor<[1280,80,16],f16> loc(#loc1)
%5203 = torch.aten.clone %5202, %int0 : !torch.vtensor<[1280,80,16],f16>, !torch.int -> !torch.vtensor<[1280,80,16],f16> loc(#loc1)
%5204 = torch.aten.view %5203, %5003 : !torch.vtensor<[1280,80,16],f16>, !torch.list<int> -> !torch.vtensor<[1280,1280],f16> loc(#loc1)
%5205 = torch.aten.mul.Tensor %236, %5204 : !torch.vtensor<[1280,1280],si8>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16> loc(#loc1)
%5206 = torch.aten.transpose.int %5205, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16> loc(#loc1)
%5207 = torch.aten.view %5201, %5007 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16> loc(#loc1)
%5208 = torch.aten.mm %5207, %5206 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16> loc(#loc1)
%5209 = torch.aten.mul.Scalar %237, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16> loc(#loc1)
%5210 = torch.aten.add.Tensor %5209, %5208, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[512,1280],f16>, !torch.int -> !torch.vtensor<[512,1280],f16> loc(#loc1)
%5211 = torch.aten.view %5210, %4939 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5212 = torch.aten.add.Tensor %5211, %4940, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5213 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int> loc(#loc1)
%5214 = torch.aten.sum.dim_IntList %5212, %5213, %true, %none : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,256,1],f16> loc(#loc1)
%5215 = torch.aten.div.Scalar %5214, %int1280 : !torch.vtensor<[2,256,1],f16>, !torch.int -> !torch.vtensor<[2,256,1],f16> loc(#loc1)
%5216 = torch.prim.ListConstruct %int2, %int256, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%5217 = torch.aten.broadcast_to %5215, %5216 : !torch.vtensor<[2,256,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5218 = torch.aten.sub.Tensor %5212, %5217, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5219 = torch.aten.mul.Tensor %5218, %5218 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5220 = torch.aten.sum.dim_IntList %5219, %5213, %true, %none : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,256,1],f16> loc(#loc1)
%5221 = torch.aten.div.Scalar %5220, %int1280 : !torch.vtensor<[2,256,1],f16>, !torch.int -> !torch.vtensor<[2,256,1],f16> loc(#loc1)
%5222 = torch.aten.add.Scalar %5221, %float1.000000e-05, %int1 : !torch.vtensor<[2,256,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,256,1],f16> loc(#loc1)
%5223 = torch.aten.rsqrt %5222 : !torch.vtensor<[2,256,1],f16> -> !torch.vtensor<[2,256,1],f16> loc(#loc1)
%5224 = torch.prim.ListConstruct %int2, %int256, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%5225 = torch.aten.broadcast_to %5223, %5224 : !torch.vtensor<[2,256,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5226 = torch.aten.mul.Tensor %5218, %5225 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5227 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5228 = torch.aten.detach %5227 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5229 = torch.aten.view %5226, %4957 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,80,16],f16> loc(#loc1)
%5230 = torch.aten.abs %5229 : !torch.vtensor<[2,256,80,16],f16> -> !torch.vtensor<[2,256,80,16],f16> loc(#loc1)
%values_134, %indices_135 = torch.aten.max.dim %5230, %int3, %true : !torch.vtensor<[2,256,80,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,256,80,1],f16>, !torch.vtensor<[2,256,80,1],si64> loc(#loc1)
%5231 = torch.aten.view %values_134, %4960 : !torch.vtensor<[2,256,80,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,80,1],f16> loc(#loc1)
%5232 = torch.aten.broadcast_to %5231, %4957 : !torch.vtensor<[2,256,80,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,80,16],f16> loc(#loc1)
%5233 = torch.aten.clone %5232, %int0 : !torch.vtensor<[2,256,80,16],f16>, !torch.int -> !torch.vtensor<[2,256,80,16],f16> loc(#loc1)
%5234 = torch.aten.view %5233, %4939 : !torch.vtensor<[2,256,80,16],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5235 = torch.aten.sub.Tensor %5228, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%5236 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%5237 = torch.aten.pow.Tensor_Tensor %5236, %5235 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5238 = torch.aten.neg %5237 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5239 = torch.aten.neg %5238 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5240 = torch.aten.div.Tensor %5234, %5239 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5241 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5242 = torch.aten.detach %5241 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5243 = torch.aten.div.Tensor %5226, %5240 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5244 = torch.aten.add.Tensor %5243, %5242, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5245 = torch.aten.sub.Tensor %5228, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%5246 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%5247 = torch.aten.pow.Tensor_Tensor %5246, %5245 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5248 = torch.aten.neg %5247 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5249 = torch.aten.sub.Tensor %5228, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%5250 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%5251 = torch.aten.pow.Tensor_Tensor %5250, %5249 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5252 = torch.aten.sub.Tensor %5251, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%5253 = torch.aten.gt.Tensor %5244, %5252 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,256,1280],i1> loc(#loc1)
%5254 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%5255 = torch.aten.to.dtype %5254, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%5256 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%5257 = torch.aten.broadcast_to %5255, %5256 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%5258 = torch.valsem.aten.copy %5257, %5252, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%5259 = torch.aten.where.self %5253, %5258, %5244 : !torch.vtensor<[2,256,1280],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5260 = torch.aten.lt.Tensor %5259, %5248 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,256,1280],i1> loc(#loc1)
%5261 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%5262 = torch.aten.to.dtype %5261, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%5263 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%5264 = torch.aten.broadcast_to %5262, %5263 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%5265 = torch.valsem.aten.copy %5264, %5248, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%5266 = torch.aten.where.self %5260, %5265, %5259 : !torch.vtensor<[2,256,1280],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5267 = torch.aten.round %5266 : !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5268 = torch.aten.sub.Tensor %5267, %5242, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5269 = torch.aten.mul.Tensor %5268, %5240 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5270 = torch.aten.broadcast_to %238, %5000 : !torch.vtensor<[1280,80,1],f16>, !torch.list<int> -> !torch.vtensor<[1280,80,16],f16> loc(#loc1)
%5271 = torch.aten.clone %5270, %int0 : !torch.vtensor<[1280,80,16],f16>, !torch.int -> !torch.vtensor<[1280,80,16],f16> loc(#loc1)
%5272 = torch.aten.view %5271, %5003 : !torch.vtensor<[1280,80,16],f16>, !torch.list<int> -> !torch.vtensor<[1280,1280],f16> loc(#loc1)
%5273 = torch.aten.mul.Tensor %239, %5272 : !torch.vtensor<[1280,1280],si8>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16> loc(#loc1)
%5274 = torch.aten.transpose.int %5273, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16> loc(#loc1)
%5275 = torch.aten.view %5269, %5007 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16> loc(#loc1)
%5276 = torch.aten.mm %5275, %5274 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16> loc(#loc1)
%5277 = torch.aten.mul.Scalar %240, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16> loc(#loc1)
%5278 = torch.aten.add.Tensor %5277, %5276, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[512,1280],f16>, !torch.int -> !torch.vtensor<[512,1280],f16> loc(#loc1)
%5279 = torch.aten.view %5278, %4939 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5280 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5281 = torch.aten.detach %5280 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5282 = torch.aten.view %arg2, %1542 : !torch.vtensor<[2,77,768],f16>, !torch.list<int> -> !torch.vtensor<[2,77,48,16],f16> loc(#loc1)
%5283 = torch.aten.abs %5282 : !torch.vtensor<[2,77,48,16],f16> -> !torch.vtensor<[2,77,48,16],f16> loc(#loc1)
%values_136, %indices_137 = torch.aten.max.dim %5283, %int3, %true : !torch.vtensor<[2,77,48,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,77,48,1],f16>, !torch.vtensor<[2,77,48,1],si64> loc(#loc1)
%5284 = torch.aten.view %values_136, %1545 : !torch.vtensor<[2,77,48,1],f16>, !torch.list<int> -> !torch.vtensor<[2,77,48,1],f16> loc(#loc1)
%5285 = torch.aten.broadcast_to %5284, %1542 : !torch.vtensor<[2,77,48,1],f16>, !torch.list<int> -> !torch.vtensor<[2,77,48,16],f16> loc(#loc1)
%5286 = torch.aten.clone %5285, %int0 : !torch.vtensor<[2,77,48,16],f16>, !torch.int -> !torch.vtensor<[2,77,48,16],f16> loc(#loc1)
%5287 = torch.aten.view %5286, %1549 : !torch.vtensor<[2,77,48,16],f16>, !torch.list<int> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%5288 = torch.aten.sub.Tensor %5281, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%5289 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%5290 = torch.aten.pow.Tensor_Tensor %5289, %5288 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5291 = torch.aten.neg %5290 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5292 = torch.aten.neg %5291 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5293 = torch.aten.div.Tensor %5287, %5292 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%5294 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5295 = torch.aten.detach %5294 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5296 = torch.aten.div.Tensor %arg2, %5293 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[2,77,768],f16> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%5297 = torch.aten.add.Tensor %5296, %5295, %int1 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%5298 = torch.aten.sub.Tensor %5281, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%5299 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%5300 = torch.aten.pow.Tensor_Tensor %5299, %5298 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5301 = torch.aten.neg %5300 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5302 = torch.aten.sub.Tensor %5281, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%5303 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%5304 = torch.aten.pow.Tensor_Tensor %5303, %5302 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5305 = torch.aten.sub.Tensor %5304, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%5306 = torch.aten.gt.Tensor %5297, %5305 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,77,768],i1> loc(#loc1)
%5307 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%5308 = torch.aten.to.dtype %5307, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%5309 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%5310 = torch.aten.broadcast_to %5308, %5309 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%5311 = torch.valsem.aten.copy %5310, %5305, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%5312 = torch.aten.where.self %5306, %5311, %5297 : !torch.vtensor<[2,77,768],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,77,768],f16> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%5313 = torch.aten.lt.Tensor %5312, %5301 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,77,768],i1> loc(#loc1)
%5314 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%5315 = torch.aten.to.dtype %5314, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%5316 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%5317 = torch.aten.broadcast_to %5315, %5316 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%5318 = torch.valsem.aten.copy %5317, %5301, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%5319 = torch.aten.where.self %5313, %5318, %5312 : !torch.vtensor<[2,77,768],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,77,768],f16> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%5320 = torch.aten.round %5319 : !torch.vtensor<[2,77,768],f16> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%5321 = torch.aten.sub.Tensor %5320, %5295, %int1 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%5322 = torch.aten.mul.Tensor %5321, %5293 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[2,77,768],f16> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
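// Annotation (reading of the pattern above, not part of the original dump): the
// scale/shift/clamp/round/rescale sequence ending at %5322 is consistent with a
// fake-quantization round trip on the [2,77,768] encoder hidden states %arg2 --
// per-group (16-wide) max-abs scales, a zero point from %881, and clamp bounds of
// the form +/-2^(b-1) derived from the scalar parameters %880/%882 via the
// pow/neg/sub ops and the two where.self selects.
// The ops below appear to dequantize an si8-stored 768->1280 weight (%242, with
// per-group f16 scales %241) and apply it as a linear projection -- likely the
// key (to_k) projection of a cross-attention block.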
%5323 = torch.prim.ListConstruct %int1280, %int48, %int16 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc2)
%5324 = torch.aten.broadcast_to %241, %5323 : !torch.vtensor<[1280,48,1],f16>, !torch.list<int> -> !torch.vtensor<[1280,48,16],f16> loc(#loc1)
%5325 = torch.aten.clone %5324, %int0 : !torch.vtensor<[1280,48,16],f16>, !torch.int -> !torch.vtensor<[1280,48,16],f16> loc(#loc1)
%5326 = torch.prim.ListConstruct %int1280, %int768 : (!torch.int, !torch.int) -> !torch.list<int> loc(#loc4)
%5327 = torch.aten.view %5325, %5326 : !torch.vtensor<[1280,48,16],f16>, !torch.list<int> -> !torch.vtensor<[1280,768],f16> loc(#loc1)
%5328 = torch.aten.mul.Tensor %242, %5327 : !torch.vtensor<[1280,768],si8>, !torch.vtensor<[1280,768],f16> -> !torch.vtensor<[1280,768],f16> loc(#loc1)
%5329 = torch.aten.transpose.int %5328, %int0, %int1 : !torch.vtensor<[1280,768],f16>, !torch.int, !torch.int -> !torch.vtensor<[768,1280],f16> loc(#loc1)
%5330 = torch.aten.view %5322, %1593 : !torch.vtensor<[2,77,768],f16>, !torch.list<int> -> !torch.vtensor<[154,768],f16> loc(#loc1)
%5331 = torch.aten.mm %5330, %5329 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,1280],f16> -> !torch.vtensor<[154,1280],f16> loc(#loc1)
%5332 = torch.prim.ListConstruct %int2, %int77, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc0)
%5333 = torch.aten.view %5331, %5332 : !torch.vtensor<[154,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,77,1280],f16> loc(#loc1)
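// The same fake-quantization pattern now repeats on %arg2, followed by a second
// dequantized si8 768->1280 matmul (%243/%244) -- presumably the companion value
// (to_v) projection of the same cross-attention block.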
%5334 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5335 = torch.aten.detach %5334 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5336 = torch.aten.view %arg2, %1542 : !torch.vtensor<[2,77,768],f16>, !torch.list<int> -> !torch.vtensor<[2,77,48,16],f16> loc(#loc1)
%5337 = torch.aten.abs %5336 : !torch.vtensor<[2,77,48,16],f16> -> !torch.vtensor<[2,77,48,16],f16> loc(#loc1)
%values_138, %indices_139 = torch.aten.max.dim %5337, %int3, %true : !torch.vtensor<[2,77,48,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,77,48,1],f16>, !torch.vtensor<[2,77,48,1],si64> loc(#loc1)
%5338 = torch.aten.view %values_138, %1545 : !torch.vtensor<[2,77,48,1],f16>, !torch.list<int> -> !torch.vtensor<[2,77,48,1],f16> loc(#loc1)
%5339 = torch.aten.broadcast_to %5338, %1542 : !torch.vtensor<[2,77,48,1],f16>, !torch.list<int> -> !torch.vtensor<[2,77,48,16],f16> loc(#loc1)
%5340 = torch.aten.clone %5339, %int0 : !torch.vtensor<[2,77,48,16],f16>, !torch.int -> !torch.vtensor<[2,77,48,16],f16> loc(#loc1)
%5341 = torch.aten.view %5340, %1549 : !torch.vtensor<[2,77,48,16],f16>, !torch.list<int> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%5342 = torch.aten.sub.Tensor %5335, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%5343 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%5344 = torch.aten.pow.Tensor_Tensor %5343, %5342 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5345 = torch.aten.neg %5344 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5346 = torch.aten.neg %5345 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5347 = torch.aten.div.Tensor %5341, %5346 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%5348 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5349 = torch.aten.detach %5348 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5350 = torch.aten.div.Tensor %arg2, %5347 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[2,77,768],f16> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%5351 = torch.aten.add.Tensor %5350, %5349, %int1 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%5352 = torch.aten.sub.Tensor %5335, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%5353 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%5354 = torch.aten.pow.Tensor_Tensor %5353, %5352 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5355 = torch.aten.neg %5354 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5356 = torch.aten.sub.Tensor %5335, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%5357 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%5358 = torch.aten.pow.Tensor_Tensor %5357, %5356 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5359 = torch.aten.sub.Tensor %5358, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%5360 = torch.aten.gt.Tensor %5351, %5359 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,77,768],i1> loc(#loc1)
%5361 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%5362 = torch.aten.to.dtype %5361, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%5363 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%5364 = torch.aten.broadcast_to %5362, %5363 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%5365 = torch.valsem.aten.copy %5364, %5359, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%5366 = torch.aten.where.self %5360, %5365, %5351 : !torch.vtensor<[2,77,768],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,77,768],f16> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%5367 = torch.aten.lt.Tensor %5366, %5355 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,77,768],i1> loc(#loc1)
%5368 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%5369 = torch.aten.to.dtype %5368, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%5370 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%5371 = torch.aten.broadcast_to %5369, %5370 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%5372 = torch.valsem.aten.copy %5371, %5355, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%5373 = torch.aten.where.self %5367, %5372, %5366 : !torch.vtensor<[2,77,768],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,77,768],f16> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%5374 = torch.aten.round %5373 : !torch.vtensor<[2,77,768],f16> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%5375 = torch.aten.sub.Tensor %5374, %5349, %int1 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%5376 = torch.aten.mul.Tensor %5375, %5347 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[2,77,768],f16> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%5377 = torch.aten.broadcast_to %243, %5323 : !torch.vtensor<[1280,48,1],f16>, !torch.list<int> -> !torch.vtensor<[1280,48,16],f16> loc(#loc1)
%5378 = torch.aten.clone %5377, %int0 : !torch.vtensor<[1280,48,16],f16>, !torch.int -> !torch.vtensor<[1280,48,16],f16> loc(#loc1)
%5379 = torch.aten.view %5378, %5326 : !torch.vtensor<[1280,48,16],f16>, !torch.list<int> -> !torch.vtensor<[1280,768],f16> loc(#loc1)
%5380 = torch.aten.mul.Tensor %244, %5379 : !torch.vtensor<[1280,768],si8>, !torch.vtensor<[1280,768],f16> -> !torch.vtensor<[1280,768],f16> loc(#loc1)
%5381 = torch.aten.transpose.int %5380, %int0, %int1 : !torch.vtensor<[1280,768],f16>, !torch.int, !torch.int -> !torch.vtensor<[768,1280],f16> loc(#loc1)
%5382 = torch.aten.view %5376, %1593 : !torch.vtensor<[2,77,768],f16>, !torch.list<int> -> !torch.vtensor<[154,768],f16> loc(#loc1)
%5383 = torch.aten.mm %5382, %5381 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,1280],f16> -> !torch.vtensor<[154,1280],f16> loc(#loc1)
%5384 = torch.aten.view %5383, %5332 : !torch.vtensor<[154,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,77,1280],f16> loc(#loc1)
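// Multi-head cross-attention core: q/k/v are reshaped to [16, seq, 160]
// (batch 2 x 8 heads, head_dim 160), scores come from bmm(q, k^T) scaled by
// %0 ~= 0.0790569 = 1/sqrt(160), followed by a max-subtracted softmax over the
// key axis (max.dim, sub, exp, sum, div) and a bmm with v, then a reshape back
// to [2,256,1280].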
%5385 = torch.aten.view %5279, %5119 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16> loc(#loc1)
%5386 = torch.aten.permute %5385, %1380 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16> loc(#loc1)
%5387 = torch.aten.clone %5386, %int0 : !torch.vtensor<[2,8,256,160],f16>, !torch.int -> !torch.vtensor<[2,8,256,160],f16> loc(#loc1)
%5388 = torch.aten.view %5387, %5123 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16> loc(#loc1)
%5389 = torch.prim.ListConstruct %int2, %int77, %int8, %int160 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc5)
%5390 = torch.aten.view %5333, %5389 : !torch.vtensor<[2,77,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,77,8,160],f16> loc(#loc1)
%5391 = torch.aten.permute %5390, %1380 : !torch.vtensor<[2,77,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,160],f16> loc(#loc1)
%5392 = torch.aten.clone %5391, %int0 : !torch.vtensor<[2,8,77,160],f16>, !torch.int -> !torch.vtensor<[2,8,77,160],f16> loc(#loc1)
%5393 = torch.prim.ListConstruct %int16, %int77, %int160 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc2)
%5394 = torch.aten.view %5392, %5393 : !torch.vtensor<[2,8,77,160],f16>, !torch.list<int> -> !torch.vtensor<[16,77,160],f16> loc(#loc1)
%5395 = torch.aten.view %5384, %5389 : !torch.vtensor<[2,77,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,77,8,160],f16> loc(#loc1)
%5396 = torch.aten.permute %5395, %1380 : !torch.vtensor<[2,77,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,160],f16> loc(#loc1)
%5397 = torch.aten.clone %5396, %int0 : !torch.vtensor<[2,8,77,160],f16>, !torch.int -> !torch.vtensor<[2,8,77,160],f16> loc(#loc1)
%5398 = torch.aten.view %5397, %5393 : !torch.vtensor<[2,8,77,160],f16>, !torch.list<int> -> !torch.vtensor<[16,77,160],f16> loc(#loc1)
%5399 = torch.aten.transpose.int %5394, %int-1, %int-2 : !torch.vtensor<[16,77,160],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,160,77],f16> loc(#loc1)
%5400 = torch.aten.broadcast_to %5388, %5123 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16> loc(#loc1)
%5401 = torch.aten.view %5400, %5123 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16> loc(#loc1)
%5402 = torch.prim.ListConstruct %int16, %int160, %int77 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc2)
%5403 = torch.aten.broadcast_to %5399, %5402 : !torch.vtensor<[16,160,77],f16>, !torch.list<int> -> !torch.vtensor<[16,160,77],f16> loc(#loc1)
%5404 = torch.aten.view %5403, %5402 : !torch.vtensor<[16,160,77],f16>, !torch.list<int> -> !torch.vtensor<[16,160,77],f16> loc(#loc1)
%5405 = torch.aten.bmm %5401, %5404 : !torch.vtensor<[16,256,160],f16>, !torch.vtensor<[16,160,77],f16> -> !torch.vtensor<[16,256,77],f16> loc(#loc1)
%5406 = torch.prim.ListConstruct %int16, %int256, %int77 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc4)
%5407 = torch.aten.view %5405, %5406 : !torch.vtensor<[16,256,77],f16>, !torch.list<int> -> !torch.vtensor<[16,256,77],f16> loc(#loc1)
%5408 = torch.aten.mul.Tensor %5407, %0 : !torch.vtensor<[16,256,77],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,256,77],f16> loc(#loc1)
%values_140, %indices_141 = torch.aten.max.dim %5408, %int-1, %true : !torch.vtensor<[16,256,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,256,1],f16>, !torch.vtensor<[16,256,1],si64> loc(#loc1)
%5409 = torch.aten.sub.Tensor %5408, %values_140, %float1.000000e00 : !torch.vtensor<[16,256,77],f16>, !torch.vtensor<[16,256,1],f16>, !torch.float -> !torch.vtensor<[16,256,77],f16> loc(#loc1)
%5410 = torch.aten.exp %5409 : !torch.vtensor<[16,256,77],f16> -> !torch.vtensor<[16,256,77],f16> loc(#loc1)
%5411 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int> loc(#loc1)
%5412 = torch.aten.sum.dim_IntList %5410, %5411, %true, %none : !torch.vtensor<[16,256,77],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[16,256,1],f16> loc(#loc1)
%5413 = torch.aten.div.Tensor %5410, %5412 : !torch.vtensor<[16,256,77],f16>, !torch.vtensor<[16,256,1],f16> -> !torch.vtensor<[16,256,77],f16> loc(#loc1)
%5414 = torch.aten.broadcast_to %5413, %5406 : !torch.vtensor<[16,256,77],f16>, !torch.list<int> -> !torch.vtensor<[16,256,77],f16> loc(#loc1)
%5415 = torch.aten.view %5414, %5406 : !torch.vtensor<[16,256,77],f16>, !torch.list<int> -> !torch.vtensor<[16,256,77],f16> loc(#loc1)
%5416 = torch.aten.broadcast_to %5398, %5393 : !torch.vtensor<[16,77,160],f16>, !torch.list<int> -> !torch.vtensor<[16,77,160],f16> loc(#loc1)
%5417 = torch.aten.view %5416, %5393 : !torch.vtensor<[16,77,160],f16>, !torch.list<int> -> !torch.vtensor<[16,77,160],f16> loc(#loc1)
%5418 = torch.aten.bmm %5415, %5417 : !torch.vtensor<[16,256,77],f16>, !torch.vtensor<[16,77,160],f16> -> !torch.vtensor<[16,256,160],f16> loc(#loc1)
%5419 = torch.aten.view %5418, %5123 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16> loc(#loc1)
%5420 = torch.aten.view %5419, %5154 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16> loc(#loc1)
%5421 = torch.aten.permute %5420, %1380 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16> loc(#loc1)
%5422 = torch.aten.clone %5421, %int0 : !torch.vtensor<[2,256,8,160],f16>, !torch.int -> !torch.vtensor<[2,256,8,160],f16> loc(#loc1)
%5423 = torch.aten.view %5422, %4939 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
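// Below: the attention output is fake-quantized with the same round-trip
// pattern, the si8 1280x1280 weight %246 (scales %245) is dequantized and
// applied with bias %247 -- likely the attention output projection (to_out in
// diffusers terms) -- and the residual %5212 is added back in.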
%5424 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5425 = torch.aten.detach %5424 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5426 = torch.aten.view %5423, %4957 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,80,16],f16> loc(#loc1)
%5427 = torch.aten.abs %5426 : !torch.vtensor<[2,256,80,16],f16> -> !torch.vtensor<[2,256,80,16],f16> loc(#loc1)
%values_142, %indices_143 = torch.aten.max.dim %5427, %int3, %true : !torch.vtensor<[2,256,80,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,256,80,1],f16>, !torch.vtensor<[2,256,80,1],si64> loc(#loc1)
%5428 = torch.aten.view %values_142, %4960 : !torch.vtensor<[2,256,80,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,80,1],f16> loc(#loc1)
%5429 = torch.aten.broadcast_to %5428, %4957 : !torch.vtensor<[2,256,80,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,80,16],f16> loc(#loc1)
%5430 = torch.aten.clone %5429, %int0 : !torch.vtensor<[2,256,80,16],f16>, !torch.int -> !torch.vtensor<[2,256,80,16],f16> loc(#loc1)
%5431 = torch.aten.view %5430, %4939 : !torch.vtensor<[2,256,80,16],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5432 = torch.aten.sub.Tensor %5425, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%5433 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%5434 = torch.aten.pow.Tensor_Tensor %5433, %5432 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5435 = torch.aten.neg %5434 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5436 = torch.aten.neg %5435 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5437 = torch.aten.div.Tensor %5431, %5436 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5438 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5439 = torch.aten.detach %5438 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5440 = torch.aten.div.Tensor %5423, %5437 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5441 = torch.aten.add.Tensor %5440, %5439, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5442 = torch.aten.sub.Tensor %5425, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%5443 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%5444 = torch.aten.pow.Tensor_Tensor %5443, %5442 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5445 = torch.aten.neg %5444 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5446 = torch.aten.sub.Tensor %5425, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%5447 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%5448 = torch.aten.pow.Tensor_Tensor %5447, %5446 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5449 = torch.aten.sub.Tensor %5448, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%5450 = torch.aten.gt.Tensor %5441, %5449 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,256,1280],i1> loc(#loc1)
%5451 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%5452 = torch.aten.to.dtype %5451, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%5453 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%5454 = torch.aten.broadcast_to %5452, %5453 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%5455 = torch.valsem.aten.copy %5454, %5449, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%5456 = torch.aten.where.self %5450, %5455, %5441 : !torch.vtensor<[2,256,1280],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5457 = torch.aten.lt.Tensor %5456, %5445 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,256,1280],i1> loc(#loc1)
%5458 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%5459 = torch.aten.to.dtype %5458, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%5460 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%5461 = torch.aten.broadcast_to %5459, %5460 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%5462 = torch.valsem.aten.copy %5461, %5445, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%5463 = torch.aten.where.self %5457, %5462, %5456 : !torch.vtensor<[2,256,1280],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5464 = torch.aten.round %5463 : !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5465 = torch.aten.sub.Tensor %5464, %5439, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5466 = torch.aten.mul.Tensor %5465, %5437 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5467 = torch.aten.broadcast_to %245, %5000 : !torch.vtensor<[1280,80,1],f16>, !torch.list<int> -> !torch.vtensor<[1280,80,16],f16> loc(#loc1)
%5468 = torch.aten.clone %5467, %int0 : !torch.vtensor<[1280,80,16],f16>, !torch.int -> !torch.vtensor<[1280,80,16],f16> loc(#loc1)
%5469 = torch.aten.view %5468, %5003 : !torch.vtensor<[1280,80,16],f16>, !torch.list<int> -> !torch.vtensor<[1280,1280],f16> loc(#loc1)
%5470 = torch.aten.mul.Tensor %246, %5469 : !torch.vtensor<[1280,1280],si8>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16> loc(#loc1)
%5471 = torch.aten.transpose.int %5470, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16> loc(#loc1)
%5472 = torch.aten.view %5466, %5007 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16> loc(#loc1)
%5473 = torch.aten.mm %5472, %5471 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16> loc(#loc1)
%5474 = torch.aten.mul.Scalar %247, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16> loc(#loc1)
%5475 = torch.aten.add.Tensor %5474, %5473, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[512,1280],f16>, !torch.int -> !torch.vtensor<[512,1280],f16> loc(#loc1)
%5476 = torch.aten.view %5475, %4939 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5477 = torch.aten.add.Tensor %5476, %5212, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
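// Decomposed LayerNorm over the last (1280) dim: mean via sum/div, centering,
// squared deviations, variance, rsqrt(var + 1e-05), then normalization. No
// affine scale/shift appears in this span, so it is presumably folded elsewhere
// or absent in this trace.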
%5478 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int> loc(#loc1)
%5479 = torch.aten.sum.dim_IntList %5477, %5478, %true, %none : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,256,1],f16> loc(#loc1)
%5480 = torch.aten.div.Scalar %5479, %int1280 : !torch.vtensor<[2,256,1],f16>, !torch.int -> !torch.vtensor<[2,256,1],f16> loc(#loc1)
%5481 = torch.prim.ListConstruct %int2, %int256, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%5482 = torch.aten.broadcast_to %5480, %5481 : !torch.vtensor<[2,256,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5483 = torch.aten.sub.Tensor %5477, %5482, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5484 = torch.aten.mul.Tensor %5483, %5483 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5485 = torch.aten.sum.dim_IntList %5484, %5478, %true, %none : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,256,1],f16> loc(#loc1)
%5486 = torch.aten.div.Scalar %5485, %int1280 : !torch.vtensor<[2,256,1],f16>, !torch.int -> !torch.vtensor<[2,256,1],f16> loc(#loc1)
%5487 = torch.aten.add.Scalar %5486, %float1.000000e-05, %int1 : !torch.vtensor<[2,256,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,256,1],f16> loc(#loc1)
%5488 = torch.aten.rsqrt %5487 : !torch.vtensor<[2,256,1],f16> -> !torch.vtensor<[2,256,1],f16> loc(#loc1)
%5489 = torch.prim.ListConstruct %int2, %int256, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%5490 = torch.aten.broadcast_to %5488, %5489 : !torch.vtensor<[2,256,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5491 = torch.aten.mul.Tensor %5483, %5490 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
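// GEGLU feed-forward, first half: a dequantized si8 1280->10240 linear
// (%248/%249, bias %250), the [2,256,10240] result split into two
// [2,256,5120] halves, GELU (approximation flag %str) applied to the second
// half, and an elementwise product of the two halves.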
%5492 = torch.prim.ListConstruct %int10240, %int80, %int16 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc2)
%5493 = torch.aten.broadcast_to %248, %5492 : !torch.vtensor<[10240,80,1],f16>, !torch.list<int> -> !torch.vtensor<[10240,80,16],f16> loc(#loc1)
%5494 = torch.aten.clone %5493, %int0 : !torch.vtensor<[10240,80,16],f16>, !torch.int -> !torch.vtensor<[10240,80,16],f16> loc(#loc1)
%5495 = torch.prim.ListConstruct %int10240, %int1280 : (!torch.int, !torch.int) -> !torch.list<int> loc(#loc4)
%5496 = torch.aten.view %5494, %5495 : !torch.vtensor<[10240,80,16],f16>, !torch.list<int> -> !torch.vtensor<[10240,1280],f16> loc(#loc1)
%5497 = torch.aten.mul.Tensor %249, %5496 : !torch.vtensor<[10240,1280],si8>, !torch.vtensor<[10240,1280],f16> -> !torch.vtensor<[10240,1280],f16> loc(#loc1)
%5498 = torch.aten.transpose.int %5497, %int0, %int1 : !torch.vtensor<[10240,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,10240],f16> loc(#loc1)
%5499 = torch.aten.view %5491, %5007 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16> loc(#loc1)
%5500 = torch.aten.mm %5499, %5498 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,10240],f16> -> !torch.vtensor<[512,10240],f16> loc(#loc1)
%5501 = torch.aten.mul.Scalar %250, %int1 : !torch.vtensor<[10240],f16>, !torch.int -> !torch.vtensor<[10240],f16> loc(#loc1)
%5502 = torch.aten.add.Tensor %5501, %5500, %int1 : !torch.vtensor<[10240],f16>, !torch.vtensor<[512,10240],f16>, !torch.int -> !torch.vtensor<[512,10240],f16> loc(#loc1)
%5503 = torch.prim.ListConstruct %int2, %int256, %int10240 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc3)
%5504 = torch.aten.view %5502, %5503 : !torch.vtensor<[512,10240],f16>, !torch.list<int> -> !torch.vtensor<[2,256,10240],f16> loc(#loc1)
%5505 = torch.aten.slice.Tensor %5504, %int-1, %int0, %int5120, %int1 : !torch.vtensor<[2,256,10240],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,256,5120],f16> loc(#loc1)
%5506 = torch.aten.slice.Tensor %5504, %int-1, %int5120, %int10240, %int1 : !torch.vtensor<[2,256,10240],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,256,5120],f16> loc(#loc1)
%5507 = torch.aten.gelu %5506, %str : !torch.vtensor<[2,256,5120],f16>, !torch.str -> !torch.vtensor<[2,256,5120],f16> loc(#loc1)
%5508 = torch.aten.mul.Tensor %5505, %5507 : !torch.vtensor<[2,256,5120],f16>, !torch.vtensor<[2,256,5120],f16> -> !torch.vtensor<[2,256,5120],f16> loc(#loc1)
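// Second half of the feed-forward: a dequantized si8 5120->1280 linear
// (%251/%252, bias %253), then the residual add with the attention-block
// output %5477.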
%5509 = torch.prim.ListConstruct %int1280, %int320, %int16 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc2)
%5510 = torch.aten.broadcast_to %251, %5509 : !torch.vtensor<[1280,320,1],f16>, !torch.list<int> -> !torch.vtensor<[1280,320,16],f16> loc(#loc1)
%5511 = torch.aten.clone %5510, %int0 : !torch.vtensor<[1280,320,16],f16>, !torch.int -> !torch.vtensor<[1280,320,16],f16> loc(#loc1)
%5512 = torch.prim.ListConstruct %int1280, %int5120 : (!torch.int, !torch.int) -> !torch.list<int> loc(#loc4)
%5513 = torch.aten.view %5511, %5512 : !torch.vtensor<[1280,320,16],f16>, !torch.list<int> -> !torch.vtensor<[1280,5120],f16> loc(#loc1)
%5514 = torch.aten.mul.Tensor %252, %5513 : !torch.vtensor<[1280,5120],si8>, !torch.vtensor<[1280,5120],f16> -> !torch.vtensor<[1280,5120],f16> loc(#loc1)
%5515 = torch.aten.transpose.int %5514, %int0, %int1 : !torch.vtensor<[1280,5120],f16>, !torch.int, !torch.int -> !torch.vtensor<[5120,1280],f16> loc(#loc1)
%5516 = torch.prim.ListConstruct %int512, %int5120 : (!torch.int, !torch.int) -> !torch.list<int> loc(#loc3)
%5517 = torch.aten.view %5508, %5516 : !torch.vtensor<[2,256,5120],f16>, !torch.list<int> -> !torch.vtensor<[512,5120],f16> loc(#loc1)
%5518 = torch.aten.mm %5517, %5515 : !torch.vtensor<[512,5120],f16>, !torch.vtensor<[5120,1280],f16> -> !torch.vtensor<[512,1280],f16> loc(#loc1)
%5519 = torch.aten.mul.Scalar %253, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16> loc(#loc1)
%5520 = torch.aten.add.Tensor %5519, %5518, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[512,1280],f16>, !torch.int -> !torch.vtensor<[512,1280],f16> loc(#loc1)
%5521 = torch.aten.view %5520, %4939 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5522 = torch.aten.add.Tensor %5521, %5477, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
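// The [2,256,1280] token sequence is reshaped back to NCHW [2,1280,16,16], the
// activation is fake-quantized per group of 16 channels, and a dequantized 1x1
// convolution (%254/%255, bias %256; stride 1, no padding, as the 16x16 shape
// is preserved) follows -- consistent with the proj_out of a Transformer2DModel
// block -- before the residual add with the block input %4860.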
%5523 = torch.prim.ListConstruct %int2, %int16, %int16, %int1280 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc5)
%5524 = torch.aten.view %5522, %5523 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,16,16,1280],f16> loc(#loc1)
%5525 = torch.aten.permute %5524, %1789 : !torch.vtensor<[2,16,16,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%5526 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5527 = torch.aten.detach %5526 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5528 = torch.aten.view %5525, %4759 : !torch.vtensor<[2,1280,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,80,16,16,16],f16> loc(#loc1)
%5529 = torch.aten.abs %5528 : !torch.vtensor<[2,80,16,16,16],f16> -> !torch.vtensor<[2,80,16,16,16],f16> loc(#loc1)
%values_144, %indices_145 = torch.aten.max.dim %5529, %int2, %true : !torch.vtensor<[2,80,16,16,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,80,1,16,16],f16>, !torch.vtensor<[2,80,1,16,16],si64> loc(#loc1)
%5530 = torch.aten.view %values_144, %4762 : !torch.vtensor<[2,80,1,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,80,1,16,16],f16> loc(#loc1)
%5531 = torch.aten.broadcast_to %5530, %4759 : !torch.vtensor<[2,80,1,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,80,16,16,16],f16> loc(#loc1)
%5532 = torch.aten.clone %5531, %int0 : !torch.vtensor<[2,80,16,16,16],f16>, !torch.int -> !torch.vtensor<[2,80,16,16,16],f16> loc(#loc1)
%5533 = torch.aten.view %5532, %4742 : !torch.vtensor<[2,80,16,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%5534 = torch.aten.sub.Tensor %5527, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%5535 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%5536 = torch.aten.pow.Tensor_Tensor %5535, %5534 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5537 = torch.aten.neg %5536 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5538 = torch.aten.neg %5537 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5539 = torch.aten.div.Tensor %5533, %5538 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%5540 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5541 = torch.aten.detach %5540 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5542 = torch.aten.div.Tensor %5525, %5539 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f16> -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%5543 = torch.aten.add.Tensor %5542, %5541, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%5544 = torch.aten.sub.Tensor %5527, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%5545 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%5546 = torch.aten.pow.Tensor_Tensor %5545, %5544 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5547 = torch.aten.neg %5546 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5548 = torch.aten.sub.Tensor %5527, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%5549 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%5550 = torch.aten.pow.Tensor_Tensor %5549, %5548 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5551 = torch.aten.sub.Tensor %5550, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%5552 = torch.aten.gt.Tensor %5543, %5551 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,1280,16,16],i1> loc(#loc1)
%5553 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%5554 = torch.aten.to.dtype %5553, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%5555 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%5556 = torch.aten.broadcast_to %5554, %5555 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%5557 = torch.valsem.aten.copy %5556, %5551, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%5558 = torch.aten.where.self %5552, %5557, %5543 : !torch.vtensor<[2,1280,16,16],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,1280,16,16],f16> -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%5559 = torch.aten.lt.Tensor %5558, %5547 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,1280,16,16],i1> loc(#loc1)
%5560 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%5561 = torch.aten.to.dtype %5560, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%5562 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%5563 = torch.aten.broadcast_to %5561, %5562 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%5564 = torch.valsem.aten.copy %5563, %5547, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%5565 = torch.aten.where.self %5559, %5564, %5558 : !torch.vtensor<[2,1280,16,16],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,1280,16,16],f16> -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%5566 = torch.aten.round %5565 : !torch.vtensor<[2,1280,16,16],f16> -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%5567 = torch.aten.sub.Tensor %5566, %5541, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%5568 = torch.aten.mul.Tensor %5567, %5539 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f16> -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%5569 = torch.aten.broadcast_to %254, %4931 : !torch.vtensor<[1280,80,1,1,1],f16>, !torch.list<int> -> !torch.vtensor<[1280,80,16,1,1],f16> loc(#loc1)
%5570 = torch.aten.clone %5569, %int0 : !torch.vtensor<[1280,80,16,1,1],f16>, !torch.int -> !torch.vtensor<[1280,80,16,1,1],f16> loc(#loc1)
%5571 = torch.aten.view %5570, %4934 : !torch.vtensor<[1280,80,16,1,1],f16>, !torch.list<int> -> !torch.vtensor<[1280,1280,1,1],f16> loc(#loc1)
%5572 = torch.aten.mul.Tensor %255, %5571 : !torch.vtensor<[1280,1280,1,1],si8>, !torch.vtensor<[1280,1280,1,1],f16> -> !torch.vtensor<[1280,1280,1,1],f16> loc(#loc1)
%5573 = torch.aten.convolution %5568, %5572, %256, %933, %934, %933, %false, %934, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[1280,1280,1,1],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%5574 = torch.aten.add.Tensor %5573, %4860, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
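// Decomposed GroupNorm (32 groups; 40 channels x 16x16 = 10240 elements per
// group, hence the div.Scalar by 10240): statistics are accumulated in f32/f64,
// normalized with rsqrt(var + 1e-05), affine-scaled by %257/%258, cast back to
// f16, and passed through SiLU (x * sigmoid(x)).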
%5575 = torch.aten.clone %5574, %int0 : !torch.vtensor<[2,1280,16,16],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%5576 = torch.aten.view %5575, %4721 : !torch.vtensor<[2,1280,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,32,40,256],f16> loc(#loc1)
%5577 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%5578 = torch.aten.to.dtype %5577, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%5579 = torch.prim.ListConstruct %int2, %int32, %int40, %int256 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%5580 = torch.aten.broadcast_to %5578, %5579 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,40,256],f32> loc(#loc1)
%5581 = torch.valsem.aten.copy %5580, %5576, %false : !torch.vtensor<[2,32,40,256],f32>, !torch.vtensor<[2,32,40,256],f16>, !torch.bool -> !torch.vtensor<[2,32,40,256],f32> loc(#loc1)
%5582 = torch.aten.to.dtype %5581, %int7, %false, %false, %none : !torch.vtensor<[2,32,40,256],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,40,256],f64> loc(#loc1)
%5583 = torch.aten.sum.dim_IntList %5582, %943, %true, %none : !torch.vtensor<[2,32,40,256],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%5584 = torch.aten.div.Scalar %5583, %int10240 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%5585 = torch.aten.sub.Tensor %5582, %5584, %float1.000000e00 : !torch.vtensor<[2,32,40,256],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,40,256],f64> loc(#loc1)
%5586 = torch.aten.mul.Tensor %5585, %5585 : !torch.vtensor<[2,32,40,256],f64>, !torch.vtensor<[2,32,40,256],f64> -> !torch.vtensor<[2,32,40,256],f64> loc(#loc1)
%5587 = torch.aten.sum.dim_IntList %5586, %943, %true, %none : !torch.vtensor<[2,32,40,256],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%5588 = torch.aten.div.Scalar %5587, %int10240 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%5589 = torch.aten.to.dtype %5588, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%5590 = torch.aten.sum.dim_IntList %5581, %943, %true, %none : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%5591 = torch.aten.div.Scalar %5590, %int10240 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%5592 = torch.aten.add.Tensor %5589, %5, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%5593 = torch.aten.rsqrt %5592 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%5594 = torch.aten.sub.Tensor %5576, %5591, %int1 : !torch.vtensor<[2,32,40,256],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,256],f32> loc(#loc1)
%5595 = torch.aten.mul.Tensor %5594, %5593 : !torch.vtensor<[2,32,40,256],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,256],f32> loc(#loc1)
%5596 = torch.aten.view %5595, %4742 : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f32> loc(#loc1)
%5597 = torch.aten.unsqueeze %257, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16> loc(#loc1)
%5598 = torch.aten.unsqueeze %5597, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16> loc(#loc1)
%5599 = torch.aten.mul.Tensor %5596, %5598 : !torch.vtensor<[2,1280,16,16],f32>, !torch.vtensor<[1280,1,1],f16> -> !torch.vtensor<[2,1280,16,16],f32> loc(#loc1)
%5600 = torch.aten.unsqueeze %258, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16> loc(#loc1)
%5601 = torch.aten.unsqueeze %5600, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16> loc(#loc1)
%5602 = torch.aten.add.Tensor %5599, %5601, %int1 : !torch.vtensor<[2,1280,16,16],f32>, !torch.vtensor<[1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f32> loc(#loc1)
%5603 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%5604 = torch.aten.to.dtype %5603, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%5605 = torch.prim.ListConstruct %int2, %int1280, %int16, %int16 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%5606 = torch.aten.broadcast_to %5604, %5605 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%5607 = torch.valsem.aten.copy %5606, %5602, %false : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f32>, !torch.bool -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%5608 = torch.aten.sigmoid %5607 : !torch.vtensor<[2,1280,16,16],f16> -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%5609 = torch.aten.mul.Tensor %5608, %5607 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f16> -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
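// Fake-quantize the SiLU output per group of 16 channels, then apply a
// dequantized si8 3x3 convolution (%259/%260, bias %261; 1280->1280, stride 1
// and padding 1 inferred from the preserved 16x16 shape) -- likely the first
// conv of a ResNet block.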
%5610 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5611 = torch.aten.detach %5610 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5612 = torch.aten.view %5609, %4759 : !torch.vtensor<[2,1280,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,80,16,16,16],f16> loc(#loc1)
%5613 = torch.aten.abs %5612 : !torch.vtensor<[2,80,16,16,16],f16> -> !torch.vtensor<[2,80,16,16,16],f16> loc(#loc1)
%values_146, %indices_147 = torch.aten.max.dim %5613, %int2, %true : !torch.vtensor<[2,80,16,16,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,80,1,16,16],f16>, !torch.vtensor<[2,80,1,16,16],si64> loc(#loc1)
%5614 = torch.aten.view %values_146, %4762 : !torch.vtensor<[2,80,1,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,80,1,16,16],f16> loc(#loc1)
%5615 = torch.aten.broadcast_to %5614, %4759 : !torch.vtensor<[2,80,1,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,80,16,16,16],f16> loc(#loc1)
%5616 = torch.aten.clone %5615, %int0 : !torch.vtensor<[2,80,16,16,16],f16>, !torch.int -> !torch.vtensor<[2,80,16,16,16],f16> loc(#loc1)
%5617 = torch.aten.view %5616, %4742 : !torch.vtensor<[2,80,16,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%5618 = torch.aten.sub.Tensor %5611, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%5619 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%5620 = torch.aten.pow.Tensor_Tensor %5619, %5618 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5621 = torch.aten.neg %5620 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5622 = torch.aten.neg %5621 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5623 = torch.aten.div.Tensor %5617, %5622 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%5624 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5625 = torch.aten.detach %5624 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5626 = torch.aten.div.Tensor %5609, %5623 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f16> -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%5627 = torch.aten.add.Tensor %5626, %5625, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%5628 = torch.aten.sub.Tensor %5611, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%5629 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%5630 = torch.aten.pow.Tensor_Tensor %5629, %5628 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5631 = torch.aten.neg %5630 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5632 = torch.aten.sub.Tensor %5611, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%5633 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%5634 = torch.aten.pow.Tensor_Tensor %5633, %5632 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5635 = torch.aten.sub.Tensor %5634, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%5636 = torch.aten.gt.Tensor %5627, %5635 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,1280,16,16],i1> loc(#loc1)
%5637 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%5638 = torch.aten.to.dtype %5637, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%5639 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%5640 = torch.aten.broadcast_to %5638, %5639 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%5641 = torch.valsem.aten.copy %5640, %5635, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%5642 = torch.aten.where.self %5636, %5641, %5627 : !torch.vtensor<[2,1280,16,16],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,1280,16,16],f16> -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%5643 = torch.aten.lt.Tensor %5642, %5631 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,1280,16,16],i1> loc(#loc1)
%5644 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%5645 = torch.aten.to.dtype %5644, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%5646 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%5647 = torch.aten.broadcast_to %5645, %5646 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%5648 = torch.valsem.aten.copy %5647, %5631, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%5649 = torch.aten.where.self %5643, %5648, %5642 : !torch.vtensor<[2,1280,16,16],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,1280,16,16],f16> -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%5650 = torch.aten.round %5649 : !torch.vtensor<[2,1280,16,16],f16> -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%5651 = torch.aten.sub.Tensor %5650, %5625, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%5652 = torch.aten.mul.Tensor %5651, %5623 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f16> -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%5653 = torch.aten.broadcast_to %259, %4802 : !torch.vtensor<[1280,80,1,3,3],f16>, !torch.list<int> -> !torch.vtensor<[1280,80,16,3,3],f16> loc(#loc1)
%5654 = torch.aten.clone %5653, %int0 : !torch.vtensor<[1280,80,16,3,3],f16>, !torch.int -> !torch.vtensor<[1280,80,16,3,3],f16> loc(#loc1)
%5655 = torch.aten.view %5654, %4805 : !torch.vtensor<[1280,80,16,3,3],f16>, !torch.list<int> -> !torch.vtensor<[1280,1280,3,3],f16> loc(#loc1)
%5656 = torch.aten.mul.Tensor %260, %5655 : !torch.vtensor<[1280,1280,3,3],si8>, !torch.vtensor<[1280,1280,3,3],f16> -> !torch.vtensor<[1280,1280,3,3],f16> loc(#loc1)
%5657 = torch.aten.convolution %5652, %5656, %261, %933, %933, %933, %false, %934, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
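// Timestep-embedding injection: SiLU on the [2,1280] embedding %932, a
// 1280->1280 linear (%262, bias %263), then broadcast as [2,1280,1,1] and added
// to the conv output -- consistent with the time_emb_proj step of a ResNet
// block.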
%5658 = torch.aten.sigmoid %932 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16> loc(#loc1)
%5659 = torch.aten.mul.Tensor %5658, %932 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16> loc(#loc1)
%5660 = torch.aten.transpose.int %262, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16> loc(#loc1)
%5661 = torch.aten.mm %5659, %5660 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[2,1280],f16> loc(#loc1)
%5662 = torch.aten.mul.Scalar %263, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16> loc(#loc1)
%5663 = torch.aten.add.Tensor %5662, %5661, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[2,1280],f16>, !torch.int -> !torch.vtensor<[2,1280],f16> loc(#loc1)
%5664 = torch.aten.slice.Tensor %5663, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,1280],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16> loc(#loc1)
%5665 = torch.aten.slice.Tensor %5664, %int1, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,1280],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16> loc(#loc1)
%5666 = torch.aten.unsqueeze %5665, %int2 : !torch.vtensor<[2,1280],f16>, !torch.int -> !torch.vtensor<[2,1280,1],f16> loc(#loc1)
%5667 = torch.aten.unsqueeze %5666, %int3 : !torch.vtensor<[2,1280,1],f16>, !torch.int -> !torch.vtensor<[2,1280,1,1],f16> loc(#loc1)
%5668 = torch.aten.add.Tensor %5657, %5667, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
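// Second decomposed GroupNorm + SiLU over the time-conditioned features,
// structurally identical to the one above (32 groups, eps 1e-05, affine
// parameters %264/%265).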
%5669 = torch.aten.view %5668, %4721 : !torch.vtensor<[2,1280,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,32,40,256],f16> loc(#loc1)
%5670 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%5671 = torch.aten.to.dtype %5670, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%5672 = torch.prim.ListConstruct %int2, %int32, %int40, %int256 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%5673 = torch.aten.broadcast_to %5671, %5672 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,40,256],f32> loc(#loc1)
%5674 = torch.valsem.aten.copy %5673, %5669, %false : !torch.vtensor<[2,32,40,256],f32>, !torch.vtensor<[2,32,40,256],f16>, !torch.bool -> !torch.vtensor<[2,32,40,256],f32> loc(#loc1)
%5675 = torch.aten.to.dtype %5674, %int7, %false, %false, %none : !torch.vtensor<[2,32,40,256],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,40,256],f64> loc(#loc1)
%5676 = torch.aten.sum.dim_IntList %5675, %943, %true, %none : !torch.vtensor<[2,32,40,256],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%5677 = torch.aten.div.Scalar %5676, %int10240 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%5678 = torch.aten.sub.Tensor %5675, %5677, %float1.000000e00 : !torch.vtensor<[2,32,40,256],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,40,256],f64> loc(#loc1)
%5679 = torch.aten.mul.Tensor %5678, %5678 : !torch.vtensor<[2,32,40,256],f64>, !torch.vtensor<[2,32,40,256],f64> -> !torch.vtensor<[2,32,40,256],f64> loc(#loc1)
%5680 = torch.aten.sum.dim_IntList %5679, %943, %true, %none : !torch.vtensor<[2,32,40,256],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%5681 = torch.aten.div.Scalar %5680, %int10240 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%5682 = torch.aten.to.dtype %5681, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%5683 = torch.aten.sum.dim_IntList %5674, %943, %true, %none : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%5684 = torch.aten.div.Scalar %5683, %int10240 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%5685 = torch.aten.add.Tensor %5682, %5, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%5686 = torch.aten.rsqrt %5685 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%5687 = torch.aten.sub.Tensor %5669, %5684, %int1 : !torch.vtensor<[2,32,40,256],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,256],f32> loc(#loc1)
%5688 = torch.aten.mul.Tensor %5687, %5686 : !torch.vtensor<[2,32,40,256],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,256],f32> loc(#loc1)
%5689 = torch.aten.view %5688, %4742 : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f32> loc(#loc1)
%5690 = torch.aten.unsqueeze %264, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16> loc(#loc1)
%5691 = torch.aten.unsqueeze %5690, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16> loc(#loc1)
%5692 = torch.aten.mul.Tensor %5689, %5691 : !torch.vtensor<[2,1280,16,16],f32>, !torch.vtensor<[1280,1,1],f16> -> !torch.vtensor<[2,1280,16,16],f32> loc(#loc1)
%5693 = torch.aten.unsqueeze %265, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16> loc(#loc1)
%5694 = torch.aten.unsqueeze %5693, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16> loc(#loc1)
%5695 = torch.aten.add.Tensor %5692, %5694, %int1 : !torch.vtensor<[2,1280,16,16],f32>, !torch.vtensor<[1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f32> loc(#loc1)
%5696 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%5697 = torch.aten.to.dtype %5696, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%5698 = torch.prim.ListConstruct %int2, %int1280, %int16, %int16 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%5699 = torch.aten.broadcast_to %5697, %5698 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%5700 = torch.valsem.aten.copy %5699, %5695, %false : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f32>, !torch.bool -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%5701 = torch.aten.sigmoid %5700 : !torch.vtensor<[2,1280,16,16],f16> -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%5702 = torch.aten.mul.Tensor %5701, %5700 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f16> -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
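// The excerpt ends mid-pattern: one more per-group fake-quantization of the
// activation (max-abs scales over groups of 16 channels, clamping between the
// derived bounds via the two where.self selects), presumably feeding the ResNet
// block's second convolution beyond this truncated section.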
%5703 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5704 = torch.aten.detach %5703 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5705 = torch.aten.view %5702, %4759 : !torch.vtensor<[2,1280,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,80,16,16,16],f16> loc(#loc1)
%5706 = torch.aten.abs %5705 : !torch.vtensor<[2,80,16,16,16],f16> -> !torch.vtensor<[2,80,16,16,16],f16> loc(#loc1)
%values_148, %indices_149 = torch.aten.max.dim %5706, %int2, %true : !torch.vtensor<[2,80,16,16,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,80,1,16,16],f16>, !torch.vtensor<[2,80,1,16,16],si64> loc(#loc1)
%5707 = torch.aten.view %values_148, %4762 : !torch.vtensor<[2,80,1,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,80,1,16,16],f16> loc(#loc1)
%5708 = torch.aten.broadcast_to %5707, %4759 : !torch.vtensor<[2,80,1,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,80,16,16,16],f16> loc(#loc1)
%5709 = torch.aten.clone %5708, %int0 : !torch.vtensor<[2,80,16,16,16],f16>, !torch.int -> !torch.vtensor<[2,80,16,16,16],f16> loc(#loc1)
%5710 = torch.aten.view %5709, %4742 : !torch.vtensor<[2,80,16,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%5711 = torch.aten.sub.Tensor %5704, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%5712 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%5713 = torch.aten.pow.Tensor_Tensor %5712, %5711 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5714 = torch.aten.neg %5713 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5715 = torch.aten.neg %5714 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5716 = torch.aten.div.Tensor %5710, %5715 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%5717 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5718 = torch.aten.detach %5717 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5719 = torch.aten.div.Tensor %5702, %5716 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f16> -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%5720 = torch.aten.add.Tensor %5719, %5718, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%5721 = torch.aten.sub.Tensor %5704, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%5722 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%5723 = torch.aten.pow.Tensor_Tensor %5722, %5721 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5724 = torch.aten.neg %5723 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5725 = torch.aten.sub.Tensor %5704, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%5726 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%5727 = torch.aten.pow.Tensor_Tensor %5726, %5725 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5728 = torch.aten.sub.Tensor %5727, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%5729 = torch.aten.gt.Tensor %5720, %5728 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,1280,16,16],i1> loc(#loc1)
%5730 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%5731 = torch.aten.to.dtype %5730, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%5732 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%5733 = torch.aten.broadcast_to %5731, %5732 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%5734 = torch.valsem.aten.copy %5733, %5728, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%5735 = torch.aten.where.self %5729, %5734, %5720 : !torch.vtensor<[2,1280,16,16],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,1280,16,16],f16> -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%5736 = torch.aten.lt.Tensor %5735, %5724 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,1280,16,16],i1> loc(#loc1)
%5737 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%5738 = torch.aten.to.dtype %5737, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%5739 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%5740 = torch.aten.broadcast_to %5738, %5739 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%5741 = torch.valsem.aten.copy %5740, %5724, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%5742 = torch.aten.where.self %5736, %5741, %5735 : !torch.vtensor<[2,1280,16,16],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,1280,16,16],f16> -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%5743 = torch.aten.round %5742 : !torch.vtensor<[2,1280,16,16],f16> -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%5744 = torch.aten.sub.Tensor %5743, %5718, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%5745 = torch.aten.mul.Tensor %5744, %5716 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f16> -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
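    // Dequantize the int8 3x3 conv weight: broadcast the per-group f16 scales
    // %266 ([1280,80,1,3,3]) across each group of 16 input channels, reshape
    // to [1280,1280,3,3], multiply into the si8 weight %267, then run the
    // convolution (stride 1, padding 1, given the unchanged 16x16 output)
    // with bias %268.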
%5746 = torch.aten.broadcast_to %266, %4802 : !torch.vtensor<[1280,80,1,3,3],f16>, !torch.list<int> -> !torch.vtensor<[1280,80,16,3,3],f16> loc(#loc1)
%5747 = torch.aten.clone %5746, %int0 : !torch.vtensor<[1280,80,16,3,3],f16>, !torch.int -> !torch.vtensor<[1280,80,16,3,3],f16> loc(#loc1)
%5748 = torch.aten.view %5747, %4805 : !torch.vtensor<[1280,80,16,3,3],f16>, !torch.list<int> -> !torch.vtensor<[1280,1280,3,3],f16> loc(#loc1)
%5749 = torch.aten.mul.Tensor %267, %5748 : !torch.vtensor<[1280,1280,3,3],si8>, !torch.vtensor<[1280,1280,3,3],f16> -> !torch.vtensor<[1280,1280,3,3],f16> loc(#loc1)
%5750 = torch.aten.convolution %5745, %5749, %268, %933, %933, %933, %false, %934, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%5751 = torch.aten.add.Tensor %5574, %5750, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%5752 = torch.aten.div.Tensor %5751, %4 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
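    // Residual add with %5574 and a divide by the scalar output factor %4,
    // then GroupNorm over 32 groups: reshape to [2,32,40,256], accumulate the
    // mean in f32 and the variance in f64 (10240 elements per group), and
    // normalize with rsqrt(var + eps) using the small epsilon constant %3.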
%5753 = torch.aten.clone %5752, %int0 : !torch.vtensor<[2,1280,16,16],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%5754 = torch.aten.view %5753, %4721 : !torch.vtensor<[2,1280,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,32,40,256],f16> loc(#loc1)
%5755 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%5756 = torch.aten.to.dtype %5755, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%5757 = torch.prim.ListConstruct %int2, %int32, %int40, %int256 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%5758 = torch.aten.broadcast_to %5756, %5757 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[2,32,40,256],f32> loc(#loc1)
%5759 = torch.valsem.aten.copy %5758, %5754, %false : !torch.vtensor<[2,32,40,256],f32>, !torch.vtensor<[2,32,40,256],f16>, !torch.bool -> !torch.vtensor<[2,32,40,256],f32> loc(#loc1)
%5760 = torch.aten.to.dtype %5759, %int7, %false, %false, %none : !torch.vtensor<[2,32,40,256],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,40,256],f64> loc(#loc1)
%5761 = torch.aten.sum.dim_IntList %5760, %943, %true, %none : !torch.vtensor<[2,32,40,256],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%5762 = torch.aten.div.Scalar %5761, %int10240 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%5763 = torch.aten.sub.Tensor %5760, %5762, %float1.000000e00 : !torch.vtensor<[2,32,40,256],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,40,256],f64> loc(#loc1)
%5764 = torch.aten.mul.Tensor %5763, %5763 : !torch.vtensor<[2,32,40,256],f64>, !torch.vtensor<[2,32,40,256],f64> -> !torch.vtensor<[2,32,40,256],f64> loc(#loc1)
%5765 = torch.aten.sum.dim_IntList %5764, %943, %true, %none : !torch.vtensor<[2,32,40,256],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%5766 = torch.aten.div.Scalar %5765, %int10240 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64> loc(#loc1)
%5767 = torch.aten.to.dtype %5766, %int6, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%5768 = torch.aten.sum.dim_IntList %5759, %943, %true, %none : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%5769 = torch.aten.div.Scalar %5768, %int10240 : !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%5770 = torch.aten.add.Tensor %5767, %3, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%5771 = torch.aten.rsqrt %5770 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32> loc(#loc1)
%5772 = torch.aten.sub.Tensor %5754, %5769, %int1 : !torch.vtensor<[2,32,40,256],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,256],f32> loc(#loc1)
%5773 = torch.aten.mul.Tensor %5772, %5771 : !torch.vtensor<[2,32,40,256],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,256],f32> loc(#loc1)
%5774 = torch.aten.view %5773, %4742 : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f32> loc(#loc1)
%5775 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%5776 = torch.aten.to.dtype %5775, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%5777 = torch.prim.ListConstruct %int2, %int1280, %int16, %int16 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%5778 = torch.aten.broadcast_to %5776, %5777 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%5779 = torch.valsem.aten.copy %5778, %5774, %false : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f32>, !torch.bool -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
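    // Fake-quantize the normalized activations (same 80-groups-of-16 abs-max
    // scheme as above) ahead of the 1x1 projection.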
%5780 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5781 = torch.aten.detach %5780 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5782 = torch.aten.view %5779, %4759 : !torch.vtensor<[2,1280,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,80,16,16,16],f16> loc(#loc1)
%5783 = torch.aten.abs %5782 : !torch.vtensor<[2,80,16,16,16],f16> -> !torch.vtensor<[2,80,16,16,16],f16> loc(#loc1)
%values_150, %indices_151 = torch.aten.max.dim %5783, %int2, %true : !torch.vtensor<[2,80,16,16,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,80,1,16,16],f16>, !torch.vtensor<[2,80,1,16,16],si64> loc(#loc1)
%5784 = torch.aten.view %values_150, %4762 : !torch.vtensor<[2,80,1,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,80,1,16,16],f16> loc(#loc1)
%5785 = torch.aten.broadcast_to %5784, %4759 : !torch.vtensor<[2,80,1,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,80,16,16,16],f16> loc(#loc1)
%5786 = torch.aten.clone %5785, %int0 : !torch.vtensor<[2,80,16,16,16],f16>, !torch.int -> !torch.vtensor<[2,80,16,16,16],f16> loc(#loc1)
%5787 = torch.aten.view %5786, %4742 : !torch.vtensor<[2,80,16,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%5788 = torch.aten.sub.Tensor %5781, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%5789 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%5790 = torch.aten.pow.Tensor_Tensor %5789, %5788 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5791 = torch.aten.neg %5790 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5792 = torch.aten.neg %5791 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5793 = torch.aten.div.Tensor %5787, %5792 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%5794 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5795 = torch.aten.detach %5794 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5796 = torch.aten.div.Tensor %5779, %5793 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f16> -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%5797 = torch.aten.add.Tensor %5796, %5795, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%5798 = torch.aten.sub.Tensor %5781, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%5799 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%5800 = torch.aten.pow.Tensor_Tensor %5799, %5798 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5801 = torch.aten.neg %5800 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5802 = torch.aten.sub.Tensor %5781, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%5803 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%5804 = torch.aten.pow.Tensor_Tensor %5803, %5802 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5805 = torch.aten.sub.Tensor %5804, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%5806 = torch.aten.gt.Tensor %5797, %5805 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,1280,16,16],i1> loc(#loc1)
%5807 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%5808 = torch.aten.to.dtype %5807, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%5809 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%5810 = torch.aten.broadcast_to %5808, %5809 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%5811 = torch.valsem.aten.copy %5810, %5805, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%5812 = torch.aten.where.self %5806, %5811, %5797 : !torch.vtensor<[2,1280,16,16],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,1280,16,16],f16> -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%5813 = torch.aten.lt.Tensor %5812, %5801 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,1280,16,16],i1> loc(#loc1)
%5814 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%5815 = torch.aten.to.dtype %5814, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%5816 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%5817 = torch.aten.broadcast_to %5815, %5816 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%5818 = torch.valsem.aten.copy %5817, %5801, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%5819 = torch.aten.where.self %5813, %5818, %5812 : !torch.vtensor<[2,1280,16,16],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,1280,16,16],f16> -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%5820 = torch.aten.round %5819 : !torch.vtensor<[2,1280,16,16],f16> -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%5821 = torch.aten.sub.Tensor %5820, %5795, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%5822 = torch.aten.mul.Tensor %5821, %5793 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f16> -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
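    // Dequantize the int8 1x1 conv weight (%269 scales, %270 si8 values) and
    // apply it with bias %271 — consistent with the proj_in of a transformer
    // block, given the NCHW -> sequence reshape that follows.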
%5823 = torch.aten.broadcast_to %269, %4931 : !torch.vtensor<[1280,80,1,1,1],f16>, !torch.list<int> -> !torch.vtensor<[1280,80,16,1,1],f16> loc(#loc1)
%5824 = torch.aten.clone %5823, %int0 : !torch.vtensor<[1280,80,16,1,1],f16>, !torch.int -> !torch.vtensor<[1280,80,16,1,1],f16> loc(#loc1)
%5825 = torch.aten.view %5824, %4934 : !torch.vtensor<[1280,80,16,1,1],f16>, !torch.list<int> -> !torch.vtensor<[1280,1280,1,1],f16> loc(#loc1)
%5826 = torch.aten.mul.Tensor %270, %5825 : !torch.vtensor<[1280,1280,1,1],si8>, !torch.vtensor<[1280,1280,1,1],f16> -> !torch.vtensor<[1280,1280,1,1],f16> loc(#loc1)
%5827 = torch.aten.convolution %5822, %5826, %271, %933, %934, %933, %false, %934, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[1280,1280,1,1],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%5828 = torch.aten.permute %5827, %1196 : !torch.vtensor<[2,1280,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,16,16,1280],f16> loc(#loc1)
%5829 = torch.aten.view %5828, %4939 : !torch.vtensor<[2,16,16,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
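    // Now a [2,256,1280] token sequence. LayerNorm is computed by hand in
    // f16: per-token mean over the 1280 channels, variance,
    // rsqrt(var + 1e-5), then multiply by the centered input.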
%5830 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int> loc(#loc1)
%5831 = torch.aten.sum.dim_IntList %5829, %5830, %true, %none : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,256,1],f16> loc(#loc1)
%5832 = torch.aten.div.Scalar %5831, %int1280 : !torch.vtensor<[2,256,1],f16>, !torch.int -> !torch.vtensor<[2,256,1],f16> loc(#loc1)
%5833 = torch.prim.ListConstruct %int2, %int256, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%5834 = torch.aten.broadcast_to %5832, %5833 : !torch.vtensor<[2,256,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5835 = torch.aten.sub.Tensor %5829, %5834, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5836 = torch.aten.mul.Tensor %5835, %5835 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5837 = torch.aten.sum.dim_IntList %5836, %5830, %true, %none : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,256,1],f16> loc(#loc1)
%5838 = torch.aten.div.Scalar %5837, %int1280 : !torch.vtensor<[2,256,1],f16>, !torch.int -> !torch.vtensor<[2,256,1],f16> loc(#loc1)
%5839 = torch.aten.add.Scalar %5838, %float1.000000e-05, %int1 : !torch.vtensor<[2,256,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,256,1],f16> loc(#loc1)
%5840 = torch.aten.rsqrt %5839 : !torch.vtensor<[2,256,1],f16> -> !torch.vtensor<[2,256,1],f16> loc(#loc1)
%5841 = torch.prim.ListConstruct %int2, %int256, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%5842 = torch.aten.broadcast_to %5840, %5841 : !torch.vtensor<[2,256,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5843 = torch.aten.mul.Tensor %5835, %5842 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
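    // Three quantized linear projections follow, consistent with the Q, K and
    // V of a self-attention layer: fake-quantize the LayerNorm output
    // (abs-max over groups of 16 channels in dim 3), dequantize an int8
    // [1280,1280] weight with per-group scales, then view -> mm -> bias add.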
%5844 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5845 = torch.aten.detach %5844 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5846 = torch.aten.view %5843, %4957 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,80,16],f16> loc(#loc1)
%5847 = torch.aten.abs %5846 : !torch.vtensor<[2,256,80,16],f16> -> !torch.vtensor<[2,256,80,16],f16> loc(#loc1)
%values_152, %indices_153 = torch.aten.max.dim %5847, %int3, %true : !torch.vtensor<[2,256,80,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,256,80,1],f16>, !torch.vtensor<[2,256,80,1],si64> loc(#loc1)
%5848 = torch.aten.view %values_152, %4960 : !torch.vtensor<[2,256,80,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,80,1],f16> loc(#loc1)
%5849 = torch.aten.broadcast_to %5848, %4957 : !torch.vtensor<[2,256,80,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,80,16],f16> loc(#loc1)
%5850 = torch.aten.clone %5849, %int0 : !torch.vtensor<[2,256,80,16],f16>, !torch.int -> !torch.vtensor<[2,256,80,16],f16> loc(#loc1)
%5851 = torch.aten.view %5850, %4939 : !torch.vtensor<[2,256,80,16],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5852 = torch.aten.sub.Tensor %5845, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%5853 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%5854 = torch.aten.pow.Tensor_Tensor %5853, %5852 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5855 = torch.aten.neg %5854 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5856 = torch.aten.neg %5855 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5857 = torch.aten.div.Tensor %5851, %5856 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5858 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5859 = torch.aten.detach %5858 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5860 = torch.aten.div.Tensor %5843, %5857 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5861 = torch.aten.add.Tensor %5860, %5859, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5862 = torch.aten.sub.Tensor %5845, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%5863 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%5864 = torch.aten.pow.Tensor_Tensor %5863, %5862 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5865 = torch.aten.neg %5864 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5866 = torch.aten.sub.Tensor %5845, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%5867 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%5868 = torch.aten.pow.Tensor_Tensor %5867, %5866 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5869 = torch.aten.sub.Tensor %5868, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%5870 = torch.aten.gt.Tensor %5861, %5869 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,256,1280],i1> loc(#loc1)
%5871 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%5872 = torch.aten.to.dtype %5871, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%5873 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%5874 = torch.aten.broadcast_to %5872, %5873 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%5875 = torch.valsem.aten.copy %5874, %5869, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%5876 = torch.aten.where.self %5870, %5875, %5861 : !torch.vtensor<[2,256,1280],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5877 = torch.aten.lt.Tensor %5876, %5865 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,256,1280],i1> loc(#loc1)
%5878 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%5879 = torch.aten.to.dtype %5878, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%5880 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%5881 = torch.aten.broadcast_to %5879, %5880 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%5882 = torch.valsem.aten.copy %5881, %5865, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%5883 = torch.aten.where.self %5877, %5882, %5876 : !torch.vtensor<[2,256,1280],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5884 = torch.aten.round %5883 : !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5885 = torch.aten.sub.Tensor %5884, %5859, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5886 = torch.aten.mul.Tensor %5885, %5857 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5887 = torch.aten.broadcast_to %272, %5000 : !torch.vtensor<[1280,80,1],f16>, !torch.list<int> -> !torch.vtensor<[1280,80,16],f16> loc(#loc1)
%5888 = torch.aten.clone %5887, %int0 : !torch.vtensor<[1280,80,16],f16>, !torch.int -> !torch.vtensor<[1280,80,16],f16> loc(#loc1)
%5889 = torch.aten.view %5888, %5003 : !torch.vtensor<[1280,80,16],f16>, !torch.list<int> -> !torch.vtensor<[1280,1280],f16> loc(#loc1)
%5890 = torch.aten.mul.Tensor %273, %5889 : !torch.vtensor<[1280,1280],si8>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16> loc(#loc1)
%5891 = torch.aten.transpose.int %5890, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16> loc(#loc1)
%5892 = torch.aten.view %5886, %5007 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16> loc(#loc1)
%5893 = torch.aten.mm %5892, %5891 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16> loc(#loc1)
%5894 = torch.aten.mul.Scalar %274, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16> loc(#loc1)
%5895 = torch.aten.add.Tensor %5894, %5893, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[512,1280],f16>, !torch.int -> !torch.vtensor<[512,1280],f16> loc(#loc1)
%5896 = torch.aten.view %5895, %4939 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5897 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5898 = torch.aten.detach %5897 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5899 = torch.aten.view %5843, %4957 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,80,16],f16> loc(#loc1)
%5900 = torch.aten.abs %5899 : !torch.vtensor<[2,256,80,16],f16> -> !torch.vtensor<[2,256,80,16],f16> loc(#loc1)
%values_154, %indices_155 = torch.aten.max.dim %5900, %int3, %true : !torch.vtensor<[2,256,80,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,256,80,1],f16>, !torch.vtensor<[2,256,80,1],si64> loc(#loc1)
%5901 = torch.aten.view %values_154, %4960 : !torch.vtensor<[2,256,80,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,80,1],f16> loc(#loc1)
%5902 = torch.aten.broadcast_to %5901, %4957 : !torch.vtensor<[2,256,80,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,80,16],f16> loc(#loc1)
%5903 = torch.aten.clone %5902, %int0 : !torch.vtensor<[2,256,80,16],f16>, !torch.int -> !torch.vtensor<[2,256,80,16],f16> loc(#loc1)
%5904 = torch.aten.view %5903, %4939 : !torch.vtensor<[2,256,80,16],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5905 = torch.aten.sub.Tensor %5898, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%5906 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%5907 = torch.aten.pow.Tensor_Tensor %5906, %5905 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5908 = torch.aten.neg %5907 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5909 = torch.aten.neg %5908 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5910 = torch.aten.div.Tensor %5904, %5909 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5911 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5912 = torch.aten.detach %5911 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5913 = torch.aten.div.Tensor %5843, %5910 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5914 = torch.aten.add.Tensor %5913, %5912, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5915 = torch.aten.sub.Tensor %5898, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%5916 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%5917 = torch.aten.pow.Tensor_Tensor %5916, %5915 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5918 = torch.aten.neg %5917 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5919 = torch.aten.sub.Tensor %5898, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%5920 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%5921 = torch.aten.pow.Tensor_Tensor %5920, %5919 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5922 = torch.aten.sub.Tensor %5921, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%5923 = torch.aten.gt.Tensor %5914, %5922 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,256,1280],i1> loc(#loc1)
%5924 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%5925 = torch.aten.to.dtype %5924, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%5926 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%5927 = torch.aten.broadcast_to %5925, %5926 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%5928 = torch.valsem.aten.copy %5927, %5922, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%5929 = torch.aten.where.self %5923, %5928, %5914 : !torch.vtensor<[2,256,1280],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5930 = torch.aten.lt.Tensor %5929, %5918 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,256,1280],i1> loc(#loc1)
%5931 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%5932 = torch.aten.to.dtype %5931, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%5933 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%5934 = torch.aten.broadcast_to %5932, %5933 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%5935 = torch.valsem.aten.copy %5934, %5918, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%5936 = torch.aten.where.self %5930, %5935, %5929 : !torch.vtensor<[2,256,1280],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5937 = torch.aten.round %5936 : !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5938 = torch.aten.sub.Tensor %5937, %5912, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5939 = torch.aten.mul.Tensor %5938, %5910 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5940 = torch.aten.broadcast_to %275, %5000 : !torch.vtensor<[1280,80,1],f16>, !torch.list<int> -> !torch.vtensor<[1280,80,16],f16> loc(#loc1)
%5941 = torch.aten.clone %5940, %int0 : !torch.vtensor<[1280,80,16],f16>, !torch.int -> !torch.vtensor<[1280,80,16],f16> loc(#loc1)
%5942 = torch.aten.view %5941, %5003 : !torch.vtensor<[1280,80,16],f16>, !torch.list<int> -> !torch.vtensor<[1280,1280],f16> loc(#loc1)
%5943 = torch.aten.mul.Tensor %276, %5942 : !torch.vtensor<[1280,1280],si8>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16> loc(#loc1)
%5944 = torch.aten.transpose.int %5943, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16> loc(#loc1)
%5945 = torch.aten.view %5939, %5007 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16> loc(#loc1)
%5946 = torch.aten.mm %5945, %5944 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16> loc(#loc1)
%5947 = torch.aten.mul.Scalar %277, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16> loc(#loc1)
%5948 = torch.aten.add.Tensor %5947, %5946, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[512,1280],f16>, !torch.int -> !torch.vtensor<[512,1280],f16> loc(#loc1)
%5949 = torch.aten.view %5948, %4939 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5950 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5951 = torch.aten.detach %5950 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5952 = torch.aten.view %5843, %4957 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,80,16],f16> loc(#loc1)
%5953 = torch.aten.abs %5952 : !torch.vtensor<[2,256,80,16],f16> -> !torch.vtensor<[2,256,80,16],f16> loc(#loc1)
%values_156, %indices_157 = torch.aten.max.dim %5953, %int3, %true : !torch.vtensor<[2,256,80,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,256,80,1],f16>, !torch.vtensor<[2,256,80,1],si64> loc(#loc1)
%5954 = torch.aten.view %values_156, %4960 : !torch.vtensor<[2,256,80,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,80,1],f16> loc(#loc1)
%5955 = torch.aten.broadcast_to %5954, %4957 : !torch.vtensor<[2,256,80,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,80,16],f16> loc(#loc1)
%5956 = torch.aten.clone %5955, %int0 : !torch.vtensor<[2,256,80,16],f16>, !torch.int -> !torch.vtensor<[2,256,80,16],f16> loc(#loc1)
%5957 = torch.aten.view %5956, %4939 : !torch.vtensor<[2,256,80,16],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5958 = torch.aten.sub.Tensor %5951, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%5959 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%5960 = torch.aten.pow.Tensor_Tensor %5959, %5958 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5961 = torch.aten.neg %5960 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5962 = torch.aten.neg %5961 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5963 = torch.aten.div.Tensor %5957, %5962 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5964 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5965 = torch.aten.detach %5964 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5966 = torch.aten.div.Tensor %5843, %5963 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5967 = torch.aten.add.Tensor %5966, %5965, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5968 = torch.aten.sub.Tensor %5951, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%5969 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%5970 = torch.aten.pow.Tensor_Tensor %5969, %5968 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5971 = torch.aten.neg %5970 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5972 = torch.aten.sub.Tensor %5951, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%5973 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%5974 = torch.aten.pow.Tensor_Tensor %5973, %5972 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%5975 = torch.aten.sub.Tensor %5974, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%5976 = torch.aten.gt.Tensor %5967, %5975 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,256,1280],i1> loc(#loc1)
%5977 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%5978 = torch.aten.to.dtype %5977, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%5979 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%5980 = torch.aten.broadcast_to %5978, %5979 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%5981 = torch.valsem.aten.copy %5980, %5975, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%5982 = torch.aten.where.self %5976, %5981, %5967 : !torch.vtensor<[2,256,1280],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5983 = torch.aten.lt.Tensor %5982, %5971 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,256,1280],i1> loc(#loc1)
%5984 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%5985 = torch.aten.to.dtype %5984, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%5986 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%5987 = torch.aten.broadcast_to %5985, %5986 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%5988 = torch.valsem.aten.copy %5987, %5971, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%5989 = torch.aten.where.self %5983, %5988, %5982 : !torch.vtensor<[2,256,1280],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5990 = torch.aten.round %5989 : !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5991 = torch.aten.sub.Tensor %5990, %5965, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5992 = torch.aten.mul.Tensor %5991, %5963 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%5993 = torch.aten.broadcast_to %278, %5000 : !torch.vtensor<[1280,80,1],f16>, !torch.list<int> -> !torch.vtensor<[1280,80,16],f16> loc(#loc1)
%5994 = torch.aten.clone %5993, %int0 : !torch.vtensor<[1280,80,16],f16>, !torch.int -> !torch.vtensor<[1280,80,16],f16> loc(#loc1)
%5995 = torch.aten.view %5994, %5003 : !torch.vtensor<[1280,80,16],f16>, !torch.list<int> -> !torch.vtensor<[1280,1280],f16> loc(#loc1)
%5996 = torch.aten.mul.Tensor %279, %5995 : !torch.vtensor<[1280,1280],si8>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16> loc(#loc1)
%5997 = torch.aten.transpose.int %5996, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16> loc(#loc1)
%5998 = torch.aten.view %5992, %5007 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16> loc(#loc1)
%5999 = torch.aten.mm %5998, %5997 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16> loc(#loc1)
%6000 = torch.aten.mul.Scalar %280, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16> loc(#loc1)
%6001 = torch.aten.add.Tensor %6000, %5999, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[512,1280],f16>, !torch.int -> !torch.vtensor<[512,1280],f16> loc(#loc1)
%6002 = torch.aten.view %6001, %4939 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
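    // Attention core: split Q, K and V into 8 heads of 160
    // ([2,256,1280] -> [16,256,160]), form QK^T with bmm, scale by %0
    // (presumably 1/sqrt(160) for the head dimension), apply a numerically
    // stable softmax (subtract the row max before exp), and bmm the resulting
    // weights against V.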
%6003 = torch.aten.view %5896, %5119 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16> loc(#loc1)
%6004 = torch.aten.permute %6003, %1380 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16> loc(#loc1)
%6005 = torch.aten.clone %6004, %int0 : !torch.vtensor<[2,8,256,160],f16>, !torch.int -> !torch.vtensor<[2,8,256,160],f16> loc(#loc1)
%6006 = torch.aten.view %6005, %5123 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16> loc(#loc1)
%6007 = torch.aten.view %5949, %5119 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16> loc(#loc1)
%6008 = torch.aten.permute %6007, %1380 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16> loc(#loc1)
%6009 = torch.aten.clone %6008, %int0 : !torch.vtensor<[2,8,256,160],f16>, !torch.int -> !torch.vtensor<[2,8,256,160],f16> loc(#loc1)
%6010 = torch.aten.view %6009, %5123 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16> loc(#loc1)
%6011 = torch.aten.view %6002, %5119 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16> loc(#loc1)
%6012 = torch.aten.permute %6011, %1380 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16> loc(#loc1)
%6013 = torch.aten.clone %6012, %int0 : !torch.vtensor<[2,8,256,160],f16>, !torch.int -> !torch.vtensor<[2,8,256,160],f16> loc(#loc1)
%6014 = torch.aten.view %6013, %5123 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16> loc(#loc1)
%6015 = torch.aten.transpose.int %6010, %int-1, %int-2 : !torch.vtensor<[16,256,160],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,160,256],f16> loc(#loc1)
%6016 = torch.aten.broadcast_to %6006, %5123 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16> loc(#loc1)
%6017 = torch.aten.view %6016, %5123 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16> loc(#loc1)
%6018 = torch.aten.broadcast_to %6015, %5136 : !torch.vtensor<[16,160,256],f16>, !torch.list<int> -> !torch.vtensor<[16,160,256],f16> loc(#loc1)
%6019 = torch.aten.view %6018, %5136 : !torch.vtensor<[16,160,256],f16>, !torch.list<int> -> !torch.vtensor<[16,160,256],f16> loc(#loc1)
%6020 = torch.aten.bmm %6017, %6019 : !torch.vtensor<[16,256,160],f16>, !torch.vtensor<[16,160,256],f16> -> !torch.vtensor<[16,256,256],f16> loc(#loc1)
%6021 = torch.aten.view %6020, %5140 : !torch.vtensor<[16,256,256],f16>, !torch.list<int> -> !torch.vtensor<[16,256,256],f16> loc(#loc1)
%6022 = torch.aten.mul.Tensor %6021, %0 : !torch.vtensor<[16,256,256],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,256,256],f16> loc(#loc1)
%values_158, %indices_159 = torch.aten.max.dim %6022, %int-1, %true : !torch.vtensor<[16,256,256],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,256,1],f16>, !torch.vtensor<[16,256,1],si64> loc(#loc1)
%6023 = torch.aten.sub.Tensor %6022, %values_158, %float1.000000e00 : !torch.vtensor<[16,256,256],f16>, !torch.vtensor<[16,256,1],f16>, !torch.float -> !torch.vtensor<[16,256,256],f16> loc(#loc1)
%6024 = torch.aten.exp %6023 : !torch.vtensor<[16,256,256],f16> -> !torch.vtensor<[16,256,256],f16> loc(#loc1)
%6025 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int> loc(#loc1)
%6026 = torch.aten.sum.dim_IntList %6024, %6025, %true, %none : !torch.vtensor<[16,256,256],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[16,256,1],f16> loc(#loc1)
%6027 = torch.aten.div.Tensor %6024, %6026 : !torch.vtensor<[16,256,256],f16>, !torch.vtensor<[16,256,1],f16> -> !torch.vtensor<[16,256,256],f16> loc(#loc1)
%6028 = torch.aten.broadcast_to %6027, %5140 : !torch.vtensor<[16,256,256],f16>, !torch.list<int> -> !torch.vtensor<[16,256,256],f16> loc(#loc1)
%6029 = torch.aten.view %6028, %5140 : !torch.vtensor<[16,256,256],f16>, !torch.list<int> -> !torch.vtensor<[16,256,256],f16> loc(#loc1)
%6030 = torch.aten.broadcast_to %6014, %5123 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16> loc(#loc1)
%6031 = torch.aten.view %6030, %5123 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16> loc(#loc1)
%6032 = torch.aten.bmm %6029, %6031 : !torch.vtensor<[16,256,256],f16>, !torch.vtensor<[16,256,160],f16> -> !torch.vtensor<[16,256,160],f16> loc(#loc1)
%6033 = torch.aten.view %6032, %5123 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16> loc(#loc1)
%6034 = torch.aten.view %6033, %5154 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16> loc(#loc1)
%6035 = torch.aten.permute %6034, %1380 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16> loc(#loc1)
%6036 = torch.aten.clone %6035, %int0 : !torch.vtensor<[2,256,8,160],f16>, !torch.int -> !torch.vtensor<[2,256,8,160],f16> loc(#loc1)
%6037 = torch.aten.view %6036, %4939 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
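    // Fake-quantize the merged attention output and apply the quantized
    // output projection (%281/%282/%283), following the same int8-weight mm
    // pattern as the Q/K/V projections above.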
%6038 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%6039 = torch.aten.detach %6038 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%6040 = torch.aten.view %6037, %4957 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,80,16],f16> loc(#loc1)
%6041 = torch.aten.abs %6040 : !torch.vtensor<[2,256,80,16],f16> -> !torch.vtensor<[2,256,80,16],f16> loc(#loc1)
%values_160, %indices_161 = torch.aten.max.dim %6041, %int3, %true : !torch.vtensor<[2,256,80,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,256,80,1],f16>, !torch.vtensor<[2,256,80,1],si64> loc(#loc1)
%6042 = torch.aten.view %values_160, %4960 : !torch.vtensor<[2,256,80,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,80,1],f16> loc(#loc1)
%6043 = torch.aten.broadcast_to %6042, %4957 : !torch.vtensor<[2,256,80,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,80,16],f16> loc(#loc1)
%6044 = torch.aten.clone %6043, %int0 : !torch.vtensor<[2,256,80,16],f16>, !torch.int -> !torch.vtensor<[2,256,80,16],f16> loc(#loc1)
%6045 = torch.aten.view %6044, %4939 : !torch.vtensor<[2,256,80,16],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%6046 = torch.aten.sub.Tensor %6039, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%6047 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%6048 = torch.aten.pow.Tensor_Tensor %6047, %6046 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%6049 = torch.aten.neg %6048 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%6050 = torch.aten.neg %6049 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%6051 = torch.aten.div.Tensor %6045, %6050 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%6052 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%6053 = torch.aten.detach %6052 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%6054 = torch.aten.div.Tensor %6037, %6051 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%6055 = torch.aten.add.Tensor %6054, %6053, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%6056 = torch.aten.sub.Tensor %6039, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%6057 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%6058 = torch.aten.pow.Tensor_Tensor %6057, %6056 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%6059 = torch.aten.neg %6058 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%6060 = torch.aten.sub.Tensor %6039, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%6061 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%6062 = torch.aten.pow.Tensor_Tensor %6061, %6060 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%6063 = torch.aten.sub.Tensor %6062, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%6064 = torch.aten.gt.Tensor %6055, %6063 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,256,1280],i1> loc(#loc1)
%6065 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%6066 = torch.aten.to.dtype %6065, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%6067 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%6068 = torch.aten.broadcast_to %6066, %6067 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%6069 = torch.valsem.aten.copy %6068, %6063, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%6070 = torch.aten.where.self %6064, %6069, %6055 : !torch.vtensor<[2,256,1280],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%6071 = torch.aten.lt.Tensor %6070, %6059 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,256,1280],i1> loc(#loc1)
%6072 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%6073 = torch.aten.to.dtype %6072, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%6074 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%6075 = torch.aten.broadcast_to %6073, %6074 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%6076 = torch.valsem.aten.copy %6075, %6059, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%6077 = torch.aten.where.self %6071, %6076, %6070 : !torch.vtensor<[2,256,1280],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%6078 = torch.aten.round %6077 : !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%6079 = torch.aten.sub.Tensor %6078, %6053, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%6080 = torch.aten.mul.Tensor %6079, %6051 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%6081 = torch.aten.broadcast_to %281, %5000 : !torch.vtensor<[1280,80,1],f16>, !torch.list<int> -> !torch.vtensor<[1280,80,16],f16> loc(#loc1)
%6082 = torch.aten.clone %6081, %int0 : !torch.vtensor<[1280,80,16],f16>, !torch.int -> !torch.vtensor<[1280,80,16],f16> loc(#loc1)
%6083 = torch.aten.view %6082, %5003 : !torch.vtensor<[1280,80,16],f16>, !torch.list<int> -> !torch.vtensor<[1280,1280],f16> loc(#loc1)
%6084 = torch.aten.mul.Tensor %282, %6083 : !torch.vtensor<[1280,1280],si8>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16> loc(#loc1)
%6085 = torch.aten.transpose.int %6084, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16> loc(#loc1)
%6086 = torch.aten.view %6080, %5007 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16> loc(#loc1)
%6087 = torch.aten.mm %6086, %6085 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16> loc(#loc1)
%6088 = torch.aten.mul.Scalar %283, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16> loc(#loc1)
%6089 = torch.aten.add.Tensor %6088, %6087, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[512,1280],f16>, !torch.int -> !torch.vtensor<[512,1280],f16> loc(#loc1)
%6090 = torch.aten.view %6089, %4939 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%6091 = torch.aten.add.Tensor %6090, %5829, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
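    // Residual add back onto the pre-attention sequence %5829, then a second
    // hand-rolled LayerNorm over the channel dimension.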
%6092 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int> loc(#loc1)
%6093 = torch.aten.sum.dim_IntList %6091, %6092, %true, %none : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,256,1],f16> loc(#loc1)
%6094 = torch.aten.div.Scalar %6093, %int1280 : !torch.vtensor<[2,256,1],f16>, !torch.int -> !torch.vtensor<[2,256,1],f16> loc(#loc1)
%6095 = torch.prim.ListConstruct %int2, %int256, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%6096 = torch.aten.broadcast_to %6094, %6095 : !torch.vtensor<[2,256,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%6097 = torch.aten.sub.Tensor %6091, %6096, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%6098 = torch.aten.mul.Tensor %6097, %6097 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%6099 = torch.aten.sum.dim_IntList %6098, %6092, %true, %none : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,256,1],f16> loc(#loc1)
%6100 = torch.aten.div.Scalar %6099, %int1280 : !torch.vtensor<[2,256,1],f16>, !torch.int -> !torch.vtensor<[2,256,1],f16> loc(#loc1)
%6101 = torch.aten.add.Scalar %6100, %float1.000000e-05, %int1 : !torch.vtensor<[2,256,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,256,1],f16> loc(#loc1)
%6102 = torch.aten.rsqrt %6101 : !torch.vtensor<[2,256,1],f16> -> !torch.vtensor<[2,256,1],f16> loc(#loc1)
%6103 = torch.prim.ListConstruct %int2, %int256, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%6104 = torch.aten.broadcast_to %6102, %6103 : !torch.vtensor<[2,256,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%6105 = torch.aten.mul.Tensor %6097, %6104 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
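    // Fake-quantize the re-normalized hidden states (same abs-max scheme as
    // above), presumably ahead of the next projection.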
%6106 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%6107 = torch.aten.detach %6106 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%6108 = torch.aten.view %6105, %4957 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,80,16],f16> loc(#loc1)
%6109 = torch.aten.abs %6108 : !torch.vtensor<[2,256,80,16],f16> -> !torch.vtensor<[2,256,80,16],f16> loc(#loc1)
%values_162, %indices_163 = torch.aten.max.dim %6109, %int3, %true : !torch.vtensor<[2,256,80,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,256,80,1],f16>, !torch.vtensor<[2,256,80,1],si64> loc(#loc1)
%6110 = torch.aten.view %values_162, %4960 : !torch.vtensor<[2,256,80,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,80,1],f16> loc(#loc1)
%6111 = torch.aten.broadcast_to %6110, %4957 : !torch.vtensor<[2,256,80,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,80,16],f16> loc(#loc1)
%6112 = torch.aten.clone %6111, %int0 : !torch.vtensor<[2,256,80,16],f16>, !torch.int -> !torch.vtensor<[2,256,80,16],f16> loc(#loc1)
%6113 = torch.aten.view %6112, %4939 : !torch.vtensor<[2,256,80,16],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%6114 = torch.aten.sub.Tensor %6107, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%6115 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%6116 = torch.aten.pow.Tensor_Tensor %6115, %6114 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%6117 = torch.aten.neg %6116 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%6118 = torch.aten.neg %6117 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%6119 = torch.aten.div.Tensor %6113, %6118 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%6120 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%6121 = torch.aten.detach %6120 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%6122 = torch.aten.div.Tensor %6105, %6119 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%6123 = torch.aten.add.Tensor %6122, %6121, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%6124 = torch.aten.sub.Tensor %6107, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%6125 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%6126 = torch.aten.pow.Tensor_Tensor %6125, %6124 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%6127 = torch.aten.neg %6126 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%6128 = torch.aten.sub.Tensor %6107, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%6129 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%6130 = torch.aten.pow.Tensor_Tensor %6129, %6128 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%6131 = torch.aten.sub.Tensor %6130, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%6132 = torch.aten.gt.Tensor %6123, %6131 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,256,1280],i1> loc(#loc1)
%6133 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%6134 = torch.aten.to.dtype %6133, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%6135 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%6136 = torch.aten.broadcast_to %6134, %6135 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%6137 = torch.valsem.aten.copy %6136, %6131, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%6138 = torch.aten.where.self %6132, %6137, %6123 : !torch.vtensor<[2,256,1280],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%6139 = torch.aten.lt.Tensor %6138, %6127 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,256,1280],i1> loc(#loc1)
%6140 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%6141 = torch.aten.to.dtype %6140, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%6142 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%6143 = torch.aten.broadcast_to %6141, %6142 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%6144 = torch.valsem.aten.copy %6143, %6127, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%6145 = torch.aten.where.self %6139, %6144, %6138 : !torch.vtensor<[2,256,1280],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%6146 = torch.aten.round %6145 : !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%6147 = torch.aten.sub.Tensor %6146, %6121, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%6148 = torch.aten.mul.Tensor %6147, %6119 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
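// %6106-%6148: group-wise dynamic fake quantization (quantize-dequantize) of the
// normalized activations: view as [2,256,80,16] (80 groups of 16 channels), take
// the per-group absmax, derive a scale absmax / %882^(b-1) (2^(b-1) if the base
// %882 is 2, with bit width b from %880), shift by the zero point (%881), clamp
// to the signed b-bit range, round, then undo the shift and rescale to f16.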
%6149 = torch.aten.broadcast_to %284, %5000 : !torch.vtensor<[1280,80,1],f16>, !torch.list<int> -> !torch.vtensor<[1280,80,16],f16> loc(#loc1)
%6150 = torch.aten.clone %6149, %int0 : !torch.vtensor<[1280,80,16],f16>, !torch.int -> !torch.vtensor<[1280,80,16],f16> loc(#loc1)
%6151 = torch.aten.view %6150, %5003 : !torch.vtensor<[1280,80,16],f16>, !torch.list<int> -> !torch.vtensor<[1280,1280],f16> loc(#loc1)
%6152 = torch.aten.mul.Tensor %285, %6151 : !torch.vtensor<[1280,1280],si8>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16> loc(#loc1)
%6153 = torch.aten.transpose.int %6152, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16> loc(#loc1)
%6154 = torch.aten.view %6148, %5007 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16> loc(#loc1)
%6155 = torch.aten.mm %6154, %6153 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16> loc(#loc1)
%6156 = torch.aten.mul.Scalar %286, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16> loc(#loc1)
%6157 = torch.aten.add.Tensor %6156, %6155, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[512,1280],f16>, !torch.int -> !torch.vtensor<[512,1280],f16> loc(#loc1)
%6158 = torch.aten.view %6157, %4939 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
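// %6149-%6158: quantized linear layer: the si8 weight %285 is dequantized with
// per-group scales %284, then applied as x @ W^T + b (bias %286). Given the
// attention that follows, this appears to be the query projection (to_q).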
%6159 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%6160 = torch.aten.detach %6159 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%6161 = torch.aten.view %arg2, %1542 : !torch.vtensor<[2,77,768],f16>, !torch.list<int> -> !torch.vtensor<[2,77,48,16],f16> loc(#loc1)
%6162 = torch.aten.abs %6161 : !torch.vtensor<[2,77,48,16],f16> -> !torch.vtensor<[2,77,48,16],f16> loc(#loc1)
%values_164, %indices_165 = torch.aten.max.dim %6162, %int3, %true : !torch.vtensor<[2,77,48,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,77,48,1],f16>, !torch.vtensor<[2,77,48,1],si64> loc(#loc1)
%6163 = torch.aten.view %values_164, %1545 : !torch.vtensor<[2,77,48,1],f16>, !torch.list<int> -> !torch.vtensor<[2,77,48,1],f16> loc(#loc1)
%6164 = torch.aten.broadcast_to %6163, %1542 : !torch.vtensor<[2,77,48,1],f16>, !torch.list<int> -> !torch.vtensor<[2,77,48,16],f16> loc(#loc1)
%6165 = torch.aten.clone %6164, %int0 : !torch.vtensor<[2,77,48,16],f16>, !torch.int -> !torch.vtensor<[2,77,48,16],f16> loc(#loc1)
%6166 = torch.aten.view %6165, %1549 : !torch.vtensor<[2,77,48,16],f16>, !torch.list<int> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%6167 = torch.aten.sub.Tensor %6160, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%6168 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%6169 = torch.aten.pow.Tensor_Tensor %6168, %6167 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%6170 = torch.aten.neg %6169 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%6171 = torch.aten.neg %6170 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%6172 = torch.aten.div.Tensor %6166, %6171 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%6173 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%6174 = torch.aten.detach %6173 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%6175 = torch.aten.div.Tensor %arg2, %6172 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[2,77,768],f16> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%6176 = torch.aten.add.Tensor %6175, %6174, %int1 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%6177 = torch.aten.sub.Tensor %6160, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%6178 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%6179 = torch.aten.pow.Tensor_Tensor %6178, %6177 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%6180 = torch.aten.neg %6179 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%6181 = torch.aten.sub.Tensor %6160, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%6182 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%6183 = torch.aten.pow.Tensor_Tensor %6182, %6181 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%6184 = torch.aten.sub.Tensor %6183, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%6185 = torch.aten.gt.Tensor %6176, %6184 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,77,768],i1> loc(#loc1)
%6186 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%6187 = torch.aten.to.dtype %6186, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%6188 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%6189 = torch.aten.broadcast_to %6187, %6188 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%6190 = torch.valsem.aten.copy %6189, %6184, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%6191 = torch.aten.where.self %6185, %6190, %6176 : !torch.vtensor<[2,77,768],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,77,768],f16> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%6192 = torch.aten.lt.Tensor %6191, %6180 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,77,768],i1> loc(#loc1)
%6193 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%6194 = torch.aten.to.dtype %6193, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%6195 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%6196 = torch.aten.broadcast_to %6194, %6195 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%6197 = torch.valsem.aten.copy %6196, %6180, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%6198 = torch.aten.where.self %6192, %6197, %6191 : !torch.vtensor<[2,77,768],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,77,768],f16> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%6199 = torch.aten.round %6198 : !torch.vtensor<[2,77,768],f16> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%6200 = torch.aten.sub.Tensor %6199, %6174, %int1 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%6201 = torch.aten.mul.Tensor %6200, %6172 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[2,77,768],f16> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
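// %6159-%6201: the same fake-quantization pattern applied to %arg2, the
// [2,77,768] encoder hidden states (48 groups of 16 channels).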
%6202 = torch.aten.broadcast_to %287, %5323 : !torch.vtensor<[1280,48,1],f16>, !torch.list<int> -> !torch.vtensor<[1280,48,16],f16> loc(#loc1)
%6203 = torch.aten.clone %6202, %int0 : !torch.vtensor<[1280,48,16],f16>, !torch.int -> !torch.vtensor<[1280,48,16],f16> loc(#loc1)
%6204 = torch.aten.view %6203, %5326 : !torch.vtensor<[1280,48,16],f16>, !torch.list<int> -> !torch.vtensor<[1280,768],f16> loc(#loc1)
%6205 = torch.aten.mul.Tensor %288, %6204 : !torch.vtensor<[1280,768],si8>, !torch.vtensor<[1280,768],f16> -> !torch.vtensor<[1280,768],f16> loc(#loc1)
%6206 = torch.aten.transpose.int %6205, %int0, %int1 : !torch.vtensor<[1280,768],f16>, !torch.int, !torch.int -> !torch.vtensor<[768,1280],f16> loc(#loc1)
%6207 = torch.aten.view %6201, %1593 : !torch.vtensor<[2,77,768],f16>, !torch.list<int> -> !torch.vtensor<[154,768],f16> loc(#loc1)
%6208 = torch.aten.mm %6207, %6206 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,1280],f16> -> !torch.vtensor<[154,1280],f16> loc(#loc1)
%6209 = torch.aten.view %6208, %5332 : !torch.vtensor<[154,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,77,1280],f16> loc(#loc1)
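// %6202-%6209: quantized linear projection of the encoder hidden states from 768
// to 1280 channels with no bias, consistent with a cross-attention key
// projection (to_k).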
%6210 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%6211 = torch.aten.detach %6210 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%6212 = torch.aten.view %arg2, %1542 : !torch.vtensor<[2,77,768],f16>, !torch.list<int> -> !torch.vtensor<[2,77,48,16],f16> loc(#loc1)
%6213 = torch.aten.abs %6212 : !torch.vtensor<[2,77,48,16],f16> -> !torch.vtensor<[2,77,48,16],f16> loc(#loc1)
%values_166, %indices_167 = torch.aten.max.dim %6213, %int3, %true : !torch.vtensor<[2,77,48,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,77,48,1],f16>, !torch.vtensor<[2,77,48,1],si64> loc(#loc1)
%6214 = torch.aten.view %values_166, %1545 : !torch.vtensor<[2,77,48,1],f16>, !torch.list<int> -> !torch.vtensor<[2,77,48,1],f16> loc(#loc1)
%6215 = torch.aten.broadcast_to %6214, %1542 : !torch.vtensor<[2,77,48,1],f16>, !torch.list<int> -> !torch.vtensor<[2,77,48,16],f16> loc(#loc1)
%6216 = torch.aten.clone %6215, %int0 : !torch.vtensor<[2,77,48,16],f16>, !torch.int -> !torch.vtensor<[2,77,48,16],f16> loc(#loc1)
%6217 = torch.aten.view %6216, %1549 : !torch.vtensor<[2,77,48,16],f16>, !torch.list<int> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%6218 = torch.aten.sub.Tensor %6211, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%6219 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%6220 = torch.aten.pow.Tensor_Tensor %6219, %6218 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%6221 = torch.aten.neg %6220 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%6222 = torch.aten.neg %6221 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%6223 = torch.aten.div.Tensor %6217, %6222 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%6224 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%6225 = torch.aten.detach %6224 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%6226 = torch.aten.div.Tensor %arg2, %6223 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[2,77,768],f16> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%6227 = torch.aten.add.Tensor %6226, %6225, %int1 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%6228 = torch.aten.sub.Tensor %6211, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%6229 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%6230 = torch.aten.pow.Tensor_Tensor %6229, %6228 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%6231 = torch.aten.neg %6230 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%6232 = torch.aten.sub.Tensor %6211, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%6233 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%6234 = torch.aten.pow.Tensor_Tensor %6233, %6232 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%6235 = torch.aten.sub.Tensor %6234, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%6236 = torch.aten.gt.Tensor %6227, %6235 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,77,768],i1> loc(#loc1)
%6237 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%6238 = torch.aten.to.dtype %6237, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%6239 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%6240 = torch.aten.broadcast_to %6238, %6239 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%6241 = torch.valsem.aten.copy %6240, %6235, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%6242 = torch.aten.where.self %6236, %6241, %6227 : !torch.vtensor<[2,77,768],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,77,768],f16> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%6243 = torch.aten.lt.Tensor %6242, %6231 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,77,768],i1> loc(#loc1)
%6244 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%6245 = torch.aten.to.dtype %6244, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%6246 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%6247 = torch.aten.broadcast_to %6245, %6246 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%6248 = torch.valsem.aten.copy %6247, %6231, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%6249 = torch.aten.where.self %6243, %6248, %6242 : !torch.vtensor<[2,77,768],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,77,768],f16> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%6250 = torch.aten.round %6249 : !torch.vtensor<[2,77,768],f16> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%6251 = torch.aten.sub.Tensor %6250, %6225, %int1 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
%6252 = torch.aten.mul.Tensor %6251, %6223 : !torch.vtensor<[2,77,768],f16>, !torch.vtensor<[2,77,768],f16> -> !torch.vtensor<[2,77,768],f16> loc(#loc1)
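// %6210-%6252: %arg2 is fake-quantized a second time (the subgraph is duplicated
// rather than reused) to feed the next projection.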
%6253 = torch.aten.broadcast_to %289, %5323 : !torch.vtensor<[1280,48,1],f16>, !torch.list<int> -> !torch.vtensor<[1280,48,16],f16> loc(#loc1)
%6254 = torch.aten.clone %6253, %int0 : !torch.vtensor<[1280,48,16],f16>, !torch.int -> !torch.vtensor<[1280,48,16],f16> loc(#loc1)
%6255 = torch.aten.view %6254, %5326 : !torch.vtensor<[1280,48,16],f16>, !torch.list<int> -> !torch.vtensor<[1280,768],f16> loc(#loc1)
%6256 = torch.aten.mul.Tensor %290, %6255 : !torch.vtensor<[1280,768],si8>, !torch.vtensor<[1280,768],f16> -> !torch.vtensor<[1280,768],f16> loc(#loc1)
%6257 = torch.aten.transpose.int %6256, %int0, %int1 : !torch.vtensor<[1280,768],f16>, !torch.int, !torch.int -> !torch.vtensor<[768,1280],f16> loc(#loc1)
%6258 = torch.aten.view %6252, %1593 : !torch.vtensor<[2,77,768],f16>, !torch.list<int> -> !torch.vtensor<[154,768],f16> loc(#loc1)
%6259 = torch.aten.mm %6258, %6257 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,1280],f16> -> !torch.vtensor<[154,1280],f16> loc(#loc1)
%6260 = torch.aten.view %6259, %5332 : !torch.vtensor<[154,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,77,1280],f16> loc(#loc1)
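// %6253-%6260: the matching quantized value projection (to_v), again 768 -> 1280
// with no bias.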
%6261 = torch.aten.view %6158, %5119 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16> loc(#loc1)
%6262 = torch.aten.permute %6261, %1380 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16> loc(#loc1)
%6263 = torch.aten.clone %6262, %int0 : !torch.vtensor<[2,8,256,160],f16>, !torch.int -> !torch.vtensor<[2,8,256,160],f16> loc(#loc1)
%6264 = torch.aten.view %6263, %5123 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16> loc(#loc1)
%6265 = torch.aten.view %6209, %5389 : !torch.vtensor<[2,77,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,77,8,160],f16> loc(#loc1)
%6266 = torch.aten.permute %6265, %1380 : !torch.vtensor<[2,77,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,160],f16> loc(#loc1)
%6267 = torch.aten.clone %6266, %int0 : !torch.vtensor<[2,8,77,160],f16>, !torch.int -> !torch.vtensor<[2,8,77,160],f16> loc(#loc1)
%6268 = torch.aten.view %6267, %5393 : !torch.vtensor<[2,8,77,160],f16>, !torch.list<int> -> !torch.vtensor<[16,77,160],f16> loc(#loc1)
%6269 = torch.aten.view %6260, %5389 : !torch.vtensor<[2,77,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,77,8,160],f16> loc(#loc1)
%6270 = torch.aten.permute %6269, %1380 : !torch.vtensor<[2,77,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,160],f16> loc(#loc1)
%6271 = torch.aten.clone %6270, %int0 : !torch.vtensor<[2,8,77,160],f16>, !torch.int -> !torch.vtensor<[2,8,77,160],f16> loc(#loc1)
%6272 = torch.aten.view %6271, %5393 : !torch.vtensor<[2,8,77,160],f16>, !torch.list<int> -> !torch.vtensor<[16,77,160],f16> loc(#loc1)
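// %6261-%6272: split Q ([2,256,1280]) and K/V ([2,77,1280]) into 8 heads of 160
// channels each and fold the head dim into the batch: Q -> [16,256,160],
// K/V -> [16,77,160].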
%6273 = torch.aten.transpose.int %6268, %int-1, %int-2 : !torch.vtensor<[16,77,160],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,160,77],f16> loc(#loc1)
%6274 = torch.aten.broadcast_to %6264, %5123 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16> loc(#loc1)
%6275 = torch.aten.view %6274, %5123 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16> loc(#loc1)
%6276 = torch.aten.broadcast_to %6273, %5402 : !torch.vtensor<[16,160,77],f16>, !torch.list<int> -> !torch.vtensor<[16,160,77],f16> loc(#loc1)
%6277 = torch.aten.view %6276, %5402 : !torch.vtensor<[16,160,77],f16>, !torch.list<int> -> !torch.vtensor<[16,160,77],f16> loc(#loc1)
%6278 = torch.aten.bmm %6275, %6277 : !torch.vtensor<[16,256,160],f16>, !torch.vtensor<[16,160,77],f16> -> !torch.vtensor<[16,256,77],f16> loc(#loc1)
%6279 = torch.aten.view %6278, %5406 : !torch.vtensor<[16,256,77],f16>, !torch.list<int> -> !torch.vtensor<[16,256,77],f16> loc(#loc1)
%6280 = torch.aten.mul.Tensor %6279, %0 : !torch.vtensor<[16,256,77],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,256,77],f16> loc(#loc1)
%values_168, %indices_169 = torch.aten.max.dim %6280, %int-1, %true : !torch.vtensor<[16,256,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,256,1],f16>, !torch.vtensor<[16,256,1],si64> loc(#loc1)
%6281 = torch.aten.sub.Tensor %6280, %values_168, %float1.000000e00 : !torch.vtensor<[16,256,77],f16>, !torch.vtensor<[16,256,1],f16>, !torch.float -> !torch.vtensor<[16,256,77],f16> loc(#loc1)
%6282 = torch.aten.exp %6281 : !torch.vtensor<[16,256,77],f16> -> !torch.vtensor<[16,256,77],f16> loc(#loc1)
%6283 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int> loc(#loc1)
%6284 = torch.aten.sum.dim_IntList %6282, %6283, %true, %none : !torch.vtensor<[16,256,77],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[16,256,1],f16> loc(#loc1)
%6285 = torch.aten.div.Tensor %6282, %6284 : !torch.vtensor<[16,256,77],f16>, !torch.vtensor<[16,256,1],f16> -> !torch.vtensor<[16,256,77],f16> loc(#loc1)
%6286 = torch.aten.broadcast_to %6285, %5406 : !torch.vtensor<[16,256,77],f16>, !torch.list<int> -> !torch.vtensor<[16,256,77],f16> loc(#loc1)
%6287 = torch.aten.view %6286, %5406 : !torch.vtensor<[16,256,77],f16>, !torch.list<int> -> !torch.vtensor<[16,256,77],f16> loc(#loc1)
%6288 = torch.aten.broadcast_to %6272, %5393 : !torch.vtensor<[16,77,160],f16>, !torch.list<int> -> !torch.vtensor<[16,77,160],f16> loc(#loc1)
%6289 = torch.aten.view %6288, %5393 : !torch.vtensor<[16,77,160],f16>, !torch.list<int> -> !torch.vtensor<[16,77,160],f16> loc(#loc1)
%6290 = torch.aten.bmm %6287, %6289 : !torch.vtensor<[16,256,77],f16>, !torch.vtensor<[16,77,160],f16> -> !torch.vtensor<[16,256,160],f16> loc(#loc1)
%6291 = torch.aten.view %6290, %5123 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16> loc(#loc1)
%6292 = torch.aten.view %6291, %5154 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16> loc(#loc1)
%6293 = torch.aten.permute %6292, %1380 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16> loc(#loc1)
%6294 = torch.aten.clone %6293, %int0 : !torch.vtensor<[2,256,8,160],f16>, !torch.int -> !torch.vtensor<[2,256,8,160],f16> loc(#loc1)
%6295 = torch.aten.view %6294, %4939 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
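// %6273-%6295: scaled dot-product cross-attention: scores = (Q @ K^T) * %0,
// where %0 = 0.0790569... = 1/sqrt(160), then a numerically stable softmax
// (row max subtracted before exp), scores @ V, and the heads merged back to
// [2,256,1280].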
%6296 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%6297 = torch.aten.detach %6296 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%6298 = torch.aten.view %6295, %4957 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,80,16],f16> loc(#loc1)
%6299 = torch.aten.abs %6298 : !torch.vtensor<[2,256,80,16],f16> -> !torch.vtensor<[2,256,80,16],f16> loc(#loc1)
%values_170, %indices_171 = torch.aten.max.dim %6299, %int3, %true : !torch.vtensor<[2,256,80,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,256,80,1],f16>, !torch.vtensor<[2,256,80,1],si64> loc(#loc1)
%6300 = torch.aten.view %values_170, %4960 : !torch.vtensor<[2,256,80,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,80,1],f16> loc(#loc1)
%6301 = torch.aten.broadcast_to %6300, %4957 : !torch.vtensor<[2,256,80,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,80,16],f16> loc(#loc1)
%6302 = torch.aten.clone %6301, %int0 : !torch.vtensor<[2,256,80,16],f16>, !torch.int -> !torch.vtensor<[2,256,80,16],f16> loc(#loc1)
%6303 = torch.aten.view %6302, %4939 : !torch.vtensor<[2,256,80,16],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%6304 = torch.aten.sub.Tensor %6297, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%6305 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%6306 = torch.aten.pow.Tensor_Tensor %6305, %6304 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%6307 = torch.aten.neg %6306 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%6308 = torch.aten.neg %6307 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%6309 = torch.aten.div.Tensor %6303, %6308 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%6310 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%6311 = torch.aten.detach %6310 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%6312 = torch.aten.div.Tensor %6295, %6309 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%6313 = torch.aten.add.Tensor %6312, %6311, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%6314 = torch.aten.sub.Tensor %6297, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%6315 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%6316 = torch.aten.pow.Tensor_Tensor %6315, %6314 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%6317 = torch.aten.neg %6316 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%6318 = torch.aten.sub.Tensor %6297, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%6319 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%6320 = torch.aten.pow.Tensor_Tensor %6319, %6318 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%6321 = torch.aten.sub.Tensor %6320, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%6322 = torch.aten.gt.Tensor %6313, %6321 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,256,1280],i1> loc(#loc1)
%6323 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%6324 = torch.aten.to.dtype %6323, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%6325 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%6326 = torch.aten.broadcast_to %6324, %6325 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%6327 = torch.valsem.aten.copy %6326, %6321, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%6328 = torch.aten.where.self %6322, %6327, %6313 : !torch.vtensor<[2,256,1280],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%6329 = torch.aten.lt.Tensor %6328, %6317 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,256,1280],i1> loc(#loc1)
%6330 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64> loc(#loc1)
%6331 = torch.aten.to.dtype %6330, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16> loc(#loc1)
%6332 = torch.prim.ListConstruct : () -> !torch.list<int> loc(#loc1)
%6333 = torch.aten.broadcast_to %6331, %6332 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[],f16> loc(#loc1)
%6334 = torch.valsem.aten.copy %6333, %6317, %false : !torch.vtensor<[],f16>, !torch.vtensor<[],f32>, !torch.bool -> !torch.vtensor<[],f16> loc(#loc1)
%6335 = torch.aten.where.self %6329, %6334, %6328 : !torch.vtensor<[2,256,1280],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%6336 = torch.aten.round %6335 : !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%6337 = torch.aten.sub.Tensor %6336, %6311, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%6338 = torch.aten.mul.Tensor %6337, %6309 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
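// %6296-%6338: group-wise fake quantization of the attention output, same
// pattern as above.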
%6339 = torch.aten.broadcast_to %291, %5000 : !torch.vtensor<[1280,80,1],f16>, !torch.list<int> -> !torch.vtensor<[1280,80,16],f16> loc(#loc1)
%6340 = torch.aten.clone %6339, %int0 : !torch.vtensor<[1280,80,16],f16>, !torch.int -> !torch.vtensor<[1280,80,16],f16> loc(#loc1)
%6341 = torch.aten.view %6340, %5003 : !torch.vtensor<[1280,80,16],f16>, !torch.list<int> -> !torch.vtensor<[1280,1280],f16> loc(#loc1)
%6342 = torch.aten.mul.Tensor %292, %6341 : !torch.vtensor<[1280,1280],si8>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16> loc(#loc1)
%6343 = torch.aten.transpose.int %6342, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16> loc(#loc1)
%6344 = torch.aten.view %6338, %5007 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16> loc(#loc1)
%6345 = torch.aten.mm %6344, %6343 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16> loc(#loc1)
%6346 = torch.aten.mul.Scalar %293, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16> loc(#loc1)
%6347 = torch.aten.add.Tensor %6346, %6345, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[512,1280],f16>, !torch.int -> !torch.vtensor<[512,1280],f16> loc(#loc1)
%6348 = torch.aten.view %6347, %4939 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%6349 = torch.aten.add.Tensor %6348, %6091, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
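// %6339-%6349: quantized output projection (bias %293, presumably to_out),
// followed by the residual add with %6091, the input to this attention block.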
%6350 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int> loc(#loc1)
%6351 = torch.aten.sum.dim_IntList %6349, %6350, %true, %none : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,256,1],f16> loc(#loc1)
%6352 = torch.aten.div.Scalar %6351, %int1280 : !torch.vtensor<[2,256,1],f16>, !torch.int -> !torch.vtensor<[2,256,1],f16> loc(#loc1)
%6353 = torch.prim.ListConstruct %int2, %int256, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%6354 = torch.aten.broadcast_to %6352, %6353 : !torch.vtensor<[2,256,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%6355 = torch.aten.sub.Tensor %6349, %6354, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%6356 = torch.aten.mul.Tensor %6355, %6355 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%6357 = torch.aten.sum.dim_IntList %6356, %6350, %true, %none : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,256,1],f16> loc(#loc1)
%6358 = torch.aten.div.Scalar %6357, %int1280 : !torch.vtensor<[2,256,1],f16>, !torch.int -> !torch.vtensor<[2,256,1],f16> loc(#loc1)
%6359 = torch.aten.add.Scalar %6358, %float1.000000e-05, %int1 : !torch.vtensor<[2,256,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,256,1],f16> loc(#loc1)
%6360 = torch.aten.rsqrt %6359 : !torch.vtensor<[2,256,1],f16> -> !torch.vtensor<[2,256,1],f16> loc(#loc1)
%6361 = torch.prim.ListConstruct %int2, %int256, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%6362 = torch.aten.broadcast_to %6360, %6361 : !torch.vtensor<[2,256,1],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%6363 = torch.aten.mul.Tensor %6355, %6362 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
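// %6350-%6363: a second LayerNorm (eps = 1e-5, no affine) over the residual
// stream before the feed-forward sub-block.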
%6364 = torch.aten.broadcast_to %294, %5492 : !torch.vtensor<[10240,80,1],f16>, !torch.list<int> -> !torch.vtensor<[10240,80,16],f16> loc(#loc1)
%6365 = torch.aten.clone %6364, %int0 : !torch.vtensor<[10240,80,16],f16>, !torch.int -> !torch.vtensor<[10240,80,16],f16> loc(#loc1)
%6366 = torch.aten.view %6365, %5495 : !torch.vtensor<[10240,80,16],f16>, !torch.list<int> -> !torch.vtensor<[10240,1280],f16> loc(#loc1)
%6367 = torch.aten.mul.Tensor %295, %6366 : !torch.vtensor<[10240,1280],si8>, !torch.vtensor<[10240,1280],f16> -> !torch.vtensor<[10240,1280],f16> loc(#loc1)
%6368 = torch.aten.transpose.int %6367, %int0, %int1 : !torch.vtensor<[10240,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,10240],f16> loc(#loc1)
%6369 = torch.aten.view %6363, %5007 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16> loc(#loc1)
%6370 = torch.aten.mm %6369, %6368 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,10240],f16> -> !torch.vtensor<[512,10240],f16> loc(#loc1)
%6371 = torch.aten.mul.Scalar %296, %int1 : !torch.vtensor<[10240],f16>, !torch.int -> !torch.vtensor<[10240],f16> loc(#loc1)
%6372 = torch.aten.add.Tensor %6371, %6370, %int1 : !torch.vtensor<[10240],f16>, !torch.vtensor<[512,10240],f16>, !torch.int -> !torch.vtensor<[512,10240],f16> loc(#loc1)
%6373 = torch.aten.view %6372, %5503 : !torch.vtensor<[512,10240],f16>, !torch.list<int> -> !torch.vtensor<[2,256,10240],f16> loc(#loc1)
%6374 = torch.aten.slice.Tensor %6373, %int-1, %int0, %int5120, %int1 : !torch.vtensor<[2,256,10240],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,256,5120],f16> loc(#loc1)
%6375 = torch.aten.slice.Tensor %6373, %int-1, %int5120, %int10240, %int1 : !torch.vtensor<[2,256,10240],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,256,5120],f16> loc(#loc1)
%6376 = torch.aten.gelu %6375, %str : !torch.vtensor<[2,256,5120],f16>, !torch.str -> !torch.vtensor<[2,256,5120],f16> loc(#loc1)
%6377 = torch.aten.mul.Tensor %6374, %6376 : !torch.vtensor<[2,256,5120],f16>, !torch.vtensor<[2,256,5120],f16> -> !torch.vtensor<[2,256,5120],f16> loc(#loc1)
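// %6364-%6377: quantized feed-forward input projection to [2,256,10240], then
// GEGLU gating: the result is split into two 5120-channel halves a and b and
// combined as a * gelu(b).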
%6378 = torch.aten.broadcast_to %297, %5509 : !torch.vtensor<[1280,320,1],f16>, !torch.list<int> -> !torch.vtensor<[1280,320,16],f16> loc(#loc1)
%6379 = torch.aten.clone %6378, %int0 : !torch.vtensor<[1280,320,16],f16>, !torch.int -> !torch.vtensor<[1280,320,16],f16> loc(#loc1)
%6380 = torch.aten.view %6379, %5512 : !torch.vtensor<[1280,320,16],f16>, !torch.list<int> -> !torch.vtensor<[1280,5120],f16> loc(#loc1)
%6381 = torch.aten.mul.Tensor %298, %6380 : !torch.vtensor<[1280,5120],si8>, !torch.vtensor<[1280,5120],f16> -> !torch.vtensor<[1280,5120],f16> loc(#loc1)
%6382 = torch.aten.transpose.int %6381, %int0, %int1 : !torch.vtensor<[1280,5120],f16>, !torch.int, !torch.int -> !torch.vtensor<[5120,1280],f16> loc(#loc1)
%6383 = torch.aten.view %6377, %5516 : !torch.vtensor<[2,256,5120],f16>, !torch.list<int> -> !torch.vtensor<[512,5120],f16> loc(#loc1)
%6384 = torch.aten.mm %6383, %6382 : !torch.vtensor<[512,5120],f16>, !torch.vtensor<[5120,1280],f16> -> !torch.vtensor<[512,1280],f16> loc(#loc1)
%6385 = torch.aten.mul.Scalar %299, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16> loc(#loc1)
%6386 = torch.aten.add.Tensor %6385, %6384, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[512,1280],f16>, !torch.int -> !torch.vtensor<[512,1280],f16> loc(#loc1)
%6387 = torch.aten.view %6386, %4939 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
%6388 = torch.aten.add.Tensor %6387, %6349, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16> loc(#loc1)
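// %6378-%6388: quantized feed-forward output projection (5120 -> 1280, bias
// %299) and the second residual add, closing the transformer block.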
%6389 = torch.aten.view %6388, %5523 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,16,16,1280],f16> loc(#loc1)
%6390 = torch.aten.permute %6389, %1789 : !torch.vtensor<[2,16,16,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
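// %6389-%6390: the [2,256,1280] token sequence is reshaped back to the spatial
// layout [2,1280,16,16].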
%6391 = torch.aten.detach %880 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%6392 = torch.aten.detach %6391 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%6393 = torch.aten.view %6390, %4759 : !torch.vtensor<[2,1280,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,80,16,16,16],f16> loc(#loc1)
%6394 = torch.aten.abs %6393 : !torch.vtensor<[2,80,16,16,16],f16> -> !torch.vtensor<[2,80,16,16,16],f16> loc(#loc1)
%values_172, %indices_173 = torch.aten.max.dim %6394, %int2, %true : !torch.vtensor<[2,80,16,16,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[2,80,1,16,16],f16>, !torch.vtensor<[2,80,1,16,16],si64> loc(#loc1)
%6395 = torch.aten.view %values_172, %4762 : !torch.vtensor<[2,80,1,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,80,1,16,16],f16> loc(#loc1)
%6396 = torch.aten.broadcast_to %6395, %4759 : !torch.vtensor<[2,80,1,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,80,16,16,16],f16> loc(#loc1)
%6397 = torch.aten.clone %6396, %int0 : !torch.vtensor<[2,80,16,16,16],f16>, !torch.int -> !torch.vtensor<[2,80,16,16,16],f16> loc(#loc1)
%6398 = torch.aten.view %6397, %4742 : !torch.vtensor<[2,80,16,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%6399 = torch.aten.sub.Tensor %6392, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%6400 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%6401 = torch.aten.pow.Tensor_Tensor %6400, %6399 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%6402 = torch.aten.neg %6401 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%6403 = torch.aten.neg %6402 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%6404 = torch.aten.div.Tensor %6398, %6403 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%6405 = torch.aten.detach %881 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%6406 = torch.aten.detach %6405 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%6407 = torch.aten.div.Tensor %6390, %6404 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f16> -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%6408 = torch.aten.add.Tensor %6407, %6406, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16> loc(#loc1)
%6409 = torch.aten.sub.Tensor %6392, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%6410 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)
%6411 = torch.aten.pow.Tensor_Tensor %6410, %6409 : !torch.vtensor<[],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%6412 = torch.aten.neg %6411 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32> loc(#loc1)
%6413 = torch.aten.sub.Tensor %6392, %6, %int1 : !torch.vtensor<[],f32>, !torch.vtensor<[],si64>, !torch.int -> !torch.vtensor<[],f32> loc(#loc1)
%6414 = torch.aten.clone %882, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32> loc(#loc1)