
@pashu123
Created February 14, 2023 17:19
Note: this file has been truncated; the module below is incomplete.
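The listing appears to be torch-mlir IR in the `torch` dialect for the forward pass of a UNet-style diffusion model: activations are f16, and most weights are si8 tensors paired with f16 scale tensors, while the weight payloads themselves are elided (`dense_resource<__elided__>`). As a point of reference, the sketch below shows one common way such a dump is produced; the toy model, shapes, and flags are assumptions for illustration, not the actual pipeline behind this gist.

# Minimal sketch (assumption: torch-mlir's Python API as of early 2023).
import torch
import torch_mlir

class Toy(torch.nn.Module):
    def __init__(self):
        super().__init__()
        # Mirrors the 4-channel latent input and 320-channel width seen below.
        self.conv = torch.nn.Conv2d(4, 320, 3, padding=1)

    def forward(self, x):
        return self.conv(x)

# output_type="torch" yields a module of torch.vtensor ops like this dump;
# large constant attributes are typically elided when the IR is printed.
module = torch_mlir.compile(Toy(), torch.randn(2, 4, 96, 96), output_type="torch")
print(module)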
module attributes {torch.debug_module_name = "_lambda"} {
func.func @forward(%arg0: !torch.vtensor<[2,4,96,96],f16>, %arg1: !torch.vtensor<[1],f32>, %arg2: !torch.vtensor<[4,77,1024],f16>, %arg3: !torch.vtensor<[],f32>) -> !torch.vtensor<[2,96,96],f16> {
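// Inputs, per the signature above: %arg0 is a [2,4,96,96] f16 tensor (likely
// latents for two samples), %arg1 a [1] f32 timestep-like scalar, %arg2 a
// [4,77,1024] f16 tensor (likely text-encoder hidden states), and %arg3 a
// scalar f32. These roles are inferred from the shapes, not stated in the gist.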
%int1 = torch.constant.int 1
%int0 = torch.constant.int 0
%false = torch.constant.bool false
%int4 = torch.constant.int 4
%float0.000000e00 = torch.constant.float 0.000000e+00
%int5 = torch.constant.int 5
%none = torch.constant.none
%int320 = torch.constant.int 320
%int96 = torch.constant.int 96
%int7 = torch.constant.int 7
%true = torch.constant.bool true
%int92160 = torch.constant.int 92160
%int10 = torch.constant.int 10
%int9216 = torch.constant.int 9216
%float1.000000e00 = torch.constant.float 1.000000e+00
%int6 = torch.constant.int 6
%int32 = torch.constant.int 32
%int2 = torch.constant.int 2
%int20 = torch.constant.int 20
%int77 = torch.constant.int 77
%int64 = torch.constant.int 64
%int640 = torch.constant.int 640
%int184320 = torch.constant.int 184320
%int960 = torch.constant.int 960
%int276480 = torch.constant.int 276480
%int30 = torch.constant.int 30
%int2304 = torch.constant.int 2304
%int40 = torch.constant.int 40
%int48 = torch.constant.int 48
%int46080 = torch.constant.int 46080
%int69120 = torch.constant.int 69120
%int1280 = torch.constant.int 1280
%int1920 = torch.constant.int 1920
%int138240 = torch.constant.int 138240
%int60 = torch.constant.int 60
%int576 = torch.constant.int 576
%int80 = torch.constant.int 80
%int24 = torch.constant.int 24
%int23040 = torch.constant.int 23040
%int34560 = torch.constant.int 34560
%int2560 = torch.constant.int 2560
%int12 = torch.constant.int 12
%int5760 = torch.constant.int 5760
%int144 = torch.constant.int 144
%int11520 = torch.constant.int 11520
%int160 = torch.constant.int 160
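// Weight literals. Most pairs below follow one pattern: an si8 tensor (e.g.
// 320x320) next to an f16 tensor with a trailing group axis (e.g. 320x20x1),
// which looks like group-wise int8 weight quantization with per-group f16
// scales (320/20 suggests a group size of 16). This reading is an inference
// from the shapes, not something the gist states.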
%0 = torch.vtensor.literal(dense<[-3.863530e-02, 1.899720e-02, -1.201630e-02, -4.989620e-03]> : tensor<4xf16>) : !torch.vtensor<[4],f16>
%1 = torch.vtensor.literal(dense_resource<__elided__> : tensor<4x320x3x3xf16>) : !torch.vtensor<[4,320,3,3],f16>
%2 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%3 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%4 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%5 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xsi8>) : !torch.vtensor<[320,320],si8>
%6 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1xf16>) : !torch.vtensor<[320,20,1],f16>
%7 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%8 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x1280xsi8>) : !torch.vtensor<[320,1280],si8>
%9 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x80x1xf16>) : !torch.vtensor<[320,80,1],f16>
%10 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560xf16>) : !torch.vtensor<[2560],f16>
%11 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560x320xsi8>) : !torch.vtensor<[2560,320],si8>
%12 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560x20x1xf16>) : !torch.vtensor<[2560,20,1],f16>
%13 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%14 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%15 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%16 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xsi8>) : !torch.vtensor<[320,320],si8>
%17 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1xf16>) : !torch.vtensor<[320,20,1],f16>
%18 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x1024xsi8>) : !torch.vtensor<[320,1024],si8>
%19 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x64x1xf16>) : !torch.vtensor<[320,64,1],f16>
%20 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x1024xsi8>) : !torch.vtensor<[320,1024],si8>
%21 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x64x1xf16>) : !torch.vtensor<[320,64,1],f16>
%22 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xsi8>) : !torch.vtensor<[320,320],si8>
%23 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1xf16>) : !torch.vtensor<[320,20,1],f16>
%24 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%25 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%26 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%27 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xsi8>) : !torch.vtensor<[320,320],si8>
%28 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1xf16>) : !torch.vtensor<[320,20,1],f16>
%29 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xsi8>) : !torch.vtensor<[320,320],si8>
%30 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1xf16>) : !torch.vtensor<[320,20,1],f16>
%31 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xsi8>) : !torch.vtensor<[320,320],si8>
%32 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1xf16>) : !torch.vtensor<[320,20,1],f16>
%33 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xsi8>) : !torch.vtensor<[320,320],si8>
%34 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1xf16>) : !torch.vtensor<[320,20,1],f16>
%35 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%36 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%37 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%38 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xsi8>) : !torch.vtensor<[320,320],si8>
%39 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1xf16>) : !torch.vtensor<[320,20,1],f16>
%40 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%41 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%42 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%43 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x640x1x1xsi8>) : !torch.vtensor<[320,640,1,1],si8>
%44 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x40x1x1x1xf16>) : !torch.vtensor<[320,40,1,1,1],f16>
%45 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%46 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320x3x3xsi8>) : !torch.vtensor<[320,320,3,3],si8>
%47 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1x3x3xf16>) : !torch.vtensor<[320,20,1,3,3],f16>
%48 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%49 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%50 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%51 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x1280xf16>) : !torch.vtensor<[320,1280],f16>
%52 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%53 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x640x3x3xsi8>) : !torch.vtensor<[320,640,3,3],si8>
%54 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x40x1x3x3xf16>) : !torch.vtensor<[320,40,1,3,3],f16>
%55 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%56 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%57 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%58 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xsi8>) : !torch.vtensor<[320,320],si8>
%59 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1xf16>) : !torch.vtensor<[320,20,1],f16>
%60 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%61 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x1280xsi8>) : !torch.vtensor<[320,1280],si8>
%62 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x80x1xf16>) : !torch.vtensor<[320,80,1],f16>
%63 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560xf16>) : !torch.vtensor<[2560],f16>
%64 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560x320xsi8>) : !torch.vtensor<[2560,320],si8>
%65 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560x20x1xf16>) : !torch.vtensor<[2560,20,1],f16>
%66 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%67 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%68 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%69 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xsi8>) : !torch.vtensor<[320,320],si8>
%70 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1xf16>) : !torch.vtensor<[320,20,1],f16>
%71 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x1024xsi8>) : !torch.vtensor<[320,1024],si8>
%72 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x64x1xf16>) : !torch.vtensor<[320,64,1],f16>
%73 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x1024xsi8>) : !torch.vtensor<[320,1024],si8>
%74 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x64x1xf16>) : !torch.vtensor<[320,64,1],f16>
%75 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xsi8>) : !torch.vtensor<[320,320],si8>
%76 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1xf16>) : !torch.vtensor<[320,20,1],f16>
%77 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%78 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%79 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%80 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xsi8>) : !torch.vtensor<[320,320],si8>
%81 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1xf16>) : !torch.vtensor<[320,20,1],f16>
%82 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xsi8>) : !torch.vtensor<[320,320],si8>
%83 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1xf16>) : !torch.vtensor<[320,20,1],f16>
%84 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xsi8>) : !torch.vtensor<[320,320],si8>
%85 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1xf16>) : !torch.vtensor<[320,20,1],f16>
%86 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xsi8>) : !torch.vtensor<[320,320],si8>
%87 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1xf16>) : !torch.vtensor<[320,20,1],f16>
%88 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%89 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%90 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%91 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xsi8>) : !torch.vtensor<[320,320],si8>
%92 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1xf16>) : !torch.vtensor<[320,20,1],f16>
%93 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%94 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%95 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%96 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x640x1x1xsi8>) : !torch.vtensor<[320,640,1,1],si8>
%97 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x40x1x1x1xf16>) : !torch.vtensor<[320,40,1,1,1],f16>
%98 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%99 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320x3x3xsi8>) : !torch.vtensor<[320,320,3,3],si8>
%100 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1x3x3xf16>) : !torch.vtensor<[320,20,1,3,3],f16>
%101 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%102 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%103 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%104 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x1280xf16>) : !torch.vtensor<[320,1280],f16>
%105 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%106 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x640x3x3xsi8>) : !torch.vtensor<[320,640,3,3],si8>
%107 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x40x1x3x3xf16>) : !torch.vtensor<[320,40,1,3,3],f16>
%108 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%109 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%110 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%111 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xsi8>) : !torch.vtensor<[320,320],si8>
%112 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1xf16>) : !torch.vtensor<[320,20,1],f16>
%113 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%114 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x1280xsi8>) : !torch.vtensor<[320,1280],si8>
%115 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x80x1xf16>) : !torch.vtensor<[320,80,1],f16>
%116 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560xf16>) : !torch.vtensor<[2560],f16>
%117 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560x320xsi8>) : !torch.vtensor<[2560,320],si8>
%118 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560x20x1xf16>) : !torch.vtensor<[2560,20,1],f16>
%119 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%120 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%121 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%122 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xsi8>) : !torch.vtensor<[320,320],si8>
%123 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1xf16>) : !torch.vtensor<[320,20,1],f16>
%124 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x1024xsi8>) : !torch.vtensor<[320,1024],si8>
%125 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x64x1xf16>) : !torch.vtensor<[320,64,1],f16>
%126 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x1024xsi8>) : !torch.vtensor<[320,1024],si8>
%127 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x64x1xf16>) : !torch.vtensor<[320,64,1],f16>
%128 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xsi8>) : !torch.vtensor<[320,320],si8>
%129 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1xf16>) : !torch.vtensor<[320,20,1],f16>
%130 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%131 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%132 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%133 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xsi8>) : !torch.vtensor<[320,320],si8>
%134 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1xf16>) : !torch.vtensor<[320,20,1],f16>
%135 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xsi8>) : !torch.vtensor<[320,320],si8>
%136 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1xf16>) : !torch.vtensor<[320,20,1],f16>
%137 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xsi8>) : !torch.vtensor<[320,320],si8>
%138 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1xf16>) : !torch.vtensor<[320,20,1],f16>
%139 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xsi8>) : !torch.vtensor<[320,320],si8>
%140 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1xf16>) : !torch.vtensor<[320,20,1],f16>
%141 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%142 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%143 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%144 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xsi8>) : !torch.vtensor<[320,320],si8>
%145 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1xf16>) : !torch.vtensor<[320,20,1],f16>
%146 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%147 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%148 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%149 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x960x1x1xsi8>) : !torch.vtensor<[320,960,1,1],si8>
%150 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x60x1x1x1xf16>) : !torch.vtensor<[320,60,1,1,1],f16>
%151 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%152 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320x3x3xsi8>) : !torch.vtensor<[320,320,3,3],si8>
%153 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1x3x3xf16>) : !torch.vtensor<[320,20,1,3,3],f16>
%154 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%155 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%156 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%157 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x1280xf16>) : !torch.vtensor<[320,1280],f16>
%158 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%159 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x960x3x3xsi8>) : !torch.vtensor<[320,960,3,3],si8>
%160 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x60x1x3x3xf16>) : !torch.vtensor<[320,60,1,3,3],f16>
%161 = torch.vtensor.literal(dense_resource<__elided__> : tensor<960xf16>) : !torch.vtensor<[960],f16>
%162 = torch.vtensor.literal(dense_resource<__elided__> : tensor<960xf16>) : !torch.vtensor<[960],f16>
%163 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%164 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640x3x3xsi8>) : !torch.vtensor<[640,640,3,3],si8>
%165 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1x3x3xf16>) : !torch.vtensor<[640,40,1,3,3],f16>
%166 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%167 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xsi8>) : !torch.vtensor<[640,640],si8>
%168 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1xf16>) : !torch.vtensor<[640,40,1],f16>
%169 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%170 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x2560xsi8>) : !torch.vtensor<[640,2560],si8>
%171 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x160x1xf16>) : !torch.vtensor<[640,160,1],f16>
%172 = torch.vtensor.literal(dense_resource<__elided__> : tensor<5120xf16>) : !torch.vtensor<[5120],f16>
%173 = torch.vtensor.literal(dense_resource<__elided__> : tensor<5120x640xsi8>) : !torch.vtensor<[5120,640],si8>
%174 = torch.vtensor.literal(dense_resource<__elided__> : tensor<5120x40x1xf16>) : !torch.vtensor<[5120,40,1],f16>
%175 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%176 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%177 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%178 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xsi8>) : !torch.vtensor<[640,640],si8>
%179 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1xf16>) : !torch.vtensor<[640,40,1],f16>
%180 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x1024xsi8>) : !torch.vtensor<[640,1024],si8>
%181 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x64x1xf16>) : !torch.vtensor<[640,64,1],f16>
%182 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x1024xsi8>) : !torch.vtensor<[640,1024],si8>
%183 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x64x1xf16>) : !torch.vtensor<[640,64,1],f16>
%184 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xsi8>) : !torch.vtensor<[640,640],si8>
%185 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1xf16>) : !torch.vtensor<[640,40,1],f16>
%186 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%187 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%188 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%189 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xsi8>) : !torch.vtensor<[640,640],si8>
%190 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1xf16>) : !torch.vtensor<[640,40,1],f16>
%191 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xsi8>) : !torch.vtensor<[640,640],si8>
%192 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1xf16>) : !torch.vtensor<[640,40,1],f16>
%193 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xsi8>) : !torch.vtensor<[640,640],si8>
%194 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1xf16>) : !torch.vtensor<[640,40,1],f16>
%195 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xsi8>) : !torch.vtensor<[640,640],si8>
%196 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1xf16>) : !torch.vtensor<[640,40,1],f16>
%197 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%198 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%199 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%200 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xsi8>) : !torch.vtensor<[640,640],si8>
%201 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1xf16>) : !torch.vtensor<[640,40,1],f16>
%202 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%203 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%204 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%205 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x960x1x1xsi8>) : !torch.vtensor<[640,960,1,1],si8>
%206 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x60x1x1x1xf16>) : !torch.vtensor<[640,60,1,1,1],f16>
%207 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%208 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640x3x3xsi8>) : !torch.vtensor<[640,640,3,3],si8>
%209 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1x3x3xf16>) : !torch.vtensor<[640,40,1,3,3],f16>
%210 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%211 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%212 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%213 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x1280xf16>) : !torch.vtensor<[640,1280],f16>
%214 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%215 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x960x3x3xsi8>) : !torch.vtensor<[640,960,3,3],si8>
%216 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x60x1x3x3xf16>) : !torch.vtensor<[640,60,1,3,3],f16>
%217 = torch.vtensor.literal(dense_resource<__elided__> : tensor<960xf16>) : !torch.vtensor<[960],f16>
%218 = torch.vtensor.literal(dense_resource<__elided__> : tensor<960xf16>) : !torch.vtensor<[960],f16>
%219 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%220 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xsi8>) : !torch.vtensor<[640,640],si8>
%221 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1xf16>) : !torch.vtensor<[640,40,1],f16>
%222 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%223 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x2560xsi8>) : !torch.vtensor<[640,2560],si8>
%224 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x160x1xf16>) : !torch.vtensor<[640,160,1],f16>
%225 = torch.vtensor.literal(dense_resource<__elided__> : tensor<5120xf16>) : !torch.vtensor<[5120],f16>
%226 = torch.vtensor.literal(dense_resource<__elided__> : tensor<5120x640xsi8>) : !torch.vtensor<[5120,640],si8>
%227 = torch.vtensor.literal(dense_resource<__elided__> : tensor<5120x40x1xf16>) : !torch.vtensor<[5120,40,1],f16>
%228 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%229 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%230 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%231 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xsi8>) : !torch.vtensor<[640,640],si8>
%232 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1xf16>) : !torch.vtensor<[640,40,1],f16>
%233 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x1024xsi8>) : !torch.vtensor<[640,1024],si8>
%234 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x64x1xf16>) : !torch.vtensor<[640,64,1],f16>
%235 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x1024xsi8>) : !torch.vtensor<[640,1024],si8>
%236 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x64x1xf16>) : !torch.vtensor<[640,64,1],f16>
%237 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xsi8>) : !torch.vtensor<[640,640],si8>
%238 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1xf16>) : !torch.vtensor<[640,40,1],f16>
%239 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%240 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%241 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%242 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xsi8>) : !torch.vtensor<[640,640],si8>
%243 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1xf16>) : !torch.vtensor<[640,40,1],f16>
%244 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xsi8>) : !torch.vtensor<[640,640],si8>
%245 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1xf16>) : !torch.vtensor<[640,40,1],f16>
%246 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xsi8>) : !torch.vtensor<[640,640],si8>
%247 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1xf16>) : !torch.vtensor<[640,40,1],f16>
%248 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xsi8>) : !torch.vtensor<[640,640],si8>
%249 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1xf16>) : !torch.vtensor<[640,40,1],f16>
%250 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%251 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%252 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%253 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xsi8>) : !torch.vtensor<[640,640],si8>
%254 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1xf16>) : !torch.vtensor<[640,40,1],f16>
%255 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%256 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%257 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%258 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x1280x1x1xsi8>) : !torch.vtensor<[640,1280,1,1],si8>
%259 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x80x1x1x1xf16>) : !torch.vtensor<[640,80,1,1,1],f16>
%260 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%261 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640x3x3xsi8>) : !torch.vtensor<[640,640,3,3],si8>
%262 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1x3x3xf16>) : !torch.vtensor<[640,40,1,3,3],f16>
%263 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%264 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%265 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%266 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x1280xf16>) : !torch.vtensor<[640,1280],f16>
%267 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%268 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x1280x3x3xsi8>) : !torch.vtensor<[640,1280,3,3],si8>
%269 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x80x1x3x3xf16>) : !torch.vtensor<[640,80,1,3,3],f16>
%270 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%271 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%272 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%273 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xsi8>) : !torch.vtensor<[640,640],si8>
%274 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1xf16>) : !torch.vtensor<[640,40,1],f16>
%275 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%276 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x2560xsi8>) : !torch.vtensor<[640,2560],si8>
%277 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x160x1xf16>) : !torch.vtensor<[640,160,1],f16>
%278 = torch.vtensor.literal(dense_resource<__elided__> : tensor<5120xf16>) : !torch.vtensor<[5120],f16>
%279 = torch.vtensor.literal(dense_resource<__elided__> : tensor<5120x640xsi8>) : !torch.vtensor<[5120,640],si8>
%280 = torch.vtensor.literal(dense_resource<__elided__> : tensor<5120x40x1xf16>) : !torch.vtensor<[5120,40,1],f16>
%281 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%282 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%283 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%284 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xsi8>) : !torch.vtensor<[640,640],si8>
%285 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1xf16>) : !torch.vtensor<[640,40,1],f16>
%286 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x1024xsi8>) : !torch.vtensor<[640,1024],si8>
%287 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x64x1xf16>) : !torch.vtensor<[640,64,1],f16>
%288 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x1024xsi8>) : !torch.vtensor<[640,1024],si8>
%289 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x64x1xf16>) : !torch.vtensor<[640,64,1],f16>
%290 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xsi8>) : !torch.vtensor<[640,640],si8>
%291 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1xf16>) : !torch.vtensor<[640,40,1],f16>
%292 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%293 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%294 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%295 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xsi8>) : !torch.vtensor<[640,640],si8>
%296 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1xf16>) : !torch.vtensor<[640,40,1],f16>
%297 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xsi8>) : !torch.vtensor<[640,640],si8>
%298 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1xf16>) : !torch.vtensor<[640,40,1],f16>
%299 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xsi8>) : !torch.vtensor<[640,640],si8>
%300 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1xf16>) : !torch.vtensor<[640,40,1],f16>
%301 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xsi8>) : !torch.vtensor<[640,640],si8>
%302 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1xf16>) : !torch.vtensor<[640,40,1],f16>
%303 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%304 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%305 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%306 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xsi8>) : !torch.vtensor<[640,640],si8>
%307 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1xf16>) : !torch.vtensor<[640,40,1],f16>
%308 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%309 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%310 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%311 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x1920x1x1xsi8>) : !torch.vtensor<[640,1920,1,1],si8>
%312 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x120x1x1x1xf16>) : !torch.vtensor<[640,120,1,1,1],f16>
%313 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%314 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640x3x3xsi8>) : !torch.vtensor<[640,640,3,3],si8>
%315 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1x3x3xf16>) : !torch.vtensor<[640,40,1,3,3],f16>
%316 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%317 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%318 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%319 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x1280xf16>) : !torch.vtensor<[640,1280],f16>
%320 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%321 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x1920x3x3xsi8>) : !torch.vtensor<[640,1920,3,3],si8>
%322 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x120x1x3x3xf16>) : !torch.vtensor<[640,120,1,3,3],f16>
%323 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1920xf16>) : !torch.vtensor<[1920],f16>
%324 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1920xf16>) : !torch.vtensor<[1920],f16>
%325 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%326 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x3x3xsi8>) : !torch.vtensor<[1280,1280,3,3],si8>
%327 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1x3x3xf16>) : !torch.vtensor<[1280,80,1,3,3],f16>
%328 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%329 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xsi8>) : !torch.vtensor<[1280,1280],si8>
%330 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1xf16>) : !torch.vtensor<[1280,80,1],f16>
%331 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%332 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x5120xsi8>) : !torch.vtensor<[1280,5120],si8>
%333 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x320x1xf16>) : !torch.vtensor<[1280,320,1],f16>
%334 = torch.vtensor.literal(dense_resource<__elided__> : tensor<10240xf16>) : !torch.vtensor<[10240],f16>
%335 = torch.vtensor.literal(dense_resource<__elided__> : tensor<10240x1280xsi8>) : !torch.vtensor<[10240,1280],si8>
%336 = torch.vtensor.literal(dense_resource<__elided__> : tensor<10240x80x1xf16>) : !torch.vtensor<[10240,80,1],f16>
%337 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%338 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%339 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%340 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xsi8>) : !torch.vtensor<[1280,1280],si8>
%341 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1xf16>) : !torch.vtensor<[1280,80,1],f16>
%342 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1024xsi8>) : !torch.vtensor<[1280,1024],si8>
%343 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x64x1xf16>) : !torch.vtensor<[1280,64,1],f16>
%344 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1024xsi8>) : !torch.vtensor<[1280,1024],si8>
%345 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x64x1xf16>) : !torch.vtensor<[1280,64,1],f16>
%346 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xsi8>) : !torch.vtensor<[1280,1280],si8>
%347 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1xf16>) : !torch.vtensor<[1280,80,1],f16>
%348 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%349 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%350 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%351 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xsi8>) : !torch.vtensor<[1280,1280],si8>
%352 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1xf16>) : !torch.vtensor<[1280,80,1],f16>
%353 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xsi8>) : !torch.vtensor<[1280,1280],si8>
%354 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1xf16>) : !torch.vtensor<[1280,80,1],f16>
%355 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xsi8>) : !torch.vtensor<[1280,1280],si8>
%356 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1xf16>) : !torch.vtensor<[1280,80,1],f16>
%357 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xsi8>) : !torch.vtensor<[1280,1280],si8>
%358 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1xf16>) : !torch.vtensor<[1280,80,1],f16>
%359 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%360 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%361 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%362 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xsi8>) : !torch.vtensor<[1280,1280],si8>
%363 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1xf16>) : !torch.vtensor<[1280,80,1],f16>
%364 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%365 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%366 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%367 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1920x1x1xsi8>) : !torch.vtensor<[1280,1920,1,1],si8>
%368 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x120x1x1x1xf16>) : !torch.vtensor<[1280,120,1,1,1],f16>
%369 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%370 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x3x3xsi8>) : !torch.vtensor<[1280,1280,3,3],si8>
%371 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1x3x3xf16>) : !torch.vtensor<[1280,80,1,3,3],f16>
%372 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%373 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%374 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%375 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16>
%376 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%377 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1920x3x3xsi8>) : !torch.vtensor<[1280,1920,3,3],si8>
%378 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x120x1x3x3xf16>) : !torch.vtensor<[1280,120,1,3,3],f16>
%379 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1920xf16>) : !torch.vtensor<[1920],f16>
%380 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1920xf16>) : !torch.vtensor<[1920],f16>
%381 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%382 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xsi8>) : !torch.vtensor<[1280,1280],si8>
%383 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1xf16>) : !torch.vtensor<[1280,80,1],f16>
%384 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%385 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x5120xsi8>) : !torch.vtensor<[1280,5120],si8>
%386 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x320x1xf16>) : !torch.vtensor<[1280,320,1],f16>
%387 = torch.vtensor.literal(dense_resource<__elided__> : tensor<10240xf16>) : !torch.vtensor<[10240],f16>
%388 = torch.vtensor.literal(dense_resource<__elided__> : tensor<10240x1280xsi8>) : !torch.vtensor<[10240,1280],si8>
%389 = torch.vtensor.literal(dense_resource<__elided__> : tensor<10240x80x1xf16>) : !torch.vtensor<[10240,80,1],f16>
%390 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%391 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%392 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%393 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xsi8>) : !torch.vtensor<[1280,1280],si8>
%394 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1xf16>) : !torch.vtensor<[1280,80,1],f16>
%395 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1024xsi8>) : !torch.vtensor<[1280,1024],si8>
%396 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x64x1xf16>) : !torch.vtensor<[1280,64,1],f16>
%397 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1024xsi8>) : !torch.vtensor<[1280,1024],si8>
%398 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x64x1xf16>) : !torch.vtensor<[1280,64,1],f16>
%399 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xsi8>) : !torch.vtensor<[1280,1280],si8>
%400 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1xf16>) : !torch.vtensor<[1280,80,1],f16>
%401 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%402 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%403 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%404 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xsi8>) : !torch.vtensor<[1280,1280],si8>
%405 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1xf16>) : !torch.vtensor<[1280,80,1],f16>
%406 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xsi8>) : !torch.vtensor<[1280,1280],si8>
%407 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1xf16>) : !torch.vtensor<[1280,80,1],f16>
%408 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xsi8>) : !torch.vtensor<[1280,1280],si8>
%409 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1xf16>) : !torch.vtensor<[1280,80,1],f16>
%410 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xsi8>) : !torch.vtensor<[1280,1280],si8>
%411 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1xf16>) : !torch.vtensor<[1280,80,1],f16>
%412 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%413 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%414 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%415 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xsi8>) : !torch.vtensor<[1280,1280],si8>
%416 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1xf16>) : !torch.vtensor<[1280,80,1],f16>
%417 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%418 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%419 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%420 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x2560x1x1xsi8>) : !torch.vtensor<[1280,2560,1,1],si8>
%421 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x160x1x1x1xf16>) : !torch.vtensor<[1280,160,1,1,1],f16>
%422 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%423 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x3x3xsi8>) : !torch.vtensor<[1280,1280,3,3],si8>
%424 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1x3x3xf16>) : !torch.vtensor<[1280,80,1,3,3],f16>
%425 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%426 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%427 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%428 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16>
%429 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%430 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x2560x3x3xsi8>) : !torch.vtensor<[1280,2560,3,3],si8>
%431 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x160x1x3x3xf16>) : !torch.vtensor<[1280,160,1,3,3],f16>
%432 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560xf16>) : !torch.vtensor<[2560],f16>
%433 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560xf16>) : !torch.vtensor<[2560],f16>
%434 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%435 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xsi8>) : !torch.vtensor<[1280,1280],si8>
%436 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1xf16>) : !torch.vtensor<[1280,80,1],f16>
%437 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%438 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x5120xsi8>) : !torch.vtensor<[1280,5120],si8>
%439 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x320x1xf16>) : !torch.vtensor<[1280,320,1],f16>
%440 = torch.vtensor.literal(dense_resource<__elided__> : tensor<10240xf16>) : !torch.vtensor<[10240],f16>
%441 = torch.vtensor.literal(dense_resource<__elided__> : tensor<10240x1280xsi8>) : !torch.vtensor<[10240,1280],si8>
%442 = torch.vtensor.literal(dense_resource<__elided__> : tensor<10240x80x1xf16>) : !torch.vtensor<[10240,80,1],f16>
%443 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%444 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%445 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%446 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xsi8>) : !torch.vtensor<[1280,1280],si8>
%447 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1xf16>) : !torch.vtensor<[1280,80,1],f16>
%448 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1024xsi8>) : !torch.vtensor<[1280,1024],si8>
%449 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x64x1xf16>) : !torch.vtensor<[1280,64,1],f16>
%450 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1024xsi8>) : !torch.vtensor<[1280,1024],si8>
%451 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x64x1xf16>) : !torch.vtensor<[1280,64,1],f16>
%452 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xsi8>) : !torch.vtensor<[1280,1280],si8>
%453 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1xf16>) : !torch.vtensor<[1280,80,1],f16>
%454 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%455 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%456 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%457 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xsi8>) : !torch.vtensor<[1280,1280],si8>
%458 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1xf16>) : !torch.vtensor<[1280,80,1],f16>
%459 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xsi8>) : !torch.vtensor<[1280,1280],si8>
%460 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1xf16>) : !torch.vtensor<[1280,80,1],f16>
%461 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xsi8>) : !torch.vtensor<[1280,1280],si8>
%462 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1xf16>) : !torch.vtensor<[1280,80,1],f16>
%463 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xsi8>) : !torch.vtensor<[1280,1280],si8>
%464 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1xf16>) : !torch.vtensor<[1280,80,1],f16>
%465 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%466 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%467 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%468 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xsi8>) : !torch.vtensor<[1280,1280],si8>
%469 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1xf16>) : !torch.vtensor<[1280,80,1],f16>
%470 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%471 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%472 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%473 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x2560x1x1xsi8>) : !torch.vtensor<[1280,2560,1,1],si8>
%474 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x160x1x1x1xf16>) : !torch.vtensor<[1280,160,1,1,1],f16>
%475 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%476 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x3x3xsi8>) : !torch.vtensor<[1280,1280,3,3],si8>
%477 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1x3x3xf16>) : !torch.vtensor<[1280,80,1,3,3],f16>
%478 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%479 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%480 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%481 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16>
%482 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%483 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x2560x3x3xsi8>) : !torch.vtensor<[1280,2560,3,3],si8>
%484 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x160x1x3x3xf16>) : !torch.vtensor<[1280,160,1,3,3],f16>
%485 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560xf16>) : !torch.vtensor<[2560],f16>
%486 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560xf16>) : !torch.vtensor<[2560],f16>
%487 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%488 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x3x3xsi8>) : !torch.vtensor<[1280,1280,3,3],si8>
%489 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1x3x3xf16>) : !torch.vtensor<[1280,80,1,3,3],f16>
%490 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%491 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x2560x1x1xsi8>) : !torch.vtensor<[1280,2560,1,1],si8>
%492 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x160x1x1x1xf16>) : !torch.vtensor<[1280,160,1,1,1],f16>
%493 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%494 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x3x3xsi8>) : !torch.vtensor<[1280,1280,3,3],si8>
%495 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1x3x3xf16>) : !torch.vtensor<[1280,80,1,3,3],f16>
%496 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%497 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%498 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%499 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16>
%500 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%501 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x2560x3x3xsi8>) : !torch.vtensor<[1280,2560,3,3],si8>
%502 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x160x1x3x3xf16>) : !torch.vtensor<[1280,160,1,3,3],f16>
%503 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560xf16>) : !torch.vtensor<[2560],f16>
%504 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560xf16>) : !torch.vtensor<[2560],f16>
%505 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%506 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x2560x1x1xsi8>) : !torch.vtensor<[1280,2560,1,1],si8>
%507 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x160x1x1x1xf16>) : !torch.vtensor<[1280,160,1,1,1],f16>
%508 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%509 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x3x3xsi8>) : !torch.vtensor<[1280,1280,3,3],si8>
%510 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1x3x3xf16>) : !torch.vtensor<[1280,80,1,3,3],f16>
%511 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%512 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%513 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%514 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16>
%515 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%516 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x2560x3x3xsi8>) : !torch.vtensor<[1280,2560,3,3],si8>
%517 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x160x1x3x3xf16>) : !torch.vtensor<[1280,160,1,3,3],f16>
%518 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560xf16>) : !torch.vtensor<[2560],f16>
%519 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560xf16>) : !torch.vtensor<[2560],f16>
%520 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%521 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x2560x1x1xsi8>) : !torch.vtensor<[1280,2560,1,1],si8>
%522 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x160x1x1x1xf16>) : !torch.vtensor<[1280,160,1,1,1],f16>
%523 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%524 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x3x3xsi8>) : !torch.vtensor<[1280,1280,3,3],si8>
%525 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1x3x3xf16>) : !torch.vtensor<[1280,80,1,3,3],f16>
%526 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%527 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%528 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%529 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16>
%530 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%531 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x2560x3x3xsi8>) : !torch.vtensor<[1280,2560,3,3],si8>
%532 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x160x1x3x3xf16>) : !torch.vtensor<[1280,160,1,3,3],f16>
%533 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560xf16>) : !torch.vtensor<[2560],f16>
%534 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560xf16>) : !torch.vtensor<[2560],f16>
%535 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%536 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x3x3xsi8>) : !torch.vtensor<[1280,1280,3,3],si8>
%537 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1x3x3xf16>) : !torch.vtensor<[1280,80,1,3,3],f16>
%538 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%539 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%540 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%541 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16>
%542 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%543 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x3x3xsi8>) : !torch.vtensor<[1280,1280,3,3],si8>
%544 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1x3x3xf16>) : !torch.vtensor<[1280,80,1,3,3],f16>
%545 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%546 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%547 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%548 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xsi8>) : !torch.vtensor<[1280,1280],si8>
%549 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1xf16>) : !torch.vtensor<[1280,80,1],f16>
%550 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%551 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x5120xsi8>) : !torch.vtensor<[1280,5120],si8>
%552 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x320x1xf16>) : !torch.vtensor<[1280,320,1],f16>
%553 = torch.vtensor.literal(dense_resource<__elided__> : tensor<10240xf16>) : !torch.vtensor<[10240],f16>
%554 = torch.vtensor.literal(dense_resource<__elided__> : tensor<10240x1280xsi8>) : !torch.vtensor<[10240,1280],si8>
%555 = torch.vtensor.literal(dense_resource<__elided__> : tensor<10240x80x1xf16>) : !torch.vtensor<[10240,80,1],f16>
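// Editorial note (assumption): the 1280x5120 and 10240x1280 weights above match
// a GEGLU feed-forward block: proj-in 1280 -> 10240 (value and gate halves of
// 5120 each), followed by proj-out 5120 -> 1280.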
%556 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%557 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%558 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%559 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xsi8>) : !torch.vtensor<[1280,1280],si8>
%560 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1xf16>) : !torch.vtensor<[1280,80,1],f16>
%561 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1024xsi8>) : !torch.vtensor<[1280,1024],si8>
%562 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x64x1xf16>) : !torch.vtensor<[1280,64,1],f16>
%563 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1024xsi8>) : !torch.vtensor<[1280,1024],si8>
%564 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x64x1xf16>) : !torch.vtensor<[1280,64,1],f16>
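// Editorial note (assumption): the 1280x1024 weights look like cross-attention
// to_k / to_v projections from the 1024-dim text-encoder states
// (%arg2: [4,77,1024]) into the 1280-dim attention space.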
%565 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xsi8>) : !torch.vtensor<[1280,1280],si8>
%566 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1xf16>) : !torch.vtensor<[1280,80,1],f16>
%567 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%568 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%569 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%570 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xsi8>) : !torch.vtensor<[1280,1280],si8>
%571 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1xf16>) : !torch.vtensor<[1280,80,1],f16>
%572 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xsi8>) : !torch.vtensor<[1280,1280],si8>
%573 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1xf16>) : !torch.vtensor<[1280,80,1],f16>
%574 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xsi8>) : !torch.vtensor<[1280,1280],si8>
%575 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1xf16>) : !torch.vtensor<[1280,80,1],f16>
%576 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xsi8>) : !torch.vtensor<[1280,1280],si8>
%577 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1xf16>) : !torch.vtensor<[1280,80,1],f16>
%578 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%579 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%580 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%581 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xsi8>) : !torch.vtensor<[1280,1280],si8>
%582 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1xf16>) : !torch.vtensor<[1280,80,1],f16>
%583 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%584 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%585 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%586 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x3x3xsi8>) : !torch.vtensor<[1280,1280,3,3],si8>
%587 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1x3x3xf16>) : !torch.vtensor<[1280,80,1,3,3],f16>
%588 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%589 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%590 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%591 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16>
%592 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%593 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x3x3xsi8>) : !torch.vtensor<[1280,1280,3,3],si8>
%594 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1x3x3xf16>) : !torch.vtensor<[1280,80,1,3,3],f16>
%595 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%596 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%597 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%598 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x3x3xsi8>) : !torch.vtensor<[1280,1280,3,3],si8>
%599 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1x3x3xf16>) : !torch.vtensor<[1280,80,1,3,3],f16>
%600 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%601 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%602 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%603 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16>
%604 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%605 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x3x3xsi8>) : !torch.vtensor<[1280,1280,3,3],si8>
%606 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1x3x3xf16>) : !torch.vtensor<[1280,80,1,3,3],f16>
%607 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%608 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%609 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%610 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x3x3xsi8>) : !torch.vtensor<[1280,1280,3,3],si8>
%611 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1x3x3xf16>) : !torch.vtensor<[1280,80,1,3,3],f16>
%612 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%613 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%614 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%615 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16>
%616 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%617 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x3x3xsi8>) : !torch.vtensor<[1280,1280,3,3],si8>
%618 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1x3x3xf16>) : !torch.vtensor<[1280,80,1,3,3],f16>
%619 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%620 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%621 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%622 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x3x3xsi8>) : !torch.vtensor<[1280,1280,3,3],si8>
%623 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1x3x3xf16>) : !torch.vtensor<[1280,80,1,3,3],f16>
%624 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%625 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xsi8>) : !torch.vtensor<[1280,1280],si8>
%626 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1xf16>) : !torch.vtensor<[1280,80,1],f16>
%627 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%628 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x5120xsi8>) : !torch.vtensor<[1280,5120],si8>
%629 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x320x1xf16>) : !torch.vtensor<[1280,320,1],f16>
%630 = torch.vtensor.literal(dense_resource<__elided__> : tensor<10240xf16>) : !torch.vtensor<[10240],f16>
%631 = torch.vtensor.literal(dense_resource<__elided__> : tensor<10240x1280xsi8>) : !torch.vtensor<[10240,1280],si8>
%632 = torch.vtensor.literal(dense_resource<__elided__> : tensor<10240x80x1xf16>) : !torch.vtensor<[10240,80,1],f16>
%633 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%634 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%635 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%636 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xsi8>) : !torch.vtensor<[1280,1280],si8>
%637 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1xf16>) : !torch.vtensor<[1280,80,1],f16>
%638 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1024xsi8>) : !torch.vtensor<[1280,1024],si8>
%639 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x64x1xf16>) : !torch.vtensor<[1280,64,1],f16>
%640 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1024xsi8>) : !torch.vtensor<[1280,1024],si8>
%641 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x64x1xf16>) : !torch.vtensor<[1280,64,1],f16>
%642 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xsi8>) : !torch.vtensor<[1280,1280],si8>
%643 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1xf16>) : !torch.vtensor<[1280,80,1],f16>
%644 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%645 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%646 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%647 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xsi8>) : !torch.vtensor<[1280,1280],si8>
%648 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1xf16>) : !torch.vtensor<[1280,80,1],f16>
%649 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xsi8>) : !torch.vtensor<[1280,1280],si8>
%650 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1xf16>) : !torch.vtensor<[1280,80,1],f16>
%651 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xsi8>) : !torch.vtensor<[1280,1280],si8>
%652 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1xf16>) : !torch.vtensor<[1280,80,1],f16>
%653 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xsi8>) : !torch.vtensor<[1280,1280],si8>
%654 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1xf16>) : !torch.vtensor<[1280,80,1],f16>
%655 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%656 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%657 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%658 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xsi8>) : !torch.vtensor<[1280,1280],si8>
%659 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1xf16>) : !torch.vtensor<[1280,80,1],f16>
%660 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%661 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%662 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%663 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x3x3xsi8>) : !torch.vtensor<[1280,1280,3,3],si8>
%664 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1x3x3xf16>) : !torch.vtensor<[1280,80,1,3,3],f16>
%665 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%666 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%667 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%668 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16>
%669 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%670 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x3x3xsi8>) : !torch.vtensor<[1280,1280,3,3],si8>
%671 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1x3x3xf16>) : !torch.vtensor<[1280,80,1,3,3],f16>
%672 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%673 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%674 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%675 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xsi8>) : !torch.vtensor<[1280,1280],si8>
%676 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1xf16>) : !torch.vtensor<[1280,80,1],f16>
%677 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%678 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x5120xsi8>) : !torch.vtensor<[1280,5120],si8>
%679 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x320x1xf16>) : !torch.vtensor<[1280,320,1],f16>
%680 = torch.vtensor.literal(dense_resource<__elided__> : tensor<10240xf16>) : !torch.vtensor<[10240],f16>
%681 = torch.vtensor.literal(dense_resource<__elided__> : tensor<10240x1280xsi8>) : !torch.vtensor<[10240,1280],si8>
%682 = torch.vtensor.literal(dense_resource<__elided__> : tensor<10240x80x1xf16>) : !torch.vtensor<[10240,80,1],f16>
%683 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%684 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%685 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%686 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xsi8>) : !torch.vtensor<[1280,1280],si8>
%687 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1xf16>) : !torch.vtensor<[1280,80,1],f16>
%688 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1024xsi8>) : !torch.vtensor<[1280,1024],si8>
%689 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x64x1xf16>) : !torch.vtensor<[1280,64,1],f16>
%690 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1024xsi8>) : !torch.vtensor<[1280,1024],si8>
%691 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x64x1xf16>) : !torch.vtensor<[1280,64,1],f16>
%692 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xsi8>) : !torch.vtensor<[1280,1280],si8>
%693 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1xf16>) : !torch.vtensor<[1280,80,1],f16>
%694 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%695 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%696 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%697 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xsi8>) : !torch.vtensor<[1280,1280],si8>
%698 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1xf16>) : !torch.vtensor<[1280,80,1],f16>
%699 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xsi8>) : !torch.vtensor<[1280,1280],si8>
%700 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1xf16>) : !torch.vtensor<[1280,80,1],f16>
%701 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xsi8>) : !torch.vtensor<[1280,1280],si8>
%702 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1xf16>) : !torch.vtensor<[1280,80,1],f16>
%703 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xsi8>) : !torch.vtensor<[1280,1280],si8>
%704 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1xf16>) : !torch.vtensor<[1280,80,1],f16>
%705 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%706 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%707 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%708 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xsi8>) : !torch.vtensor<[1280,1280],si8>
%709 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1xf16>) : !torch.vtensor<[1280,80,1],f16>
%710 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%711 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%712 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%713 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x640x1x1xsi8>) : !torch.vtensor<[1280,640,1,1],si8>
%714 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x40x1x1x1xf16>) : !torch.vtensor<[1280,40,1,1,1],f16>
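// Editorial note (assumption): a 1x1 conv from 640 to 1280 channels, consistent
// with a ResNet-block shortcut at a stage boundary where the channel count
// changes.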
%715 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%716 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x3x3xsi8>) : !torch.vtensor<[1280,1280,3,3],si8>
%717 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x80x1x3x3xf16>) : !torch.vtensor<[1280,80,1,3,3],f16>
%718 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%719 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%720 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%721 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16>
%722 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%723 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x640x3x3xsi8>) : !torch.vtensor<[1280,640,3,3],si8>
%724 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x40x1x3x3xf16>) : !torch.vtensor<[1280,40,1,3,3],f16>
%725 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%726 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%727 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%728 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640x3x3xsi8>) : !torch.vtensor<[640,640,3,3],si8>
%729 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1x3x3xf16>) : !torch.vtensor<[640,40,1,3,3],f16>
%730 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%731 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xsi8>) : !torch.vtensor<[640,640],si8>
%732 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1xf16>) : !torch.vtensor<[640,40,1],f16>
%733 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%734 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x2560xsi8>) : !torch.vtensor<[640,2560],si8>
%735 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x160x1xf16>) : !torch.vtensor<[640,160,1],f16>
%736 = torch.vtensor.literal(dense_resource<__elided__> : tensor<5120xf16>) : !torch.vtensor<[5120],f16>
%737 = torch.vtensor.literal(dense_resource<__elided__> : tensor<5120x640xsi8>) : !torch.vtensor<[5120,640],si8>
%738 = torch.vtensor.literal(dense_resource<__elided__> : tensor<5120x40x1xf16>) : !torch.vtensor<[5120,40,1],f16>
%739 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%740 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%741 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%742 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xsi8>) : !torch.vtensor<[640,640],si8>
%743 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1xf16>) : !torch.vtensor<[640,40,1],f16>
%744 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x1024xsi8>) : !torch.vtensor<[640,1024],si8>
%745 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x64x1xf16>) : !torch.vtensor<[640,64,1],f16>
%746 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x1024xsi8>) : !torch.vtensor<[640,1024],si8>
%747 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x64x1xf16>) : !torch.vtensor<[640,64,1],f16>
%748 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xsi8>) : !torch.vtensor<[640,640],si8>
%749 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1xf16>) : !torch.vtensor<[640,40,1],f16>
%750 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%751 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%752 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%753 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xsi8>) : !torch.vtensor<[640,640],si8>
%754 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1xf16>) : !torch.vtensor<[640,40,1],f16>
%755 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xsi8>) : !torch.vtensor<[640,640],si8>
%756 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1xf16>) : !torch.vtensor<[640,40,1],f16>
%757 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xsi8>) : !torch.vtensor<[640,640],si8>
%758 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1xf16>) : !torch.vtensor<[640,40,1],f16>
%759 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xsi8>) : !torch.vtensor<[640,640],si8>
%760 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1xf16>) : !torch.vtensor<[640,40,1],f16>
%761 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%762 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%763 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%764 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xsi8>) : !torch.vtensor<[640,640],si8>
%765 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1xf16>) : !torch.vtensor<[640,40,1],f16>
%766 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%767 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%768 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%769 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640x3x3xsi8>) : !torch.vtensor<[640,640,3,3],si8>
%770 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1x3x3xf16>) : !torch.vtensor<[640,40,1,3,3],f16>
%771 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%772 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%773 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%774 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x1280xf16>) : !torch.vtensor<[640,1280],f16>
%775 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%776 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640x3x3xsi8>) : !torch.vtensor<[640,640,3,3],si8>
%777 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1x3x3xf16>) : !torch.vtensor<[640,40,1,3,3],f16>
%778 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%779 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%780 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%781 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xsi8>) : !torch.vtensor<[640,640],si8>
%782 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1xf16>) : !torch.vtensor<[640,40,1],f16>
%783 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%784 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x2560xsi8>) : !torch.vtensor<[640,2560],si8>
%785 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x160x1xf16>) : !torch.vtensor<[640,160,1],f16>
%786 = torch.vtensor.literal(dense_resource<__elided__> : tensor<5120xf16>) : !torch.vtensor<[5120],f16>
%787 = torch.vtensor.literal(dense_resource<__elided__> : tensor<5120x640xsi8>) : !torch.vtensor<[5120,640],si8>
%788 = torch.vtensor.literal(dense_resource<__elided__> : tensor<5120x40x1xf16>) : !torch.vtensor<[5120,40,1],f16>
%789 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%790 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%791 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%792 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xsi8>) : !torch.vtensor<[640,640],si8>
%793 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1xf16>) : !torch.vtensor<[640,40,1],f16>
%794 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x1024xsi8>) : !torch.vtensor<[640,1024],si8>
%795 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x64x1xf16>) : !torch.vtensor<[640,64,1],f16>
%796 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x1024xsi8>) : !torch.vtensor<[640,1024],si8>
%797 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x64x1xf16>) : !torch.vtensor<[640,64,1],f16>
%798 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xsi8>) : !torch.vtensor<[640,640],si8>
%799 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1xf16>) : !torch.vtensor<[640,40,1],f16>
%800 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%801 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%802 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%803 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xsi8>) : !torch.vtensor<[640,640],si8>
%804 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1xf16>) : !torch.vtensor<[640,40,1],f16>
%805 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xsi8>) : !torch.vtensor<[640,640],si8>
%806 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1xf16>) : !torch.vtensor<[640,40,1],f16>
%807 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xsi8>) : !torch.vtensor<[640,640],si8>
%808 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1xf16>) : !torch.vtensor<[640,40,1],f16>
%809 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xsi8>) : !torch.vtensor<[640,640],si8>
%810 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1xf16>) : !torch.vtensor<[640,40,1],f16>
%811 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%812 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%813 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%814 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xsi8>) : !torch.vtensor<[640,640],si8>
%815 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1xf16>) : !torch.vtensor<[640,40,1],f16>
%816 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%817 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%818 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%819 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x320x1x1xsi8>) : !torch.vtensor<[640,320,1,1],si8>
%820 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x20x1x1x1xf16>) : !torch.vtensor<[640,20,1,1,1],f16>
%821 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%822 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640x3x3xsi8>) : !torch.vtensor<[640,640,3,3],si8>
%823 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x40x1x3x3xf16>) : !torch.vtensor<[640,40,1,3,3],f16>
%824 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%825 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%826 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%827 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x1280xf16>) : !torch.vtensor<[640,1280],f16>
%828 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%829 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x320x3x3xsi8>) : !torch.vtensor<[640,320,3,3],si8>
%830 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x20x1x3x3xf16>) : !torch.vtensor<[640,20,1,3,3],f16>
%831 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%832 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%833 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%834 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320x3x3xsi8>) : !torch.vtensor<[320,320,3,3],si8>
%835 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1x3x3xf16>) : !torch.vtensor<[320,20,1,3,3],f16>
%836 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%837 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xsi8>) : !torch.vtensor<[320,320],si8>
%838 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1xf16>) : !torch.vtensor<[320,20,1],f16>
%839 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%840 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x1280xsi8>) : !torch.vtensor<[320,1280],si8>
%841 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x80x1xf16>) : !torch.vtensor<[320,80,1],f16>
%842 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560xf16>) : !torch.vtensor<[2560],f16>
%843 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560x320xsi8>) : !torch.vtensor<[2560,320],si8>
%844 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560x20x1xf16>) : !torch.vtensor<[2560,20,1],f16>
%845 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%846 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%847 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%848 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xsi8>) : !torch.vtensor<[320,320],si8>
%849 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1xf16>) : !torch.vtensor<[320,20,1],f16>
%850 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x1024xsi8>) : !torch.vtensor<[320,1024],si8>
%851 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x64x1xf16>) : !torch.vtensor<[320,64,1],f16>
%852 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x1024xsi8>) : !torch.vtensor<[320,1024],si8>
%853 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x64x1xf16>) : !torch.vtensor<[320,64,1],f16>
%854 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xsi8>) : !torch.vtensor<[320,320],si8>
%855 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1xf16>) : !torch.vtensor<[320,20,1],f16>
%856 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%857 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%858 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%859 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xsi8>) : !torch.vtensor<[320,320],si8>
%860 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1xf16>) : !torch.vtensor<[320,20,1],f16>
%861 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xsi8>) : !torch.vtensor<[320,320],si8>
%862 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1xf16>) : !torch.vtensor<[320,20,1],f16>
%863 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xsi8>) : !torch.vtensor<[320,320],si8>
%864 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1xf16>) : !torch.vtensor<[320,20,1],f16>
%865 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xsi8>) : !torch.vtensor<[320,320],si8>
%866 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1xf16>) : !torch.vtensor<[320,20,1],f16>
%867 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%868 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%869 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%870 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xsi8>) : !torch.vtensor<[320,320],si8>
%871 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1xf16>) : !torch.vtensor<[320,20,1],f16>
%872 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%873 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%874 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%875 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320x3x3xsi8>) : !torch.vtensor<[320,320,3,3],si8>
%876 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1x3x3xf16>) : !torch.vtensor<[320,20,1,3,3],f16>
%877 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%878 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%879 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%880 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x1280xf16>) : !torch.vtensor<[320,1280],f16>
%881 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%882 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320x3x3xsi8>) : !torch.vtensor<[320,320,3,3],si8>
%883 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1x3x3xf16>) : !torch.vtensor<[320,20,1,3,3],f16>
%884 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%885 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%886 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%887 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xsi8>) : !torch.vtensor<[320,320],si8>
%888 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1xf16>) : !torch.vtensor<[320,20,1],f16>
%889 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%890 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x1280xsi8>) : !torch.vtensor<[320,1280],si8>
%891 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x80x1xf16>) : !torch.vtensor<[320,80,1],f16>
%892 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560xf16>) : !torch.vtensor<[2560],f16>
%893 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560x320xsi8>) : !torch.vtensor<[2560,320],si8>
%894 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560x20x1xf16>) : !torch.vtensor<[2560,20,1],f16>
%895 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%896 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%897 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%898 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xsi8>) : !torch.vtensor<[320,320],si8>
%899 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1xf16>) : !torch.vtensor<[320,20,1],f16>
%900 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x1024xsi8>) : !torch.vtensor<[320,1024],si8>
%901 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x64x1xf16>) : !torch.vtensor<[320,64,1],f16>
%902 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x1024xsi8>) : !torch.vtensor<[320,1024],si8>
%903 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x64x1xf16>) : !torch.vtensor<[320,64,1],f16>
%904 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xsi8>) : !torch.vtensor<[320,320],si8>
%905 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1xf16>) : !torch.vtensor<[320,20,1],f16>
%906 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%907 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%908 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%909 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xsi8>) : !torch.vtensor<[320,320],si8>
%910 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1xf16>) : !torch.vtensor<[320,20,1],f16>
%911 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xsi8>) : !torch.vtensor<[320,320],si8>
%912 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1xf16>) : !torch.vtensor<[320,20,1],f16>
%913 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xsi8>) : !torch.vtensor<[320,320],si8>
%914 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1xf16>) : !torch.vtensor<[320,20,1],f16>
%915 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xsi8>) : !torch.vtensor<[320,320],si8>
%916 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1xf16>) : !torch.vtensor<[320,20,1],f16>
%917 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%918 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%919 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%920 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xsi8>) : !torch.vtensor<[320,320],si8>
%921 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1xf16>) : !torch.vtensor<[320,20,1],f16>
%922 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%923 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%924 = torch.vtensor.literal(dense<1.013280e-06> : tensor<f16>) : !torch.vtensor<[],f16>
%925 = torch.vtensor.literal(dense<1.000000e+00> : tensor<f16>) : !torch.vtensor<[],f16>
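// Editorial note (assumption): 1.013280e-06 (%924) is the f16 rounding of 1e-6,
// presumably a normalization epsilon; its use lies outside this excerpt.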
%926 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%927 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320x3x3xsi8>) : !torch.vtensor<[320,320,3,3],si8>
%928 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1x3x3xf16>) : !torch.vtensor<[320,20,1,3,3],f16>
%929 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%930 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%931 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%932 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x1280xf16>) : !torch.vtensor<[320,1280],f16>
%933 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%934 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320x3x3xsi8>) : !torch.vtensor<[320,320,3,3],si8>
%935 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x20x1x3x3xf16>) : !torch.vtensor<[320,20,1,3,3],f16>
%936 = torch.vtensor.literal(dense<0.000000e+00> : tensor<f16>) : !torch.vtensor<[],f16>
%937 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%938 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%939 = torch.vtensor.literal(dense<1.001360e-05> : tensor<f16>) : !torch.vtensor<[],f16>
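// Editorial note: 1.001360e-05 (%939) is the f16 rounding of 1e-5; it is added
// to the variance as the GroupNorm epsilon below (%1007).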
%940 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%941 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x4x3x3xf16>) : !torch.vtensor<[320,4,3,3],f16>
%942 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%943 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16>
%944 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%945 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x320xf16>) : !torch.vtensor<[1280,320],f16>
%946 = torch.vtensor.literal(dense<-9.210930e+00> : tensor<f16>) : !torch.vtensor<[],f16>
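// Editorial note: -9.210930e+00 is the f16 rounding of -ln(10000) ~= -9.21034,
// the base of the sinusoidal timestep-embedding frequencies used below.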
%int-1 = torch.constant.int -1
%int-128 = torch.constant.int -128
%int-2 = torch.constant.int -2
%str = torch.constant.str "none"
%int9223372036854775807 = torch.constant.int 9223372036854775807
%int3 = torch.constant.int 3
%int16 = torch.constant.int 16
%int128 = torch.constant.int 128
%int127 = torch.constant.int 127
%int36864 = torch.constant.int 36864
%float1.000000e-05 = torch.constant.float 1.000000e-05
%float1.250000e-01 = torch.constant.float 1.250000e-01
%int1024 = torch.constant.int 1024
%int308 = torch.constant.int 308
%int5120 = torch.constant.int 5120
%int10240 = torch.constant.int 10240
%float2.000000e00 = torch.constant.float 2.000000e+00
%int120 = torch.constant.int 120
%cuda3A0 = torch.constant.device "cuda:0"
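// End of weight literals and scalar constants; the forward computation begins
// here. Editorial note (assumption): the ops below follow the usual diffusion
// UNet entry sequence: batch-double the latents, build the sinusoidal timestep
// embedding, run the time-embedding MLP, then conv_in and a first GroupNorm.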
%947 = torch.prim.ListConstruct %arg0, %arg0 : (!torch.vtensor<[2,4,96,96],f16>, !torch.vtensor<[2,4,96,96],f16>) -> !torch.list<vtensor>
%948 = torch.aten.cat %947, %int0 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[4,4,96,96],f16>
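// Editorial note (assumption): duplicating the [2,4,96,96] latent batch to 4
// matches classifier-free guidance, pairing each latent with both the
// conditional and unconditional text embeddings (%arg2 has batch 4).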
%949 = torch.prim.ListConstruct %int4 : (!torch.int) -> !torch.list<int>
%950 = torch.aten.broadcast_to %arg1, %949 : !torch.vtensor<[1],f32>, !torch.list<int> -> !torch.vtensor<[4],f32>
%951 = torch.aten.arange.start_step %int0, %int160, %int1, %int6, %none, %cuda3A0, %false : !torch.int, !torch.int, !torch.int, !torch.int, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[160],f32>
%952 = torch.aten.mul.Tensor %951, %946 : !torch.vtensor<[160],f32>, !torch.vtensor<[],f16> -> !torch.vtensor<[160],f32>
%953 = torch.aten.div.Scalar %952, %int160 : !torch.vtensor<[160],f32>, !torch.int -> !torch.vtensor<[160],f32>
%954 = torch.aten.exp %953 : !torch.vtensor<[160],f32> -> !torch.vtensor<[160],f32>
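// Editorial note: %951..%954 compute freqs[i] = exp(-ln(10000) * i / 160) for
// i in [0, 160), the half-dimension frequency table of a 320-wide sinusoidal
// timestep embedding.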
%955 = torch.aten.unsqueeze %950, %int1 : !torch.vtensor<[4],f32>, !torch.int -> !torch.vtensor<[4,1],f32>
%956 = torch.aten.unsqueeze %954, %int0 : !torch.vtensor<[160],f32>, !torch.int -> !torch.vtensor<[1,160],f32>
%957 = torch.aten.mul.Tensor %955, %956 : !torch.vtensor<[4,1],f32>, !torch.vtensor<[1,160],f32> -> !torch.vtensor<[4,160],f32>
%958 = torch.aten.mul.Scalar %957, %int1 : !torch.vtensor<[4,160],f32>, !torch.int -> !torch.vtensor<[4,160],f32>
%959 = torch.aten.sin %958 : !torch.vtensor<[4,160],f32> -> !torch.vtensor<[4,160],f32>
%960 = torch.aten.cos %958 : !torch.vtensor<[4,160],f32> -> !torch.vtensor<[4,160],f32>
%961 = torch.prim.ListConstruct %959, %960 : (!torch.vtensor<[4,160],f32>, !torch.vtensor<[4,160],f32>) -> !torch.list<vtensor>
%962 = torch.aten.cat %961, %int-1 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[4,320],f32>
%963 = torch.aten.slice.Tensor %962, %int1, %int160, %int9223372036854775807, %int1 : !torch.vtensor<[4,320],f32>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[4,160],f32>
%964 = torch.aten.slice.Tensor %962, %int1, %int0, %int160, %int1 : !torch.vtensor<[4,320],f32>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[4,160],f32>
%965 = torch.prim.ListConstruct %963, %964 : (!torch.vtensor<[4,160],f32>, !torch.vtensor<[4,160],f32>) -> !torch.list<vtensor>
%966 = torch.aten.cat %965, %int-1 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[4,320],f32>
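// Editorial note (assumption): the two slices swap the 160-wide halves of
// [sin | cos] into [cos | sin], i.e. the flip_sin_to_cos ordering used by some
// diffusion timestep-embedding implementations.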
%967 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%968 = torch.aten.to.dtype %967, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%969 = torch.prim.ListConstruct %int4, %int320 : (!torch.int, !torch.int) -> !torch.list<int>
%970 = torch.aten.broadcast_to %968, %969 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[4,320],f16>
%971 = torch.aten.to.dtype %966, %int5, %false, %false, %none : !torch.vtensor<[4,320],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,320],f16>
%972 = torch.prim.ListConstruct %int4, %int320 : (!torch.int, !torch.int) -> !torch.list<int>
%973 = torch.aten.broadcast_to %971, %972 : !torch.vtensor<[4,320],f16>, !torch.list<int> -> !torch.vtensor<[4,320],f16>
%974 = torch.aten.transpose.int %945, %int0, %int1 : !torch.vtensor<[1280,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,1280],f16>
%975 = torch.aten.mm %973, %974 : !torch.vtensor<[4,320],f16>, !torch.vtensor<[320,1280],f16> -> !torch.vtensor<[4,1280],f16>
%976 = torch.aten.mul.Scalar %944, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16>
%977 = torch.aten.add.Tensor %976, %975, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[4,1280],f16>, !torch.int -> !torch.vtensor<[4,1280],f16>
%978 = torch.aten.sigmoid %977 : !torch.vtensor<[4,1280],f16> -> !torch.vtensor<[4,1280],f16>
%979 = torch.aten.mul.Tensor %978, %977 : !torch.vtensor<[4,1280],f16>, !torch.vtensor<[4,1280],f16> -> !torch.vtensor<[4,1280],f16>
%980 = torch.aten.transpose.int %943, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%981 = torch.aten.mm %979, %980 : !torch.vtensor<[4,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[4,1280],f16>
%982 = torch.aten.mul.Scalar %942, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16>
%983 = torch.aten.add.Tensor %982, %981, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[4,1280],f16>, !torch.int -> !torch.vtensor<[4,1280],f16>
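// Editorial note: %974..%983 form the time-embedding MLP: Linear(320 -> 1280),
// SiLU (%978/%979 compute sigmoid(x) * x), Linear(1280 -> 1280); the
// mul.Scalar-by-1 plus add pattern is the exporter's addmm bias decomposition.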
%984 = torch.prim.ListConstruct %int1, %int1 : (!torch.int, !torch.int) -> !torch.list<int>
%985 = torch.prim.ListConstruct %int0, %int0 : (!torch.int, !torch.int) -> !torch.list<int>
%986 = torch.aten.convolution %948, %941, %940, %984, %984, %984, %false, %985, %int1 : !torch.vtensor<[4,4,96,96],f16>, !torch.vtensor<[320,4,3,3],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[4,320,96,96],f16>
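// Editorial note: conv_in: 3x3 convolution, stride 1, padding 1, dilation 1,
// mapping 4 -> 320 channels while preserving the 96x96 spatial extent.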
%987 = torch.prim.ListConstruct %int4, %int32, %int10, %int9216 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%988 = torch.aten.view %986, %987 : !torch.vtensor<[4,320,96,96],f16>, !torch.list<int> -> !torch.vtensor<[4,32,10,9216],f16>
%989 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%990 = torch.aten.to.dtype %989, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%991 = torch.prim.ListConstruct %int4, %int32, %int10, %int9216 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%992 = torch.aten.broadcast_to %990, %991 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[4,32,10,9216],f32>
%993 = torch.aten.to.dtype %988, %int6, %false, %false, %none : !torch.vtensor<[4,32,10,9216],f16>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,32,10,9216],f32>
%994 = torch.prim.ListConstruct %int4, %int32, %int10, %int9216 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%995 = torch.aten.broadcast_to %993, %994 : !torch.vtensor<[4,32,10,9216],f32>, !torch.list<int> -> !torch.vtensor<[4,32,10,9216],f32>
%996 = torch.prim.ListConstruct %int2, %int3 : (!torch.int, !torch.int) -> !torch.list<int>
%997 = torch.aten.to.dtype %995, %int7, %false, %false, %none : !torch.vtensor<[4,32,10,9216],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,32,10,9216],f64>
%998 = torch.aten.sum.dim_IntList %997, %996, %true, %none : !torch.vtensor<[4,32,10,9216],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f64>
%999 = torch.aten.div.Scalar %998, %int92160 : !torch.vtensor<[4,32,1,1],f64>, !torch.int -> !torch.vtensor<[4,32,1,1],f64>
%1000 = torch.aten.sub.Tensor %997, %999, %float1.000000e00 : !torch.vtensor<[4,32,10,9216],f64>, !torch.vtensor<[4,32,1,1],f64>, !torch.float -> !torch.vtensor<[4,32,10,9216],f64>
%1001 = torch.aten.mul.Tensor %1000, %1000 : !torch.vtensor<[4,32,10,9216],f64>, !torch.vtensor<[4,32,10,9216],f64> -> !torch.vtensor<[4,32,10,9216],f64>
%1002 = torch.aten.sum.dim_IntList %1001, %996, %true, %none : !torch.vtensor<[4,32,10,9216],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f64>
%1003 = torch.aten.div.Scalar %1002, %int92160 : !torch.vtensor<[4,32,1,1],f64>, !torch.int -> !torch.vtensor<[4,32,1,1],f64>
%1004 = torch.aten.to.dtype %1003, %int6, %false, %false, %none : !torch.vtensor<[4,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f32>
%1005 = torch.aten.sum.dim_IntList %995, %996, %true, %none : !torch.vtensor<[4,32,10,9216],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f32>
%1006 = torch.aten.div.Scalar %1005, %int92160 : !torch.vtensor<[4,32,1,1],f32>, !torch.int -> !torch.vtensor<[4,32,1,1],f32>
%1007 = torch.aten.add.Tensor %1004, %939, %int1 : !torch.vtensor<[4,32,1,1],f32>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,32,1,1],f32>
%1008 = torch.aten.rsqrt %1007 : !torch.vtensor<[4,32,1,1],f32> -> !torch.vtensor<[4,32,1,1],f32>
%1009 = torch.aten.sub.Tensor %988, %1006, %int1 : !torch.vtensor<[4,32,10,9216],f16>, !torch.vtensor<[4,32,1,1],f32>, !torch.int -> !torch.vtensor<[4,32,10,9216],f32>
%1010 = torch.aten.mul.Tensor %1009, %1008 : !torch.vtensor<[4,32,10,9216],f32>, !torch.vtensor<[4,32,1,1],f32> -> !torch.vtensor<[4,32,10,9216],f32>
%1011 = torch.prim.ListConstruct %int4, %int320, %int96, %int96 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1012 = torch.aten.view %1010, %1011 : !torch.vtensor<[4,32,10,9216],f32>, !torch.list<int> -> !torch.vtensor<[4,320,96,96],f32>
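// Per-channel affine of the GroupNorm: weight %937 and bias %938 are unsqueezed to [1,320,1,1] and applied as scale and shift.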
%1013 = torch.aten.unsqueeze %938, %int0 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[1,320],f16>
%1014 = torch.aten.unsqueeze %1013, %int2 : !torch.vtensor<[1,320],f16>, !torch.int -> !torch.vtensor<[1,320,1],f16>
%1015 = torch.aten.unsqueeze %1014, %int3 : !torch.vtensor<[1,320,1],f16>, !torch.int -> !torch.vtensor<[1,320,1,1],f16>
%1016 = torch.aten.unsqueeze %937, %int0 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[1,320],f16>
%1017 = torch.aten.unsqueeze %1016, %int2 : !torch.vtensor<[1,320],f16>, !torch.int -> !torch.vtensor<[1,320,1],f16>
%1018 = torch.aten.unsqueeze %1017, %int3 : !torch.vtensor<[1,320,1],f16>, !torch.int -> !torch.vtensor<[1,320,1,1],f16>
%1019 = torch.aten.mul.Tensor %1012, %1018 : !torch.vtensor<[4,320,96,96],f32>, !torch.vtensor<[1,320,1,1],f16> -> !torch.vtensor<[4,320,96,96],f32>
%1020 = torch.aten.add.Tensor %1019, %1015, %int1 : !torch.vtensor<[4,320,96,96],f32>, !torch.vtensor<[1,320,1,1],f16>, !torch.int -> !torch.vtensor<[4,320,96,96],f32>
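// Cast back to f16, then SiLU: sigmoid(x) * x.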
%1021 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%1022 = torch.aten.to.dtype %1021, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%1023 = torch.prim.ListConstruct %int4, %int320, %int96, %int96 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1024 = torch.aten.broadcast_to %1022, %1023 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[4,320,96,96],f16>
%1025 = torch.aten.to.dtype %1020, %int5, %false, %false, %none : !torch.vtensor<[4,320,96,96],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,320,96,96],f16>
%1026 = torch.prim.ListConstruct %int4, %int320, %int96, %int96 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1027 = torch.aten.broadcast_to %1025, %1026 : !torch.vtensor<[4,320,96,96],f16>, !torch.list<int> -> !torch.vtensor<[4,320,96,96],f16>
%1028 = torch.aten.sigmoid %1027 : !torch.vtensor<[4,320,96,96],f16> -> !torch.vtensor<[4,320,96,96],f16>
%1029 = torch.aten.mul.Tensor %1028, %1027 : !torch.vtensor<[4,320,96,96],f16>, !torch.vtensor<[4,320,96,96],f16> -> !torch.vtensor<[4,320,96,96],f16>
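// Activation fake-quantization: view as 20 groups of 16 channels, per-group abs-max / 128 gives the
// scale; the detached scalar %936 appears to be the zero-point; round, clamp to [-128,127], dequantize.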
%1030 = torch.prim.ListConstruct %int4, %int20, %int16, %int96, %int96 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1031 = torch.aten.view %1029, %1030 : !torch.vtensor<[4,320,96,96],f16>, !torch.list<int> -> !torch.vtensor<[4,20,16,96,96],f16>
%1032 = torch.aten.abs %1031 : !torch.vtensor<[4,20,16,96,96],f16> -> !torch.vtensor<[4,20,16,96,96],f16>
%values, %indices = torch.aten.max.dim %1032, %int2, %true : !torch.vtensor<[4,20,16,96,96],f16>, !torch.int, !torch.bool -> !torch.vtensor<[4,20,1,96,96],f16>, !torch.vtensor<[4,20,1,96,96],si64>
%1033 = torch.prim.ListConstruct %int4, %int20, %int1, %int96, %int96 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1034 = torch.aten.view %values, %1033 : !torch.vtensor<[4,20,1,96,96],f16>, !torch.list<int> -> !torch.vtensor<[4,20,1,96,96],f16>
%1035 = torch.aten.broadcast_to %1034, %1030 : !torch.vtensor<[4,20,1,96,96],f16>, !torch.list<int> -> !torch.vtensor<[4,20,16,96,96],f16>
%1036 = torch.aten.clone %1035, %int0 : !torch.vtensor<[4,20,16,96,96],f16>, !torch.int -> !torch.vtensor<[4,20,16,96,96],f16>
%1037 = torch.aten.view %1036, %1011 : !torch.vtensor<[4,20,16,96,96],f16>, !torch.list<int> -> !torch.vtensor<[4,320,96,96],f16>
%1038 = torch.aten.div.Scalar %1037, %int128 : !torch.vtensor<[4,320,96,96],f16>, !torch.int -> !torch.vtensor<[4,320,96,96],f16>
%1039 = torch.aten.detach %936 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%1040 = torch.aten.detach %1039 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%1041 = torch.aten.div.Tensor %1029, %1038 : !torch.vtensor<[4,320,96,96],f16>, !torch.vtensor<[4,320,96,96],f16> -> !torch.vtensor<[4,320,96,96],f16>
%1042 = torch.aten.add.Tensor %1041, %1040, %int1 : !torch.vtensor<[4,320,96,96],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,320,96,96],f16>
%1043 = torch.aten.round %1042 : !torch.vtensor<[4,320,96,96],f16> -> !torch.vtensor<[4,320,96,96],f16>
%1044 = torch.aten.clamp %1043, %int-128, %int127 : !torch.vtensor<[4,320,96,96],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,320,96,96],f16>
%1045 = torch.aten.sub.Tensor %1044, %1040, %int1 : !torch.vtensor<[4,320,96,96],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,320,96,96],f16>
%1046 = torch.aten.mul.Tensor %1045, %1038 : !torch.vtensor<[4,320,96,96],f16>, !torch.vtensor<[4,320,96,96],f16> -> !torch.vtensor<[4,320,96,96],f16>
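// Dequantize the si8 3x3 conv weight: per-group scales %935 [320,20,1,3,3] are broadcast and
// reshaped to [320,320,3,3], multiplied into the int8 tensor, and the convolution runs in f16.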
%1047 = torch.prim.ListConstruct %int320, %int20, %int16, %int3, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1048 = torch.aten.broadcast_to %935, %1047 : !torch.vtensor<[320,20,1,3,3],f16>, !torch.list<int> -> !torch.vtensor<[320,20,16,3,3],f16>
%1049 = torch.aten.clone %1048, %int0 : !torch.vtensor<[320,20,16,3,3],f16>, !torch.int -> !torch.vtensor<[320,20,16,3,3],f16>
%1050 = torch.prim.ListConstruct %int320, %int320, %int3, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1051 = torch.aten.view %1049, %1050 : !torch.vtensor<[320,20,16,3,3],f16>, !torch.list<int> -> !torch.vtensor<[320,320,3,3],f16>
%1052 = torch.aten.mul.Tensor %934, %1051 : !torch.vtensor<[320,320,3,3],si8>, !torch.vtensor<[320,320,3,3],f16> -> !torch.vtensor<[320,320,3,3],f16>
%1053 = torch.aten.convolution %1046, %1052, %933, %984, %984, %984, %false, %985, %int1 : !torch.vtensor<[4,320,96,96],f16>, !torch.vtensor<[320,320,3,3],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[4,320,96,96],f16>
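// Time-embedding projection for this block: SiLU on the 1280-d embedding %983, linear (1280 -> 320),
// then broadcast-added over the spatial dims as a per-channel bias.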
%1054 = torch.aten.sigmoid %983 : !torch.vtensor<[4,1280],f16> -> !torch.vtensor<[4,1280],f16>
%1055 = torch.aten.mul.Tensor %1054, %983 : !torch.vtensor<[4,1280],f16>, !torch.vtensor<[4,1280],f16> -> !torch.vtensor<[4,1280],f16>
%1056 = torch.aten.transpose.int %932, %int0, %int1 : !torch.vtensor<[320,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,320],f16>
%1057 = torch.aten.mm %1055, %1056 : !torch.vtensor<[4,1280],f16>, !torch.vtensor<[1280,320],f16> -> !torch.vtensor<[4,320],f16>
%1058 = torch.aten.mul.Scalar %931, %int1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320],f16>
%1059 = torch.aten.add.Tensor %1058, %1057, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[4,320],f16>, !torch.int -> !torch.vtensor<[4,320],f16>
%1060 = torch.aten.unsqueeze %1059, %int2 : !torch.vtensor<[4,320],f16>, !torch.int -> !torch.vtensor<[4,320,1],f16>
%1061 = torch.aten.unsqueeze %1060, %int3 : !torch.vtensor<[4,320,1],f16>, !torch.int -> !torch.vtensor<[4,320,1,1],f16>
%1062 = torch.aten.add.Tensor %1053, %1061, %int1 : !torch.vtensor<[4,320,96,96],f16>, !torch.vtensor<[4,320,1,1],f16>, !torch.int -> !torch.vtensor<[4,320,96,96],f16>
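// Second GroupNorm + affine + SiLU, same pattern as above.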
%1063 = torch.aten.view %1062, %987 : !torch.vtensor<[4,320,96,96],f16>, !torch.list<int> -> !torch.vtensor<[4,32,10,9216],f16>
%1064 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%1065 = torch.aten.to.dtype %1064, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%1066 = torch.prim.ListConstruct %int4, %int32, %int10, %int9216 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1067 = torch.aten.broadcast_to %1065, %1066 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[4,32,10,9216],f32>
%1068 = torch.aten.to.dtype %1063, %int6, %false, %false, %none : !torch.vtensor<[4,32,10,9216],f16>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,32,10,9216],f32>
%1069 = torch.prim.ListConstruct %int4, %int32, %int10, %int9216 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1070 = torch.aten.broadcast_to %1068, %1069 : !torch.vtensor<[4,32,10,9216],f32>, !torch.list<int> -> !torch.vtensor<[4,32,10,9216],f32>
%1071 = torch.aten.to.dtype %1070, %int7, %false, %false, %none : !torch.vtensor<[4,32,10,9216],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,32,10,9216],f64>
%1072 = torch.aten.sum.dim_IntList %1071, %996, %true, %none : !torch.vtensor<[4,32,10,9216],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f64>
%1073 = torch.aten.div.Scalar %1072, %int92160 : !torch.vtensor<[4,32,1,1],f64>, !torch.int -> !torch.vtensor<[4,32,1,1],f64>
%1074 = torch.aten.sub.Tensor %1071, %1073, %float1.000000e00 : !torch.vtensor<[4,32,10,9216],f64>, !torch.vtensor<[4,32,1,1],f64>, !torch.float -> !torch.vtensor<[4,32,10,9216],f64>
%1075 = torch.aten.mul.Tensor %1074, %1074 : !torch.vtensor<[4,32,10,9216],f64>, !torch.vtensor<[4,32,10,9216],f64> -> !torch.vtensor<[4,32,10,9216],f64>
%1076 = torch.aten.sum.dim_IntList %1075, %996, %true, %none : !torch.vtensor<[4,32,10,9216],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f64>
%1077 = torch.aten.div.Scalar %1076, %int92160 : !torch.vtensor<[4,32,1,1],f64>, !torch.int -> !torch.vtensor<[4,32,1,1],f64>
%1078 = torch.aten.to.dtype %1077, %int6, %false, %false, %none : !torch.vtensor<[4,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f32>
%1079 = torch.aten.sum.dim_IntList %1070, %996, %true, %none : !torch.vtensor<[4,32,10,9216],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f32>
%1080 = torch.aten.div.Scalar %1079, %int92160 : !torch.vtensor<[4,32,1,1],f32>, !torch.int -> !torch.vtensor<[4,32,1,1],f32>
%1081 = torch.aten.add.Tensor %1078, %939, %int1 : !torch.vtensor<[4,32,1,1],f32>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,32,1,1],f32>
%1082 = torch.aten.rsqrt %1081 : !torch.vtensor<[4,32,1,1],f32> -> !torch.vtensor<[4,32,1,1],f32>
%1083 = torch.aten.sub.Tensor %1063, %1080, %int1 : !torch.vtensor<[4,32,10,9216],f16>, !torch.vtensor<[4,32,1,1],f32>, !torch.int -> !torch.vtensor<[4,32,10,9216],f32>
%1084 = torch.aten.mul.Tensor %1083, %1082 : !torch.vtensor<[4,32,10,9216],f32>, !torch.vtensor<[4,32,1,1],f32> -> !torch.vtensor<[4,32,10,9216],f32>
%1085 = torch.aten.view %1084, %1011 : !torch.vtensor<[4,32,10,9216],f32>, !torch.list<int> -> !torch.vtensor<[4,320,96,96],f32>
%1086 = torch.aten.unsqueeze %930, %int0 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[1,320],f16>
%1087 = torch.aten.unsqueeze %1086, %int2 : !torch.vtensor<[1,320],f16>, !torch.int -> !torch.vtensor<[1,320,1],f16>
%1088 = torch.aten.unsqueeze %1087, %int3 : !torch.vtensor<[1,320,1],f16>, !torch.int -> !torch.vtensor<[1,320,1,1],f16>
%1089 = torch.aten.unsqueeze %929, %int0 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[1,320],f16>
%1090 = torch.aten.unsqueeze %1089, %int2 : !torch.vtensor<[1,320],f16>, !torch.int -> !torch.vtensor<[1,320,1],f16>
%1091 = torch.aten.unsqueeze %1090, %int3 : !torch.vtensor<[1,320,1],f16>, !torch.int -> !torch.vtensor<[1,320,1,1],f16>
%1092 = torch.aten.mul.Tensor %1085, %1091 : !torch.vtensor<[4,320,96,96],f32>, !torch.vtensor<[1,320,1,1],f16> -> !torch.vtensor<[4,320,96,96],f32>
%1093 = torch.aten.add.Tensor %1092, %1088, %int1 : !torch.vtensor<[4,320,96,96],f32>, !torch.vtensor<[1,320,1,1],f16>, !torch.int -> !torch.vtensor<[4,320,96,96],f32>
%1094 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%1095 = torch.aten.to.dtype %1094, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%1096 = torch.prim.ListConstruct %int4, %int320, %int96, %int96 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1097 = torch.aten.broadcast_to %1095, %1096 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[4,320,96,96],f16>
%1098 = torch.aten.to.dtype %1093, %int5, %false, %false, %none : !torch.vtensor<[4,320,96,96],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,320,96,96],f16>
%1099 = torch.prim.ListConstruct %int4, %int320, %int96, %int96 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1100 = torch.aten.broadcast_to %1098, %1099 : !torch.vtensor<[4,320,96,96],f16>, !torch.list<int> -> !torch.vtensor<[4,320,96,96],f16>
%1101 = torch.aten.sigmoid %1100 : !torch.vtensor<[4,320,96,96],f16> -> !torch.vtensor<[4,320,96,96],f16>
%1102 = torch.aten.mul.Tensor %1101, %1100 : !torch.vtensor<[4,320,96,96],f16>, !torch.vtensor<[4,320,96,96],f16> -> !torch.vtensor<[4,320,96,96],f16>
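// Fake-quantize the activation, dequantize the second si8 conv weight, and run the second convolution.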
%1103 = torch.aten.view %1102, %1030 : !torch.vtensor<[4,320,96,96],f16>, !torch.list<int> -> !torch.vtensor<[4,20,16,96,96],f16>
%1104 = torch.aten.abs %1103 : !torch.vtensor<[4,20,16,96,96],f16> -> !torch.vtensor<[4,20,16,96,96],f16>
%values_0, %indices_1 = torch.aten.max.dim %1104, %int2, %true : !torch.vtensor<[4,20,16,96,96],f16>, !torch.int, !torch.bool -> !torch.vtensor<[4,20,1,96,96],f16>, !torch.vtensor<[4,20,1,96,96],si64>
%1105 = torch.aten.view %values_0, %1033 : !torch.vtensor<[4,20,1,96,96],f16>, !torch.list<int> -> !torch.vtensor<[4,20,1,96,96],f16>
%1106 = torch.aten.broadcast_to %1105, %1030 : !torch.vtensor<[4,20,1,96,96],f16>, !torch.list<int> -> !torch.vtensor<[4,20,16,96,96],f16>
%1107 = torch.aten.clone %1106, %int0 : !torch.vtensor<[4,20,16,96,96],f16>, !torch.int -> !torch.vtensor<[4,20,16,96,96],f16>
%1108 = torch.aten.view %1107, %1011 : !torch.vtensor<[4,20,16,96,96],f16>, !torch.list<int> -> !torch.vtensor<[4,320,96,96],f16>
%1109 = torch.aten.div.Scalar %1108, %int128 : !torch.vtensor<[4,320,96,96],f16>, !torch.int -> !torch.vtensor<[4,320,96,96],f16>
%1110 = torch.aten.detach %936 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%1111 = torch.aten.detach %1110 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%1112 = torch.aten.div.Tensor %1102, %1109 : !torch.vtensor<[4,320,96,96],f16>, !torch.vtensor<[4,320,96,96],f16> -> !torch.vtensor<[4,320,96,96],f16>
%1113 = torch.aten.add.Tensor %1112, %1111, %int1 : !torch.vtensor<[4,320,96,96],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,320,96,96],f16>
%1114 = torch.aten.round %1113 : !torch.vtensor<[4,320,96,96],f16> -> !torch.vtensor<[4,320,96,96],f16>
%1115 = torch.aten.clamp %1114, %int-128, %int127 : !torch.vtensor<[4,320,96,96],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,320,96,96],f16>
%1116 = torch.aten.sub.Tensor %1115, %1111, %int1 : !torch.vtensor<[4,320,96,96],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,320,96,96],f16>
%1117 = torch.aten.mul.Tensor %1116, %1109 : !torch.vtensor<[4,320,96,96],f16>, !torch.vtensor<[4,320,96,96],f16> -> !torch.vtensor<[4,320,96,96],f16>
%1118 = torch.aten.broadcast_to %928, %1047 : !torch.vtensor<[320,20,1,3,3],f16>, !torch.list<int> -> !torch.vtensor<[320,20,16,3,3],f16>
%1119 = torch.aten.clone %1118, %int0 : !torch.vtensor<[320,20,16,3,3],f16>, !torch.int -> !torch.vtensor<[320,20,16,3,3],f16>
%1120 = torch.aten.view %1119, %1050 : !torch.vtensor<[320,20,16,3,3],f16>, !torch.list<int> -> !torch.vtensor<[320,320,3,3],f16>
%1121 = torch.aten.mul.Tensor %927, %1120 : !torch.vtensor<[320,320,3,3],si8>, !torch.vtensor<[320,320,3,3],f16> -> !torch.vtensor<[320,320,3,3],f16>
%1122 = torch.aten.convolution %1117, %1121, %926, %984, %984, %984, %false, %985, %int1 : !torch.vtensor<[4,320,96,96],f16>, !torch.vtensor<[320,320,3,3],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[4,320,96,96],f16>
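// Residual connection: the conv output %986 plus this branch, divided by the output scale factor %925.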
%1123 = torch.aten.add.Tensor %986, %1122, %int1 : !torch.vtensor<[4,320,96,96],f16>, !torch.vtensor<[4,320,96,96],f16>, !torch.int -> !torch.vtensor<[4,320,96,96],f16>
%1124 = torch.aten.div.Tensor %1123, %925 : !torch.vtensor<[4,320,96,96],f16>, !torch.vtensor<[],f16> -> !torch.vtensor<[4,320,96,96],f16>
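// GroupNorm ahead of the transformer block (same f64/f32 statistics pattern), then cast to f16.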
%1125 = torch.aten.view %1124, %987 : !torch.vtensor<[4,320,96,96],f16>, !torch.list<int> -> !torch.vtensor<[4,32,10,9216],f16>
%1126 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%1127 = torch.aten.to.dtype %1126, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%1128 = torch.prim.ListConstruct %int4, %int32, %int10, %int9216 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1129 = torch.aten.broadcast_to %1127, %1128 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[4,32,10,9216],f32>
%1130 = torch.aten.to.dtype %1125, %int6, %false, %false, %none : !torch.vtensor<[4,32,10,9216],f16>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,32,10,9216],f32>
%1131 = torch.prim.ListConstruct %int4, %int32, %int10, %int9216 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1132 = torch.aten.broadcast_to %1130, %1131 : !torch.vtensor<[4,32,10,9216],f32>, !torch.list<int> -> !torch.vtensor<[4,32,10,9216],f32>
%1133 = torch.aten.to.dtype %1132, %int7, %false, %false, %none : !torch.vtensor<[4,32,10,9216],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,32,10,9216],f64>
%1134 = torch.aten.sum.dim_IntList %1133, %996, %true, %none : !torch.vtensor<[4,32,10,9216],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f64>
%1135 = torch.aten.div.Scalar %1134, %int92160 : !torch.vtensor<[4,32,1,1],f64>, !torch.int -> !torch.vtensor<[4,32,1,1],f64>
%1136 = torch.aten.sub.Tensor %1133, %1135, %float1.000000e00 : !torch.vtensor<[4,32,10,9216],f64>, !torch.vtensor<[4,32,1,1],f64>, !torch.float -> !torch.vtensor<[4,32,10,9216],f64>
%1137 = torch.aten.mul.Tensor %1136, %1136 : !torch.vtensor<[4,32,10,9216],f64>, !torch.vtensor<[4,32,10,9216],f64> -> !torch.vtensor<[4,32,10,9216],f64>
%1138 = torch.aten.sum.dim_IntList %1137, %996, %true, %none : !torch.vtensor<[4,32,10,9216],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f64>
%1139 = torch.aten.div.Scalar %1138, %int92160 : !torch.vtensor<[4,32,1,1],f64>, !torch.int -> !torch.vtensor<[4,32,1,1],f64>
%1140 = torch.aten.to.dtype %1139, %int6, %false, %false, %none : !torch.vtensor<[4,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f32>
%1141 = torch.aten.sum.dim_IntList %1132, %996, %true, %none : !torch.vtensor<[4,32,10,9216],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f32>
%1142 = torch.aten.div.Scalar %1141, %int92160 : !torch.vtensor<[4,32,1,1],f32>, !torch.int -> !torch.vtensor<[4,32,1,1],f32>
%1143 = torch.aten.add.Tensor %1140, %924, %int1 : !torch.vtensor<[4,32,1,1],f32>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,32,1,1],f32>
%1144 = torch.aten.rsqrt %1143 : !torch.vtensor<[4,32,1,1],f32> -> !torch.vtensor<[4,32,1,1],f32>
%1145 = torch.aten.sub.Tensor %1125, %1142, %int1 : !torch.vtensor<[4,32,10,9216],f16>, !torch.vtensor<[4,32,1,1],f32>, !torch.int -> !torch.vtensor<[4,32,10,9216],f32>
%1146 = torch.aten.mul.Tensor %1145, %1144 : !torch.vtensor<[4,32,10,9216],f32>, !torch.vtensor<[4,32,1,1],f32> -> !torch.vtensor<[4,32,10,9216],f32>
%1147 = torch.aten.view %1146, %1011 : !torch.vtensor<[4,32,10,9216],f32>, !torch.list<int> -> !torch.vtensor<[4,320,96,96],f32>
%1148 = torch.aten.unsqueeze %923, %int0 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[1,320],f16>
%1149 = torch.aten.unsqueeze %1148, %int2 : !torch.vtensor<[1,320],f16>, !torch.int -> !torch.vtensor<[1,320,1],f16>
%1150 = torch.aten.unsqueeze %1149, %int3 : !torch.vtensor<[1,320,1],f16>, !torch.int -> !torch.vtensor<[1,320,1,1],f16>
%1151 = torch.aten.unsqueeze %922, %int0 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[1,320],f16>
%1152 = torch.aten.unsqueeze %1151, %int2 : !torch.vtensor<[1,320],f16>, !torch.int -> !torch.vtensor<[1,320,1],f16>
%1153 = torch.aten.unsqueeze %1152, %int3 : !torch.vtensor<[1,320,1],f16>, !torch.int -> !torch.vtensor<[1,320,1,1],f16>
%1154 = torch.aten.mul.Tensor %1147, %1153 : !torch.vtensor<[4,320,96,96],f32>, !torch.vtensor<[1,320,1,1],f16> -> !torch.vtensor<[4,320,96,96],f32>
%1155 = torch.aten.add.Tensor %1154, %1150, %int1 : !torch.vtensor<[4,320,96,96],f32>, !torch.vtensor<[1,320,1,1],f16>, !torch.int -> !torch.vtensor<[4,320,96,96],f32>
%1156 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%1157 = torch.aten.to.dtype %1156, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%1158 = torch.prim.ListConstruct %int4, %int320, %int96, %int96 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1159 = torch.aten.broadcast_to %1157, %1158 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[4,320,96,96],f16>
%1160 = torch.aten.to.dtype %1155, %int5, %false, %false, %none : !torch.vtensor<[4,320,96,96],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,320,96,96],f16>
%1161 = torch.prim.ListConstruct %int4, %int320, %int96, %int96 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1162 = torch.aten.broadcast_to %1160, %1161 : !torch.vtensor<[4,320,96,96],f16>, !torch.list<int> -> !torch.vtensor<[4,320,96,96],f16>
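// Flatten for attention: permute NCHW -> NHWC and view as [4, 9216, 320] tokens.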
%1163 = torch.prim.ListConstruct %int0, %int2, %int3, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1164 = torch.aten.permute %1162, %1163 : !torch.vtensor<[4,320,96,96],f16>, !torch.list<int> -> !torch.vtensor<[4,96,96,320],f16>
%1165 = torch.prim.ListConstruct %int4, %int9216, %int320 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1166 = torch.aten.view %1164, %1165 : !torch.vtensor<[4,96,96,320],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,320],f16>
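// Fake-quantize the tokens (20 groups of 16 along the 320-d feature axis), same scheme as above.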
%1167 = torch.aten.clone %1166, %int0 : !torch.vtensor<[4,9216,320],f16>, !torch.int -> !torch.vtensor<[4,9216,320],f16>
%1168 = torch.prim.ListConstruct %int4, %int9216, %int20, %int16 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1169 = torch.aten.view %1167, %1168 : !torch.vtensor<[4,9216,320],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,20,16],f16>
%1170 = torch.aten.abs %1169 : !torch.vtensor<[4,9216,20,16],f16> -> !torch.vtensor<[4,9216,20,16],f16>
%values_2, %indices_3 = torch.aten.max.dim %1170, %int3, %true : !torch.vtensor<[4,9216,20,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[4,9216,20,1],f16>, !torch.vtensor<[4,9216,20,1],si64>
%1171 = torch.prim.ListConstruct %int4, %int9216, %int20, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1172 = torch.aten.view %values_2, %1171 : !torch.vtensor<[4,9216,20,1],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,20,1],f16>
%1173 = torch.aten.broadcast_to %1172, %1168 : !torch.vtensor<[4,9216,20,1],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,20,16],f16>
%1174 = torch.aten.clone %1173, %int0 : !torch.vtensor<[4,9216,20,16],f16>, !torch.int -> !torch.vtensor<[4,9216,20,16],f16>
%1175 = torch.aten.view %1174, %1165 : !torch.vtensor<[4,9216,20,16],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,320],f16>
%1176 = torch.aten.div.Scalar %1175, %int128 : !torch.vtensor<[4,9216,320],f16>, !torch.int -> !torch.vtensor<[4,9216,320],f16>
%1177 = torch.aten.detach %936 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%1178 = torch.aten.detach %1177 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%1179 = torch.aten.div.Tensor %1167, %1176 : !torch.vtensor<[4,9216,320],f16>, !torch.vtensor<[4,9216,320],f16> -> !torch.vtensor<[4,9216,320],f16>
%1180 = torch.aten.add.Tensor %1179, %1178, %int1 : !torch.vtensor<[4,9216,320],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,9216,320],f16>
%1181 = torch.aten.round %1180 : !torch.vtensor<[4,9216,320],f16> -> !torch.vtensor<[4,9216,320],f16>
%1182 = torch.aten.clamp %1181, %int-128, %int127 : !torch.vtensor<[4,9216,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,9216,320],f16>
%1183 = torch.aten.sub.Tensor %1182, %1178, %int1 : !torch.vtensor<[4,9216,320],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,9216,320],f16>
%1184 = torch.aten.mul.Tensor %1183, %1176 : !torch.vtensor<[4,9216,320],f16>, !torch.vtensor<[4,9216,320],f16> -> !torch.vtensor<[4,9216,320],f16>
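// proj_in (by the usual layout): dequantized si8 [320,320] linear with bias over the 36864 (= 4 * 9216) tokens.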
%1185 = torch.prim.ListConstruct %int320, %int20, %int16 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1186 = torch.aten.broadcast_to %921, %1185 : !torch.vtensor<[320,20,1],f16>, !torch.list<int> -> !torch.vtensor<[320,20,16],f16>
%1187 = torch.aten.clone %1186, %int0 : !torch.vtensor<[320,20,16],f16>, !torch.int -> !torch.vtensor<[320,20,16],f16>
%1188 = torch.prim.ListConstruct %int320, %int320 : (!torch.int, !torch.int) -> !torch.list<int>
%1189 = torch.aten.view %1187, %1188 : !torch.vtensor<[320,20,16],f16>, !torch.list<int> -> !torch.vtensor<[320,320],f16>
%1190 = torch.aten.mul.Tensor %920, %1189 : !torch.vtensor<[320,320],si8>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
%1191 = torch.aten.transpose.int %1190, %int0, %int1 : !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,320],f16>
%1192 = torch.prim.ListConstruct %int36864, %int320 : (!torch.int, !torch.int) -> !torch.list<int>
%1193 = torch.aten.view %1184, %1192 : !torch.vtensor<[4,9216,320],f16>, !torch.list<int> -> !torch.vtensor<[36864,320],f16>
%1194 = torch.aten.mm %1193, %1191 : !torch.vtensor<[36864,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[36864,320],f16>
%1195 = torch.aten.mul.Scalar %919, %int1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320],f16>
%1196 = torch.aten.add.Tensor %1195, %1194, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[36864,320],f16>, !torch.int -> !torch.vtensor<[36864,320],f16>
%1197 = torch.aten.view %1196, %1165 : !torch.vtensor<[36864,320],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,320],f16>
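// LayerNorm over the 320 features: mean and variance via sum/320, eps 1e-05, scale %918 and shift %917.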
%1198 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%1199 = torch.aten.sum.dim_IntList %1197, %1198, %true, %none : !torch.vtensor<[4,9216,320],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,9216,1],f16>
%1200 = torch.aten.div.Scalar %1199, %int320 : !torch.vtensor<[4,9216,1],f16>, !torch.int -> !torch.vtensor<[4,9216,1],f16>
%1201 = torch.prim.ListConstruct %int4, %int9216, %int320 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1202 = torch.aten.broadcast_to %1200, %1201 : !torch.vtensor<[4,9216,1],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,320],f16>
%1203 = torch.aten.sub.Tensor %1197, %1202, %int1 : !torch.vtensor<[4,9216,320],f16>, !torch.vtensor<[4,9216,320],f16>, !torch.int -> !torch.vtensor<[4,9216,320],f16>
%1204 = torch.aten.mul.Tensor %1203, %1203 : !torch.vtensor<[4,9216,320],f16>, !torch.vtensor<[4,9216,320],f16> -> !torch.vtensor<[4,9216,320],f16>
%1205 = torch.aten.sum.dim_IntList %1204, %1198, %true, %none : !torch.vtensor<[4,9216,320],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,9216,1],f16>
%1206 = torch.aten.div.Scalar %1205, %int320 : !torch.vtensor<[4,9216,1],f16>, !torch.int -> !torch.vtensor<[4,9216,1],f16>
%1207 = torch.aten.add.Scalar %1206, %float1.000000e-05, %int1 : !torch.vtensor<[4,9216,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[4,9216,1],f16>
%1208 = torch.aten.rsqrt %1207 : !torch.vtensor<[4,9216,1],f16> -> !torch.vtensor<[4,9216,1],f16>
%1209 = torch.prim.ListConstruct %int4, %int9216, %int320 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1210 = torch.aten.broadcast_to %1208, %1209 : !torch.vtensor<[4,9216,1],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,320],f16>
%1211 = torch.aten.mul.Tensor %1203, %1210 : !torch.vtensor<[4,9216,320],f16>, !torch.vtensor<[4,9216,320],f16> -> !torch.vtensor<[4,9216,320],f16>
%1212 = torch.aten.mul.Tensor %1211, %918 : !torch.vtensor<[4,9216,320],f16>, !torch.vtensor<[320],f16> -> !torch.vtensor<[4,9216,320],f16>
%1213 = torch.aten.add.Tensor %1212, %917, %int1 : !torch.vtensor<[4,9216,320],f16>, !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[4,9216,320],f16>
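// Self-attention Q projection: fake-quantize the normalized tokens, dequantized [320,320] matmul
// (no bias), then split into 5 heads of 64: [4,9216,320] -> [20, 9216, 64].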
%1214 = torch.aten.view %1213, %1168 : !torch.vtensor<[4,9216,320],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,20,16],f16>
%1215 = torch.aten.abs %1214 : !torch.vtensor<[4,9216,20,16],f16> -> !torch.vtensor<[4,9216,20,16],f16>
%values_4, %indices_5 = torch.aten.max.dim %1215, %int3, %true : !torch.vtensor<[4,9216,20,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[4,9216,20,1],f16>, !torch.vtensor<[4,9216,20,1],si64>
%1216 = torch.aten.view %values_4, %1171 : !torch.vtensor<[4,9216,20,1],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,20,1],f16>
%1217 = torch.aten.broadcast_to %1216, %1168 : !torch.vtensor<[4,9216,20,1],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,20,16],f16>
%1218 = torch.aten.clone %1217, %int0 : !torch.vtensor<[4,9216,20,16],f16>, !torch.int -> !torch.vtensor<[4,9216,20,16],f16>
%1219 = torch.aten.view %1218, %1165 : !torch.vtensor<[4,9216,20,16],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,320],f16>
%1220 = torch.aten.div.Scalar %1219, %int128 : !torch.vtensor<[4,9216,320],f16>, !torch.int -> !torch.vtensor<[4,9216,320],f16>
%1221 = torch.aten.detach %936 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%1222 = torch.aten.detach %1221 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%1223 = torch.aten.div.Tensor %1213, %1220 : !torch.vtensor<[4,9216,320],f16>, !torch.vtensor<[4,9216,320],f16> -> !torch.vtensor<[4,9216,320],f16>
%1224 = torch.aten.add.Tensor %1223, %1222, %int1 : !torch.vtensor<[4,9216,320],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,9216,320],f16>
%1225 = torch.aten.round %1224 : !torch.vtensor<[4,9216,320],f16> -> !torch.vtensor<[4,9216,320],f16>
%1226 = torch.aten.clamp %1225, %int-128, %int127 : !torch.vtensor<[4,9216,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,9216,320],f16>
%1227 = torch.aten.sub.Tensor %1226, %1222, %int1 : !torch.vtensor<[4,9216,320],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,9216,320],f16>
%1228 = torch.aten.mul.Tensor %1227, %1220 : !torch.vtensor<[4,9216,320],f16>, !torch.vtensor<[4,9216,320],f16> -> !torch.vtensor<[4,9216,320],f16>
%1229 = torch.aten.broadcast_to %916, %1185 : !torch.vtensor<[320,20,1],f16>, !torch.list<int> -> !torch.vtensor<[320,20,16],f16>
%1230 = torch.aten.clone %1229, %int0 : !torch.vtensor<[320,20,16],f16>, !torch.int -> !torch.vtensor<[320,20,16],f16>
%1231 = torch.aten.view %1230, %1188 : !torch.vtensor<[320,20,16],f16>, !torch.list<int> -> !torch.vtensor<[320,320],f16>
%1232 = torch.aten.mul.Tensor %915, %1231 : !torch.vtensor<[320,320],si8>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
%1233 = torch.aten.transpose.int %1232, %int0, %int1 : !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,320],f16>
%1234 = torch.aten.view %1228, %1192 : !torch.vtensor<[4,9216,320],f16>, !torch.list<int> -> !torch.vtensor<[36864,320],f16>
%1235 = torch.aten.mm %1234, %1233 : !torch.vtensor<[36864,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[36864,320],f16>
%1236 = torch.aten.view %1235, %1165 : !torch.vtensor<[36864,320],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,320],f16>
%1237 = torch.prim.ListConstruct %int4, %int9216, %int5, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1238 = torch.aten.view %1236, %1237 : !torch.vtensor<[4,9216,320],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,5,64],f16>
%1239 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1240 = torch.aten.permute %1238, %1239 : !torch.vtensor<[4,9216,5,64],f16>, !torch.list<int> -> !torch.vtensor<[4,5,9216,64],f16>
%1241 = torch.aten.clone %1240, %int0 : !torch.vtensor<[4,5,9216,64],f16>, !torch.int -> !torch.vtensor<[4,5,9216,64],f16>
%1242 = torch.prim.ListConstruct %int20, %int9216, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1243 = torch.aten.view %1241, %1242 : !torch.vtensor<[4,5,9216,64],f16>, !torch.list<int> -> !torch.vtensor<[20,9216,64],f16>
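// K projection: same fake-quant + dequantized matmul on the same normalized tokens.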
%1244 = torch.aten.view %1213, %1168 : !torch.vtensor<[4,9216,320],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,20,16],f16>
%1245 = torch.aten.abs %1244 : !torch.vtensor<[4,9216,20,16],f16> -> !torch.vtensor<[4,9216,20,16],f16>
%values_6, %indices_7 = torch.aten.max.dim %1245, %int3, %true : !torch.vtensor<[4,9216,20,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[4,9216,20,1],f16>, !torch.vtensor<[4,9216,20,1],si64>
%1246 = torch.aten.view %values_6, %1171 : !torch.vtensor<[4,9216,20,1],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,20,1],f16>
%1247 = torch.aten.broadcast_to %1246, %1168 : !torch.vtensor<[4,9216,20,1],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,20,16],f16>
%1248 = torch.aten.clone %1247, %int0 : !torch.vtensor<[4,9216,20,16],f16>, !torch.int -> !torch.vtensor<[4,9216,20,16],f16>
%1249 = torch.aten.view %1248, %1165 : !torch.vtensor<[4,9216,20,16],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,320],f16>
%1250 = torch.aten.div.Scalar %1249, %int128 : !torch.vtensor<[4,9216,320],f16>, !torch.int -> !torch.vtensor<[4,9216,320],f16>
%1251 = torch.aten.detach %936 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%1252 = torch.aten.detach %1251 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%1253 = torch.aten.div.Tensor %1213, %1250 : !torch.vtensor<[4,9216,320],f16>, !torch.vtensor<[4,9216,320],f16> -> !torch.vtensor<[4,9216,320],f16>
%1254 = torch.aten.add.Tensor %1253, %1252, %int1 : !torch.vtensor<[4,9216,320],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,9216,320],f16>
%1255 = torch.aten.round %1254 : !torch.vtensor<[4,9216,320],f16> -> !torch.vtensor<[4,9216,320],f16>
%1256 = torch.aten.clamp %1255, %int-128, %int127 : !torch.vtensor<[4,9216,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,9216,320],f16>
%1257 = torch.aten.sub.Tensor %1256, %1252, %int1 : !torch.vtensor<[4,9216,320],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,9216,320],f16>
%1258 = torch.aten.mul.Tensor %1257, %1250 : !torch.vtensor<[4,9216,320],f16>, !torch.vtensor<[4,9216,320],f16> -> !torch.vtensor<[4,9216,320],f16>
%1259 = torch.aten.broadcast_to %914, %1185 : !torch.vtensor<[320,20,1],f16>, !torch.list<int> -> !torch.vtensor<[320,20,16],f16>
%1260 = torch.aten.clone %1259, %int0 : !torch.vtensor<[320,20,16],f16>, !torch.int -> !torch.vtensor<[320,20,16],f16>
%1261 = torch.aten.view %1260, %1188 : !torch.vtensor<[320,20,16],f16>, !torch.list<int> -> !torch.vtensor<[320,320],f16>
%1262 = torch.aten.mul.Tensor %913, %1261 : !torch.vtensor<[320,320],si8>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
%1263 = torch.aten.transpose.int %1262, %int0, %int1 : !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,320],f16>
%1264 = torch.aten.view %1258, %1192 : !torch.vtensor<[4,9216,320],f16>, !torch.list<int> -> !torch.vtensor<[36864,320],f16>
%1265 = torch.aten.mm %1264, %1263 : !torch.vtensor<[36864,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[36864,320],f16>
%1266 = torch.aten.view %1265, %1165 : !torch.vtensor<[36864,320],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,320],f16>
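// V projection: same pattern once more.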
%1267 = torch.aten.view %1213, %1168 : !torch.vtensor<[4,9216,320],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,20,16],f16>
%1268 = torch.aten.abs %1267 : !torch.vtensor<[4,9216,20,16],f16> -> !torch.vtensor<[4,9216,20,16],f16>
%values_8, %indices_9 = torch.aten.max.dim %1268, %int3, %true : !torch.vtensor<[4,9216,20,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[4,9216,20,1],f16>, !torch.vtensor<[4,9216,20,1],si64>
%1269 = torch.aten.view %values_8, %1171 : !torch.vtensor<[4,9216,20,1],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,20,1],f16>
%1270 = torch.aten.broadcast_to %1269, %1168 : !torch.vtensor<[4,9216,20,1],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,20,16],f16>
%1271 = torch.aten.clone %1270, %int0 : !torch.vtensor<[4,9216,20,16],f16>, !torch.int -> !torch.vtensor<[4,9216,20,16],f16>
%1272 = torch.aten.view %1271, %1165 : !torch.vtensor<[4,9216,20,16],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,320],f16>
%1273 = torch.aten.div.Scalar %1272, %int128 : !torch.vtensor<[4,9216,320],f16>, !torch.int -> !torch.vtensor<[4,9216,320],f16>
%1274 = torch.aten.detach %936 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%1275 = torch.aten.detach %1274 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%1276 = torch.aten.div.Tensor %1213, %1273 : !torch.vtensor<[4,9216,320],f16>, !torch.vtensor<[4,9216,320],f16> -> !torch.vtensor<[4,9216,320],f16>
%1277 = torch.aten.add.Tensor %1276, %1275, %int1 : !torch.vtensor<[4,9216,320],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,9216,320],f16>
%1278 = torch.aten.round %1277 : !torch.vtensor<[4,9216,320],f16> -> !torch.vtensor<[4,9216,320],f16>
%1279 = torch.aten.clamp %1278, %int-128, %int127 : !torch.vtensor<[4,9216,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,9216,320],f16>
%1280 = torch.aten.sub.Tensor %1279, %1275, %int1 : !torch.vtensor<[4,9216,320],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,9216,320],f16>
%1281 = torch.aten.mul.Tensor %1280, %1273 : !torch.vtensor<[4,9216,320],f16>, !torch.vtensor<[4,9216,320],f16> -> !torch.vtensor<[4,9216,320],f16>
%1282 = torch.aten.broadcast_to %912, %1185 : !torch.vtensor<[320,20,1],f16>, !torch.list<int> -> !torch.vtensor<[320,20,16],f16>
%1283 = torch.aten.clone %1282, %int0 : !torch.vtensor<[320,20,16],f16>, !torch.int -> !torch.vtensor<[320,20,16],f16>
%1284 = torch.aten.view %1283, %1188 : !torch.vtensor<[320,20,16],f16>, !torch.list<int> -> !torch.vtensor<[320,320],f16>
%1285 = torch.aten.mul.Tensor %911, %1284 : !torch.vtensor<[320,320],si8>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
%1286 = torch.aten.transpose.int %1285, %int0, %int1 : !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,320],f16>
%1287 = torch.aten.view %1281, %1192 : !torch.vtensor<[4,9216,320],f16>, !torch.list<int> -> !torch.vtensor<[36864,320],f16>
%1288 = torch.aten.mm %1287, %1286 : !torch.vtensor<[36864,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[36864,320],f16>
%1289 = torch.aten.view %1288, %1165 : !torch.vtensor<[36864,320],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,320],f16>
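// Reshape K and V into heads: [4, 9216, 320] -> [20, 9216, 64].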
%1290 = torch.aten.view %1266, %1237 : !torch.vtensor<[4,9216,320],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,5,64],f16>
%1291 = torch.aten.permute %1290, %1239 : !torch.vtensor<[4,9216,5,64],f16>, !torch.list<int> -> !torch.vtensor<[4,5,9216,64],f16>
%1292 = torch.aten.clone %1291, %int0 : !torch.vtensor<[4,5,9216,64],f16>, !torch.int -> !torch.vtensor<[4,5,9216,64],f16>
%1293 = torch.aten.view %1292, %1242 : !torch.vtensor<[4,5,9216,64],f16>, !torch.list<int> -> !torch.vtensor<[20,9216,64],f16>
%1294 = torch.aten.view %1289, %1237 : !torch.vtensor<[4,9216,320],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,5,64],f16>
%1295 = torch.aten.permute %1294, %1239 : !torch.vtensor<[4,9216,5,64],f16>, !torch.list<int> -> !torch.vtensor<[4,5,9216,64],f16>
%1296 = torch.aten.clone %1295, %int0 : !torch.vtensor<[4,5,9216,64],f16>, !torch.int -> !torch.vtensor<[4,5,9216,64],f16>
%1297 = torch.aten.view %1296, %1242 : !torch.vtensor<[4,5,9216,64],f16>, !torch.list<int> -> !torch.vtensor<[20,9216,64],f16>
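// Attention scores in f32: Q @ K^T scaled by 0.125 (= 1/sqrt(64)); the empty tensor %1313 added
// with alpha %int0 is consistent with a decomposed baddbmm with beta = 0.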
%1298 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%1299 = torch.aten.to.dtype %1298, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%1300 = torch.prim.ListConstruct %int20, %int9216, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1301 = torch.aten.broadcast_to %1299, %1300 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[20,9216,64],f32>
%1302 = torch.aten.to.dtype %1243, %int6, %false, %false, %none : !torch.vtensor<[20,9216,64],f16>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[20,9216,64],f32>
%1303 = torch.prim.ListConstruct %int20, %int9216, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1304 = torch.aten.broadcast_to %1302, %1303 : !torch.vtensor<[20,9216,64],f32>, !torch.list<int> -> !torch.vtensor<[20,9216,64],f32>
%1305 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%1306 = torch.aten.to.dtype %1305, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%1307 = torch.prim.ListConstruct %int20, %int9216, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1308 = torch.aten.broadcast_to %1306, %1307 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[20,9216,64],f32>
%1309 = torch.aten.to.dtype %1293, %int6, %false, %false, %none : !torch.vtensor<[20,9216,64],f16>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[20,9216,64],f32>
%1310 = torch.prim.ListConstruct %int20, %int9216, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1311 = torch.aten.broadcast_to %1309, %1310 : !torch.vtensor<[20,9216,64],f32>, !torch.list<int> -> !torch.vtensor<[20,9216,64],f32>
%1312 = torch.prim.ListConstruct %int20, %int9216, %int9216 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1313 = torch.aten.empty.memory_format %1312, %int6, %none, %cuda3A0, %false, %none : !torch.list<int>, !torch.int, !torch.none, !torch.Device, !torch.bool, !torch.none -> !torch.vtensor<[20,9216,9216],f32>
%1314 = torch.aten.transpose.int %1311, %int-1, %int-2 : !torch.vtensor<[20,9216,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[20,64,9216],f32>
%1315 = torch.aten.bmm %1304, %1314 : !torch.vtensor<[20,9216,64],f32>, !torch.vtensor<[20,64,9216],f32> -> !torch.vtensor<[20,9216,9216],f32>
%1316 = torch.aten.mul.Scalar %1315, %float1.250000e-01 : !torch.vtensor<[20,9216,9216],f32>, !torch.float -> !torch.vtensor<[20,9216,9216],f32>
%1317 = torch.aten.add.Tensor %1316, %1313, %int0 : !torch.vtensor<[20,9216,9216],f32>, !torch.vtensor<[20,9216,9216],f32>, !torch.int -> !torch.vtensor<[20,9216,9216],f32>
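// Numerically stable softmax: subtract the row max, exp, normalize by the row sum.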
%values_10, %indices_11 = torch.aten.max.dim %1317, %int-1, %true : !torch.vtensor<[20,9216,9216],f32>, !torch.int, !torch.bool -> !torch.vtensor<[20,9216,1],f32>, !torch.vtensor<[20,9216,1],si64>
%1318 = torch.aten.sub.Tensor %1317, %values_10, %float1.000000e00 : !torch.vtensor<[20,9216,9216],f32>, !torch.vtensor<[20,9216,1],f32>, !torch.float -> !torch.vtensor<[20,9216,9216],f32>
%1319 = torch.aten.exp %1318 : !torch.vtensor<[20,9216,9216],f32> -> !torch.vtensor<[20,9216,9216],f32>
%1320 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%1321 = torch.aten.sum.dim_IntList %1319, %1320, %true, %none : !torch.vtensor<[20,9216,9216],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[20,9216,1],f32>
%1322 = torch.aten.div.Tensor %1319, %1321 : !torch.vtensor<[20,9216,9216],f32>, !torch.vtensor<[20,9216,1],f32> -> !torch.vtensor<[20,9216,9216],f32>
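// Cast the probabilities to f16, apply them to V, and merge the 5 heads back to [4, 9216, 320].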
%1323 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%1324 = torch.aten.to.dtype %1323, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%1325 = torch.prim.ListConstruct %int20, %int9216, %int9216 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1326 = torch.aten.broadcast_to %1324, %1325 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[20,9216,9216],f16>
%1327 = torch.aten.to.dtype %1322, %int5, %false, %false, %none : !torch.vtensor<[20,9216,9216],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[20,9216,9216],f16>
%1328 = torch.prim.ListConstruct %int20, %int9216, %int9216 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1329 = torch.aten.broadcast_to %1327, %1328 : !torch.vtensor<[20,9216,9216],f16>, !torch.list<int> -> !torch.vtensor<[20,9216,9216],f16>
%1330 = torch.aten.bmm %1329, %1297 : !torch.vtensor<[20,9216,9216],f16>, !torch.vtensor<[20,9216,64],f16> -> !torch.vtensor<[20,9216,64],f16>
%1331 = torch.prim.ListConstruct %int4, %int5, %int9216, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1332 = torch.aten.view %1330, %1331 : !torch.vtensor<[20,9216,64],f16>, !torch.list<int> -> !torch.vtensor<[4,5,9216,64],f16>
%1333 = torch.aten.permute %1332, %1239 : !torch.vtensor<[4,5,9216,64],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,5,64],f16>
%1334 = torch.aten.clone %1333, %int0 : !torch.vtensor<[4,9216,5,64],f16>, !torch.int -> !torch.vtensor<[4,9216,5,64],f16>
%1335 = torch.aten.view %1334, %1165 : !torch.vtensor<[4,9216,5,64],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,320],f16>
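// Fake-quantize the attention output, apply the output projection (dequantized si8 weight %909
// with scales %910, bias %908), then add the residual %1197.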
%1336 = torch.aten.view %1335, %1168 : !torch.vtensor<[4,9216,320],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,20,16],f16>
%1337 = torch.aten.abs %1336 : !torch.vtensor<[4,9216,20,16],f16> -> !torch.vtensor<[4,9216,20,16],f16>
%values_12, %indices_13 = torch.aten.max.dim %1337, %int3, %true : !torch.vtensor<[4,9216,20,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[4,9216,20,1],f16>, !torch.vtensor<[4,9216,20,1],si64>
%1338 = torch.aten.view %values_12, %1171 : !torch.vtensor<[4,9216,20,1],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,20,1],f16>
%1339 = torch.aten.broadcast_to %1338, %1168 : !torch.vtensor<[4,9216,20,1],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,20,16],f16>
%1340 = torch.aten.clone %1339, %int0 : !torch.vtensor<[4,9216,20,16],f16>, !torch.int -> !torch.vtensor<[4,9216,20,16],f16>
%1341 = torch.aten.view %1340, %1165 : !torch.vtensor<[4,9216,20,16],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,320],f16>
%1342 = torch.aten.div.Scalar %1341, %int128 : !torch.vtensor<[4,9216,320],f16>, !torch.int -> !torch.vtensor<[4,9216,320],f16>
%1343 = torch.aten.detach %936 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%1344 = torch.aten.detach %1343 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%1345 = torch.aten.div.Tensor %1335, %1342 : !torch.vtensor<[4,9216,320],f16>, !torch.vtensor<[4,9216,320],f16> -> !torch.vtensor<[4,9216,320],f16>
%1346 = torch.aten.add.Tensor %1345, %1344, %int1 : !torch.vtensor<[4,9216,320],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,9216,320],f16>
%1347 = torch.aten.round %1346 : !torch.vtensor<[4,9216,320],f16> -> !torch.vtensor<[4,9216,320],f16>
%1348 = torch.aten.clamp %1347, %int-128, %int127 : !torch.vtensor<[4,9216,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,9216,320],f16>
%1349 = torch.aten.sub.Tensor %1348, %1344, %int1 : !torch.vtensor<[4,9216,320],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,9216,320],f16>
%1350 = torch.aten.mul.Tensor %1349, %1342 : !torch.vtensor<[4,9216,320],f16>, !torch.vtensor<[4,9216,320],f16> -> !torch.vtensor<[4,9216,320],f16>
%1351 = torch.aten.broadcast_to %910, %1185 : !torch.vtensor<[320,20,1],f16>, !torch.list<int> -> !torch.vtensor<[320,20,16],f16>
%1352 = torch.aten.clone %1351, %int0 : !torch.vtensor<[320,20,16],f16>, !torch.int -> !torch.vtensor<[320,20,16],f16>
%1353 = torch.aten.view %1352, %1188 : !torch.vtensor<[320,20,16],f16>, !torch.list<int> -> !torch.vtensor<[320,320],f16>
%1354 = torch.aten.mul.Tensor %909, %1353 : !torch.vtensor<[320,320],si8>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
%1355 = torch.aten.transpose.int %1354, %int0, %int1 : !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,320],f16>
%1356 = torch.aten.view %1350, %1192 : !torch.vtensor<[4,9216,320],f16>, !torch.list<int> -> !torch.vtensor<[36864,320],f16>
%1357 = torch.aten.mm %1356, %1355 : !torch.vtensor<[36864,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[36864,320],f16>
%1358 = torch.aten.mul.Scalar %908, %int1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320],f16>
%1359 = torch.aten.add.Tensor %1358, %1357, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[36864,320],f16>, !torch.int -> !torch.vtensor<[36864,320],f16>
%1360 = torch.aten.view %1359, %1165 : !torch.vtensor<[36864,320],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,320],f16>
%1361 = torch.aten.add.Tensor %1360, %1197, %int1 : !torch.vtensor<[4,9216,320],f16>, !torch.vtensor<[4,9216,320],f16>, !torch.int -> !torch.vtensor<[4,9216,320],f16>
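// Second LayerNorm before cross-attention: scale %907, shift %906.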
%1362 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%1363 = torch.aten.sum.dim_IntList %1361, %1362, %true, %none : !torch.vtensor<[4,9216,320],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,9216,1],f16>
%1364 = torch.aten.div.Scalar %1363, %int320 : !torch.vtensor<[4,9216,1],f16>, !torch.int -> !torch.vtensor<[4,9216,1],f16>
%1365 = torch.prim.ListConstruct %int4, %int9216, %int320 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1366 = torch.aten.broadcast_to %1364, %1365 : !torch.vtensor<[4,9216,1],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,320],f16>
%1367 = torch.aten.sub.Tensor %1361, %1366, %int1 : !torch.vtensor<[4,9216,320],f16>, !torch.vtensor<[4,9216,320],f16>, !torch.int -> !torch.vtensor<[4,9216,320],f16>
%1368 = torch.aten.mul.Tensor %1367, %1367 : !torch.vtensor<[4,9216,320],f16>, !torch.vtensor<[4,9216,320],f16> -> !torch.vtensor<[4,9216,320],f16>
%1369 = torch.aten.sum.dim_IntList %1368, %1362, %true, %none : !torch.vtensor<[4,9216,320],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,9216,1],f16>
%1370 = torch.aten.div.Scalar %1369, %int320 : !torch.vtensor<[4,9216,1],f16>, !torch.int -> !torch.vtensor<[4,9216,1],f16>
%1371 = torch.aten.add.Scalar %1370, %float1.000000e-05, %int1 : !torch.vtensor<[4,9216,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[4,9216,1],f16>
%1372 = torch.aten.rsqrt %1371 : !torch.vtensor<[4,9216,1],f16> -> !torch.vtensor<[4,9216,1],f16>
%1373 = torch.prim.ListConstruct %int4, %int9216, %int320 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1374 = torch.aten.broadcast_to %1372, %1373 : !torch.vtensor<[4,9216,1],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,320],f16>
%1375 = torch.aten.mul.Tensor %1367, %1374 : !torch.vtensor<[4,9216,320],f16>, !torch.vtensor<[4,9216,320],f16> -> !torch.vtensor<[4,9216,320],f16>
%1376 = torch.aten.mul.Tensor %1375, %907 : !torch.vtensor<[4,9216,320],f16>, !torch.vtensor<[320],f16> -> !torch.vtensor<[4,9216,320],f16>
%1377 = torch.aten.add.Tensor %1376, %906, %int1 : !torch.vtensor<[4,9216,320],f16>, !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[4,9216,320],f16>
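// Cross-attention Q projection on the normalized tokens, reshaped to [20, 9216, 64].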
%1378 = torch.aten.view %1377, %1168 : !torch.vtensor<[4,9216,320],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,20,16],f16>
%1379 = torch.aten.abs %1378 : !torch.vtensor<[4,9216,20,16],f16> -> !torch.vtensor<[4,9216,20,16],f16>
%values_14, %indices_15 = torch.aten.max.dim %1379, %int3, %true : !torch.vtensor<[4,9216,20,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[4,9216,20,1],f16>, !torch.vtensor<[4,9216,20,1],si64>
%1380 = torch.aten.view %values_14, %1171 : !torch.vtensor<[4,9216,20,1],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,20,1],f16>
%1381 = torch.aten.broadcast_to %1380, %1168 : !torch.vtensor<[4,9216,20,1],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,20,16],f16>
%1382 = torch.aten.clone %1381, %int0 : !torch.vtensor<[4,9216,20,16],f16>, !torch.int -> !torch.vtensor<[4,9216,20,16],f16>
%1383 = torch.aten.view %1382, %1165 : !torch.vtensor<[4,9216,20,16],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,320],f16>
%1384 = torch.aten.div.Scalar %1383, %int128 : !torch.vtensor<[4,9216,320],f16>, !torch.int -> !torch.vtensor<[4,9216,320],f16>
%1385 = torch.aten.detach %936 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%1386 = torch.aten.detach %1385 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%1387 = torch.aten.div.Tensor %1377, %1384 : !torch.vtensor<[4,9216,320],f16>, !torch.vtensor<[4,9216,320],f16> -> !torch.vtensor<[4,9216,320],f16>
%1388 = torch.aten.add.Tensor %1387, %1386, %int1 : !torch.vtensor<[4,9216,320],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,9216,320],f16>
%1389 = torch.aten.round %1388 : !torch.vtensor<[4,9216,320],f16> -> !torch.vtensor<[4,9216,320],f16>
%1390 = torch.aten.clamp %1389, %int-128, %int127 : !torch.vtensor<[4,9216,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,9216,320],f16>
%1391 = torch.aten.sub.Tensor %1390, %1386, %int1 : !torch.vtensor<[4,9216,320],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,9216,320],f16>
%1392 = torch.aten.mul.Tensor %1391, %1384 : !torch.vtensor<[4,9216,320],f16>, !torch.vtensor<[4,9216,320],f16> -> !torch.vtensor<[4,9216,320],f16>
%1393 = torch.aten.broadcast_to %905, %1185 : !torch.vtensor<[320,20,1],f16>, !torch.list<int> -> !torch.vtensor<[320,20,16],f16>
%1394 = torch.aten.clone %1393, %int0 : !torch.vtensor<[320,20,16],f16>, !torch.int -> !torch.vtensor<[320,20,16],f16>
%1395 = torch.aten.view %1394, %1188 : !torch.vtensor<[320,20,16],f16>, !torch.list<int> -> !torch.vtensor<[320,320],f16>
%1396 = torch.aten.mul.Tensor %904, %1395 : !torch.vtensor<[320,320],si8>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
%1397 = torch.aten.transpose.int %1396, %int0, %int1 : !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,320],f16>
%1398 = torch.aten.view %1392, %1192 : !torch.vtensor<[4,9216,320],f16>, !torch.list<int> -> !torch.vtensor<[36864,320],f16>
%1399 = torch.aten.mm %1398, %1397 : !torch.vtensor<[36864,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[36864,320],f16>
%1400 = torch.aten.view %1399, %1165 : !torch.vtensor<[36864,320],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,320],f16>
%1401 = torch.aten.view %1400, %1237 : !torch.vtensor<[4,9216,320],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,5,64],f16>
%1402 = torch.aten.permute %1401, %1239 : !torch.vtensor<[4,9216,5,64],f16>, !torch.list<int> -> !torch.vtensor<[4,5,9216,64],f16>
%1403 = torch.aten.clone %1402, %int0 : !torch.vtensor<[4,5,9216,64],f16>, !torch.int -> !torch.vtensor<[4,5,9216,64],f16>
%1404 = torch.aten.view %1403, %1242 : !torch.vtensor<[4,5,9216,64],f16>, !torch.list<int> -> !torch.vtensor<[20,9216,64],f16>
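// Fake-quantize the encoder hidden states %arg2 [4,77,1024] in 64 groups of 16 features.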
%1405 = torch.prim.ListConstruct %int4, %int77, %int64, %int16 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1406 = torch.aten.view %arg2, %1405 : !torch.vtensor<[4,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,77,64,16],f16>
%1407 = torch.aten.abs %1406 : !torch.vtensor<[4,77,64,16],f16> -> !torch.vtensor<[4,77,64,16],f16>
%values_16, %indices_17 = torch.aten.max.dim %1407, %int3, %true : !torch.vtensor<[4,77,64,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[4,77,64,1],f16>, !torch.vtensor<[4,77,64,1],si64>
%1408 = torch.prim.ListConstruct %int4, %int77, %int64, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1409 = torch.aten.view %values_16, %1408 : !torch.vtensor<[4,77,64,1],f16>, !torch.list<int> -> !torch.vtensor<[4,77,64,1],f16>
%1410 = torch.aten.broadcast_to %1409, %1405 : !torch.vtensor<[4,77,64,1],f16>, !torch.list<int> -> !torch.vtensor<[4,77,64,16],f16>
%1411 = torch.aten.clone %1410, %int0 : !torch.vtensor<[4,77,64,16],f16>, !torch.int -> !torch.vtensor<[4,77,64,16],f16>
%1412 = torch.prim.ListConstruct %int4, %int77, %int1024 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1413 = torch.aten.view %1411, %1412 : !torch.vtensor<[4,77,64,16],f16>, !torch.list<int> -> !torch.vtensor<[4,77,1024],f16>
%1414 = torch.aten.div.Scalar %1413, %int128 : !torch.vtensor<[4,77,1024],f16>, !torch.int -> !torch.vtensor<[4,77,1024],f16>
%1415 = torch.aten.detach %936 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%1416 = torch.aten.detach %1415 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%1417 = torch.aten.div.Tensor %arg2, %1414 : !torch.vtensor<[4,77,1024],f16>, !torch.vtensor<[4,77,1024],f16> -> !torch.vtensor<[4,77,1024],f16>
%1418 = torch.aten.add.Tensor %1417, %1416, %int1 : !torch.vtensor<[4,77,1024],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,77,1024],f16>
%1419 = torch.aten.round %1418 : !torch.vtensor<[4,77,1024],f16> -> !torch.vtensor<[4,77,1024],f16>
%1420 = torch.aten.clamp %1419, %int-128, %int127 : !torch.vtensor<[4,77,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,77,1024],f16>
%1421 = torch.aten.sub.Tensor %1420, %1416, %int1 : !torch.vtensor<[4,77,1024],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,77,1024],f16>
%1422 = torch.aten.mul.Tensor %1421, %1414 : !torch.vtensor<[4,77,1024],f16>, !torch.vtensor<[4,77,1024],f16> -> !torch.vtensor<[4,77,1024],f16>
%1423 = torch.prim.ListConstruct %int320, %int64, %int16 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1424 = torch.aten.broadcast_to %903, %1423 : !torch.vtensor<[320,64,1],f16>, !torch.list<int> -> !torch.vtensor<[320,64,16],f16>
%1425 = torch.aten.clone %1424, %int0 : !torch.vtensor<[320,64,16],f16>, !torch.int -> !torch.vtensor<[320,64,16],f16>
%1426 = torch.prim.ListConstruct %int320, %int1024 : (!torch.int, !torch.int) -> !torch.list<int>
%1427 = torch.aten.view %1425, %1426 : !torch.vtensor<[320,64,16],f16>, !torch.list<int> -> !torch.vtensor<[320,1024],f16>
%1428 = torch.aten.mul.Tensor %902, %1427 : !torch.vtensor<[320,1024],si8>, !torch.vtensor<[320,1024],f16> -> !torch.vtensor<[320,1024],f16>
%1429 = torch.aten.transpose.int %1428, %int0, %int1 : !torch.vtensor<[320,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[1024,320],f16>
%1430 = torch.prim.ListConstruct %int308, %int1024 : (!torch.int, !torch.int) -> !torch.list<int>
%1431 = torch.aten.view %1422, %1430 : !torch.vtensor<[4,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[308,1024],f16>
%1432 = torch.aten.mm %1431, %1429 : !torch.vtensor<[308,1024],f16>, !torch.vtensor<[1024,320],f16> -> !torch.vtensor<[308,320],f16>
%1433 = torch.prim.ListConstruct %int4, %int77, %int320 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1434 = torch.aten.view %1432, %1433 : !torch.vtensor<[308,320],f16>, !torch.list<int> -> !torch.vtensor<[4,77,320],f16>
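// The value path repeats the pattern: quantize/dequantize %arg2, then a
// second dequantized 1024->320 projection.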
%1435 = torch.aten.view %arg2, %1405 : !torch.vtensor<[4,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,77,64,16],f16>
%1436 = torch.aten.abs %1435 : !torch.vtensor<[4,77,64,16],f16> -> !torch.vtensor<[4,77,64,16],f16>
%values_18, %indices_19 = torch.aten.max.dim %1436, %int3, %true : !torch.vtensor<[4,77,64,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[4,77,64,1],f16>, !torch.vtensor<[4,77,64,1],si64>
%1437 = torch.aten.view %values_18, %1408 : !torch.vtensor<[4,77,64,1],f16>, !torch.list<int> -> !torch.vtensor<[4,77,64,1],f16>
%1438 = torch.aten.broadcast_to %1437, %1405 : !torch.vtensor<[4,77,64,1],f16>, !torch.list<int> -> !torch.vtensor<[4,77,64,16],f16>
%1439 = torch.aten.clone %1438, %int0 : !torch.vtensor<[4,77,64,16],f16>, !torch.int -> !torch.vtensor<[4,77,64,16],f16>
%1440 = torch.aten.view %1439, %1412 : !torch.vtensor<[4,77,64,16],f16>, !torch.list<int> -> !torch.vtensor<[4,77,1024],f16>
%1441 = torch.aten.div.Scalar %1440, %int128 : !torch.vtensor<[4,77,1024],f16>, !torch.int -> !torch.vtensor<[4,77,1024],f16>
%1442 = torch.aten.detach %936 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%1443 = torch.aten.detach %1442 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%1444 = torch.aten.div.Tensor %arg2, %1441 : !torch.vtensor<[4,77,1024],f16>, !torch.vtensor<[4,77,1024],f16> -> !torch.vtensor<[4,77,1024],f16>
%1445 = torch.aten.add.Tensor %1444, %1443, %int1 : !torch.vtensor<[4,77,1024],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,77,1024],f16>
%1446 = torch.aten.round %1445 : !torch.vtensor<[4,77,1024],f16> -> !torch.vtensor<[4,77,1024],f16>
%1447 = torch.aten.clamp %1446, %int-128, %int127 : !torch.vtensor<[4,77,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,77,1024],f16>
%1448 = torch.aten.sub.Tensor %1447, %1443, %int1 : !torch.vtensor<[4,77,1024],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,77,1024],f16>
%1449 = torch.aten.mul.Tensor %1448, %1441 : !torch.vtensor<[4,77,1024],f16>, !torch.vtensor<[4,77,1024],f16> -> !torch.vtensor<[4,77,1024],f16>
%1450 = torch.aten.broadcast_to %901, %1423 : !torch.vtensor<[320,64,1],f16>, !torch.list<int> -> !torch.vtensor<[320,64,16],f16>
%1451 = torch.aten.clone %1450, %int0 : !torch.vtensor<[320,64,16],f16>, !torch.int -> !torch.vtensor<[320,64,16],f16>
%1452 = torch.aten.view %1451, %1426 : !torch.vtensor<[320,64,16],f16>, !torch.list<int> -> !torch.vtensor<[320,1024],f16>
%1453 = torch.aten.mul.Tensor %900, %1452 : !torch.vtensor<[320,1024],si8>, !torch.vtensor<[320,1024],f16> -> !torch.vtensor<[320,1024],f16>
%1454 = torch.aten.transpose.int %1453, %int0, %int1 : !torch.vtensor<[320,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[1024,320],f16>
%1455 = torch.aten.view %1449, %1430 : !torch.vtensor<[4,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[308,1024],f16>
%1456 = torch.aten.mm %1455, %1454 : !torch.vtensor<[308,1024],f16>, !torch.vtensor<[1024,320],f16> -> !torch.vtensor<[308,320],f16>
%1457 = torch.aten.view %1456, %1433 : !torch.vtensor<[308,320],f16>, !torch.list<int> -> !torch.vtensor<[4,77,320],f16>
%1458 = torch.prim.ListConstruct %int4, %int77, %int5, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1459 = torch.aten.view %1434, %1458 : !torch.vtensor<[4,77,320],f16>, !torch.list<int> -> !torch.vtensor<[4,77,5,64],f16>
%1460 = torch.aten.permute %1459, %1239 : !torch.vtensor<[4,77,5,64],f16>, !torch.list<int> -> !torch.vtensor<[4,5,77,64],f16>
%1461 = torch.aten.clone %1460, %int0 : !torch.vtensor<[4,5,77,64],f16>, !torch.int -> !torch.vtensor<[4,5,77,64],f16>
%1462 = torch.prim.ListConstruct %int20, %int77, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1463 = torch.aten.view %1461, %1462 : !torch.vtensor<[4,5,77,64],f16>, !torch.list<int> -> !torch.vtensor<[20,77,64],f16>
%1464 = torch.aten.view %1457, %1458 : !torch.vtensor<[4,77,320],f16>, !torch.list<int> -> !torch.vtensor<[4,77,5,64],f16>
%1465 = torch.aten.permute %1464, %1239 : !torch.vtensor<[4,77,5,64],f16>, !torch.list<int> -> !torch.vtensor<[4,5,77,64],f16>
%1466 = torch.aten.clone %1465, %int0 : !torch.vtensor<[4,5,77,64],f16>, !torch.int -> !torch.vtensor<[4,5,77,64],f16>
%1467 = torch.aten.view %1466, %1462 : !torch.vtensor<[4,5,77,64],f16>, !torch.list<int> -> !torch.vtensor<[20,77,64],f16>
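// Scaled dot-product attention follows, computed in f32: Q, K, V are
// upcast, scores = (Q @ K^T) * 0.125 (1/sqrt(64)), then a numerically
// stable softmax (subtract the row max, exp, normalize). The uninitialized
// [20,9216,77] tensor from aten.empty is added with alpha 0, so it only
// pins the output shape; this looks like a baddbmm-style decomposition.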
%1468 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%1469 = torch.aten.to.dtype %1468, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%1470 = torch.prim.ListConstruct %int20, %int9216, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1471 = torch.aten.broadcast_to %1469, %1470 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[20,9216,64],f32>
%1472 = torch.aten.to.dtype %1404, %int6, %false, %false, %none : !torch.vtensor<[20,9216,64],f16>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[20,9216,64],f32>
%1473 = torch.prim.ListConstruct %int20, %int9216, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1474 = torch.aten.broadcast_to %1472, %1473 : !torch.vtensor<[20,9216,64],f32>, !torch.list<int> -> !torch.vtensor<[20,9216,64],f32>
%1475 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%1476 = torch.aten.to.dtype %1475, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%1477 = torch.prim.ListConstruct %int20, %int77, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1478 = torch.aten.broadcast_to %1476, %1477 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[20,77,64],f32>
%1479 = torch.aten.to.dtype %1463, %int6, %false, %false, %none : !torch.vtensor<[20,77,64],f16>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[20,77,64],f32>
%1480 = torch.prim.ListConstruct %int20, %int77, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1481 = torch.aten.broadcast_to %1479, %1480 : !torch.vtensor<[20,77,64],f32>, !torch.list<int> -> !torch.vtensor<[20,77,64],f32>
%1482 = torch.prim.ListConstruct %int20, %int9216, %int77 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1483 = torch.aten.empty.memory_format %1482, %int6, %none, %cuda3A0, %false, %none : !torch.list<int>, !torch.int, !torch.none, !torch.Device, !torch.bool, !torch.none -> !torch.vtensor<[20,9216,77],f32>
%1484 = torch.aten.transpose.int %1481, %int-1, %int-2 : !torch.vtensor<[20,77,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[20,64,77],f32>
%1485 = torch.aten.bmm %1474, %1484 : !torch.vtensor<[20,9216,64],f32>, !torch.vtensor<[20,64,77],f32> -> !torch.vtensor<[20,9216,77],f32>
%1486 = torch.aten.mul.Scalar %1485, %float1.250000e-01 : !torch.vtensor<[20,9216,77],f32>, !torch.float -> !torch.vtensor<[20,9216,77],f32>
%1487 = torch.aten.add.Tensor %1486, %1483, %int0 : !torch.vtensor<[20,9216,77],f32>, !torch.vtensor<[20,9216,77],f32>, !torch.int -> !torch.vtensor<[20,9216,77],f32>
%values_20, %indices_21 = torch.aten.max.dim %1487, %int-1, %true : !torch.vtensor<[20,9216,77],f32>, !torch.int, !torch.bool -> !torch.vtensor<[20,9216,1],f32>, !torch.vtensor<[20,9216,1],si64>
%1488 = torch.aten.sub.Tensor %1487, %values_20, %float1.000000e00 : !torch.vtensor<[20,9216,77],f32>, !torch.vtensor<[20,9216,1],f32>, !torch.float -> !torch.vtensor<[20,9216,77],f32>
%1489 = torch.aten.exp %1488 : !torch.vtensor<[20,9216,77],f32> -> !torch.vtensor<[20,9216,77],f32>
%1490 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%1491 = torch.aten.sum.dim_IntList %1489, %1490, %true, %none : !torch.vtensor<[20,9216,77],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[20,9216,1],f32>
%1492 = torch.aten.div.Tensor %1489, %1491 : !torch.vtensor<[20,9216,77],f32>, !torch.vtensor<[20,9216,1],f32> -> !torch.vtensor<[20,9216,77],f32>
%1493 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%1494 = torch.aten.to.dtype %1493, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%1495 = torch.prim.ListConstruct %int20, %int9216, %int77 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1496 = torch.aten.broadcast_to %1494, %1495 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[20,9216,77],f16>
%1497 = torch.aten.to.dtype %1492, %int5, %false, %false, %none : !torch.vtensor<[20,9216,77],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[20,9216,77],f16>
%1498 = torch.prim.ListConstruct %int20, %int9216, %int77 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1499 = torch.aten.broadcast_to %1497, %1498 : !torch.vtensor<[20,9216,77],f16>, !torch.list<int> -> !torch.vtensor<[20,9216,77],f16>
%1500 = torch.aten.bmm %1499, %1467 : !torch.vtensor<[20,9216,77],f16>, !torch.vtensor<[20,77,64],f16> -> !torch.vtensor<[20,9216,64],f16>
%1501 = torch.aten.view %1500, %1331 : !torch.vtensor<[20,9216,64],f16>, !torch.list<int> -> !torch.vtensor<[4,5,9216,64],f16>
%1502 = torch.aten.permute %1501, %1239 : !torch.vtensor<[4,5,9216,64],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,5,64],f16>
%1503 = torch.aten.clone %1502, %int0 : !torch.vtensor<[4,9216,5,64],f16>, !torch.int -> !torch.vtensor<[4,9216,5,64],f16>
%1504 = torch.aten.view %1503, %1165 : !torch.vtensor<[4,9216,5,64],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,320],f16>
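// Attention epilogue: the attended values are fake-quantized and passed
// through a dequantized 320x320 output projection with bias %897, then
// added back to the residual stream (%1361).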
%1505 = torch.aten.view %1504, %1168 : !torch.vtensor<[4,9216,320],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,20,16],f16>
%1506 = torch.aten.abs %1505 : !torch.vtensor<[4,9216,20,16],f16> -> !torch.vtensor<[4,9216,20,16],f16>
%values_22, %indices_23 = torch.aten.max.dim %1506, %int3, %true : !torch.vtensor<[4,9216,20,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[4,9216,20,1],f16>, !torch.vtensor<[4,9216,20,1],si64>
%1507 = torch.aten.view %values_22, %1171 : !torch.vtensor<[4,9216,20,1],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,20,1],f16>
%1508 = torch.aten.broadcast_to %1507, %1168 : !torch.vtensor<[4,9216,20,1],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,20,16],f16>
%1509 = torch.aten.clone %1508, %int0 : !torch.vtensor<[4,9216,20,16],f16>, !torch.int -> !torch.vtensor<[4,9216,20,16],f16>
%1510 = torch.aten.view %1509, %1165 : !torch.vtensor<[4,9216,20,16],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,320],f16>
%1511 = torch.aten.div.Scalar %1510, %int128 : !torch.vtensor<[4,9216,320],f16>, !torch.int -> !torch.vtensor<[4,9216,320],f16>
%1512 = torch.aten.detach %936 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%1513 = torch.aten.detach %1512 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%1514 = torch.aten.div.Tensor %1504, %1511 : !torch.vtensor<[4,9216,320],f16>, !torch.vtensor<[4,9216,320],f16> -> !torch.vtensor<[4,9216,320],f16>
%1515 = torch.aten.add.Tensor %1514, %1513, %int1 : !torch.vtensor<[4,9216,320],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,9216,320],f16>
%1516 = torch.aten.round %1515 : !torch.vtensor<[4,9216,320],f16> -> !torch.vtensor<[4,9216,320],f16>
%1517 = torch.aten.clamp %1516, %int-128, %int127 : !torch.vtensor<[4,9216,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,9216,320],f16>
%1518 = torch.aten.sub.Tensor %1517, %1513, %int1 : !torch.vtensor<[4,9216,320],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,9216,320],f16>
%1519 = torch.aten.mul.Tensor %1518, %1511 : !torch.vtensor<[4,9216,320],f16>, !torch.vtensor<[4,9216,320],f16> -> !torch.vtensor<[4,9216,320],f16>
%1520 = torch.aten.broadcast_to %899, %1185 : !torch.vtensor<[320,20,1],f16>, !torch.list<int> -> !torch.vtensor<[320,20,16],f16>
%1521 = torch.aten.clone %1520, %int0 : !torch.vtensor<[320,20,16],f16>, !torch.int -> !torch.vtensor<[320,20,16],f16>
%1522 = torch.aten.view %1521, %1188 : !torch.vtensor<[320,20,16],f16>, !torch.list<int> -> !torch.vtensor<[320,320],f16>
%1523 = torch.aten.mul.Tensor %898, %1522 : !torch.vtensor<[320,320],si8>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
%1524 = torch.aten.transpose.int %1523, %int0, %int1 : !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,320],f16>
%1525 = torch.aten.view %1519, %1192 : !torch.vtensor<[4,9216,320],f16>, !torch.list<int> -> !torch.vtensor<[36864,320],f16>
%1526 = torch.aten.mm %1525, %1524 : !torch.vtensor<[36864,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[36864,320],f16>
%1527 = torch.aten.mul.Scalar %897, %int1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320],f16>
%1528 = torch.aten.add.Tensor %1527, %1526, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[36864,320],f16>, !torch.int -> !torch.vtensor<[36864,320],f16>
%1529 = torch.aten.view %1528, %1165 : !torch.vtensor<[36864,320],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,320],f16>
%1530 = torch.aten.add.Tensor %1529, %1361, %int1 : !torch.vtensor<[4,9216,320],f16>, !torch.vtensor<[4,9216,320],f16>, !torch.int -> !torch.vtensor<[4,9216,320],f16>
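// LayerNorm over the 320-wide feature dimension, fully decomposed: mean,
// centered variance, rsqrt(var + 1e-05), then affine scale %896 and
// shift %895.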
%1531 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%1532 = torch.aten.sum.dim_IntList %1530, %1531, %true, %none : !torch.vtensor<[4,9216,320],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,9216,1],f16>
%1533 = torch.aten.div.Scalar %1532, %int320 : !torch.vtensor<[4,9216,1],f16>, !torch.int -> !torch.vtensor<[4,9216,1],f16>
%1534 = torch.prim.ListConstruct %int4, %int9216, %int320 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1535 = torch.aten.broadcast_to %1533, %1534 : !torch.vtensor<[4,9216,1],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,320],f16>
%1536 = torch.aten.sub.Tensor %1530, %1535, %int1 : !torch.vtensor<[4,9216,320],f16>, !torch.vtensor<[4,9216,320],f16>, !torch.int -> !torch.vtensor<[4,9216,320],f16>
%1537 = torch.aten.mul.Tensor %1536, %1536 : !torch.vtensor<[4,9216,320],f16>, !torch.vtensor<[4,9216,320],f16> -> !torch.vtensor<[4,9216,320],f16>
%1538 = torch.aten.sum.dim_IntList %1537, %1531, %true, %none : !torch.vtensor<[4,9216,320],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,9216,1],f16>
%1539 = torch.aten.div.Scalar %1538, %int320 : !torch.vtensor<[4,9216,1],f16>, !torch.int -> !torch.vtensor<[4,9216,1],f16>
%1540 = torch.aten.add.Scalar %1539, %float1.000000e-05, %int1 : !torch.vtensor<[4,9216,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[4,9216,1],f16>
%1541 = torch.aten.rsqrt %1540 : !torch.vtensor<[4,9216,1],f16> -> !torch.vtensor<[4,9216,1],f16>
%1542 = torch.prim.ListConstruct %int4, %int9216, %int320 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1543 = torch.aten.broadcast_to %1541, %1542 : !torch.vtensor<[4,9216,1],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,320],f16>
%1544 = torch.aten.mul.Tensor %1536, %1543 : !torch.vtensor<[4,9216,320],f16>, !torch.vtensor<[4,9216,320],f16> -> !torch.vtensor<[4,9216,320],f16>
%1545 = torch.aten.mul.Tensor %1544, %896 : !torch.vtensor<[4,9216,320],f16>, !torch.vtensor<[320],f16> -> !torch.vtensor<[4,9216,320],f16>
%1546 = torch.aten.add.Tensor %1545, %895, %int1 : !torch.vtensor<[4,9216,320],f16>, !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[4,9216,320],f16>
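// Feed-forward with GEGLU: the normalized activations are fake-quantized,
// projected 320->2560, and split into two 1280-wide halves; one half goes
// through GELU and gates the other by elementwise multiplication.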
%1547 = torch.aten.view %1546, %1168 : !torch.vtensor<[4,9216,320],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,20,16],f16>
%1548 = torch.aten.abs %1547 : !torch.vtensor<[4,9216,20,16],f16> -> !torch.vtensor<[4,9216,20,16],f16>
%values_24, %indices_25 = torch.aten.max.dim %1548, %int3, %true : !torch.vtensor<[4,9216,20,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[4,9216,20,1],f16>, !torch.vtensor<[4,9216,20,1],si64>
%1549 = torch.aten.view %values_24, %1171 : !torch.vtensor<[4,9216,20,1],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,20,1],f16>
%1550 = torch.aten.broadcast_to %1549, %1168 : !torch.vtensor<[4,9216,20,1],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,20,16],f16>
%1551 = torch.aten.clone %1550, %int0 : !torch.vtensor<[4,9216,20,16],f16>, !torch.int -> !torch.vtensor<[4,9216,20,16],f16>
%1552 = torch.aten.view %1551, %1165 : !torch.vtensor<[4,9216,20,16],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,320],f16>
%1553 = torch.aten.div.Scalar %1552, %int128 : !torch.vtensor<[4,9216,320],f16>, !torch.int -> !torch.vtensor<[4,9216,320],f16>
%1554 = torch.aten.detach %936 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%1555 = torch.aten.detach %1554 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%1556 = torch.aten.div.Tensor %1546, %1553 : !torch.vtensor<[4,9216,320],f16>, !torch.vtensor<[4,9216,320],f16> -> !torch.vtensor<[4,9216,320],f16>
%1557 = torch.aten.add.Tensor %1556, %1555, %int1 : !torch.vtensor<[4,9216,320],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,9216,320],f16>
%1558 = torch.aten.round %1557 : !torch.vtensor<[4,9216,320],f16> -> !torch.vtensor<[4,9216,320],f16>
%1559 = torch.aten.clamp %1558, %int-128, %int127 : !torch.vtensor<[4,9216,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,9216,320],f16>
%1560 = torch.aten.sub.Tensor %1559, %1555, %int1 : !torch.vtensor<[4,9216,320],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,9216,320],f16>
%1561 = torch.aten.mul.Tensor %1560, %1553 : !torch.vtensor<[4,9216,320],f16>, !torch.vtensor<[4,9216,320],f16> -> !torch.vtensor<[4,9216,320],f16>
%1562 = torch.prim.ListConstruct %int2560, %int20, %int16 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1563 = torch.aten.broadcast_to %894, %1562 : !torch.vtensor<[2560,20,1],f16>, !torch.list<int> -> !torch.vtensor<[2560,20,16],f16>
%1564 = torch.aten.clone %1563, %int0 : !torch.vtensor<[2560,20,16],f16>, !torch.int -> !torch.vtensor<[2560,20,16],f16>
%1565 = torch.prim.ListConstruct %int2560, %int320 : (!torch.int, !torch.int) -> !torch.list<int>
%1566 = torch.aten.view %1564, %1565 : !torch.vtensor<[2560,20,16],f16>, !torch.list<int> -> !torch.vtensor<[2560,320],f16>
%1567 = torch.aten.mul.Tensor %893, %1566 : !torch.vtensor<[2560,320],si8>, !torch.vtensor<[2560,320],f16> -> !torch.vtensor<[2560,320],f16>
%1568 = torch.aten.transpose.int %1567, %int0, %int1 : !torch.vtensor<[2560,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,2560],f16>
%1569 = torch.aten.view %1561, %1192 : !torch.vtensor<[4,9216,320],f16>, !torch.list<int> -> !torch.vtensor<[36864,320],f16>
%1570 = torch.aten.mm %1569, %1568 : !torch.vtensor<[36864,320],f16>, !torch.vtensor<[320,2560],f16> -> !torch.vtensor<[36864,2560],f16>
%1571 = torch.aten.mul.Scalar %892, %int1 : !torch.vtensor<[2560],f16>, !torch.int -> !torch.vtensor<[2560],f16>
%1572 = torch.aten.add.Tensor %1571, %1570, %int1 : !torch.vtensor<[2560],f16>, !torch.vtensor<[36864,2560],f16>, !torch.int -> !torch.vtensor<[36864,2560],f16>
%1573 = torch.prim.ListConstruct %int4, %int9216, %int2560 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1574 = torch.aten.view %1572, %1573 : !torch.vtensor<[36864,2560],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,2560],f16>
%1575 = torch.aten.slice.Tensor %1574, %int-1, %int0, %int1280, %int1 : !torch.vtensor<[4,9216,2560],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[4,9216,1280],f16>
%1576 = torch.aten.slice.Tensor %1574, %int-1, %int1280, %int2560, %int1 : !torch.vtensor<[4,9216,2560],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[4,9216,1280],f16>
%1577 = torch.aten.gelu %1576, %str : !torch.vtensor<[4,9216,1280],f16>, !torch.str -> !torch.vtensor<[4,9216,1280],f16>
%1578 = torch.aten.mul.Tensor %1575, %1577 : !torch.vtensor<[4,9216,1280],f16>, !torch.vtensor<[4,9216,1280],f16> -> !torch.vtensor<[4,9216,1280],f16>
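// The 1280-wide gated output is fake-quantized (80 groups of 16) and
// projected back down to 320, then added to the residual.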
%1579 = torch.prim.ListConstruct %int4, %int9216, %int80, %int16 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1580 = torch.aten.view %1578, %1579 : !torch.vtensor<[4,9216,1280],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,80,16],f16>
%1581 = torch.aten.abs %1580 : !torch.vtensor<[4,9216,80,16],f16> -> !torch.vtensor<[4,9216,80,16],f16>
%values_26, %indices_27 = torch.aten.max.dim %1581, %int3, %true : !torch.vtensor<[4,9216,80,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[4,9216,80,1],f16>, !torch.vtensor<[4,9216,80,1],si64>
%1582 = torch.prim.ListConstruct %int4, %int9216, %int80, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1583 = torch.aten.view %values_26, %1582 : !torch.vtensor<[4,9216,80,1],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,80,1],f16>
%1584 = torch.aten.broadcast_to %1583, %1579 : !torch.vtensor<[4,9216,80,1],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,80,16],f16>
%1585 = torch.aten.clone %1584, %int0 : !torch.vtensor<[4,9216,80,16],f16>, !torch.int -> !torch.vtensor<[4,9216,80,16],f16>
%1586 = torch.prim.ListConstruct %int4, %int9216, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1587 = torch.aten.view %1585, %1586 : !torch.vtensor<[4,9216,80,16],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,1280],f16>
%1588 = torch.aten.div.Scalar %1587, %int128 : !torch.vtensor<[4,9216,1280],f16>, !torch.int -> !torch.vtensor<[4,9216,1280],f16>
%1589 = torch.aten.detach %936 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%1590 = torch.aten.detach %1589 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%1591 = torch.aten.div.Tensor %1578, %1588 : !torch.vtensor<[4,9216,1280],f16>, !torch.vtensor<[4,9216,1280],f16> -> !torch.vtensor<[4,9216,1280],f16>
%1592 = torch.aten.add.Tensor %1591, %1590, %int1 : !torch.vtensor<[4,9216,1280],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,9216,1280],f16>
%1593 = torch.aten.round %1592 : !torch.vtensor<[4,9216,1280],f16> -> !torch.vtensor<[4,9216,1280],f16>
%1594 = torch.aten.clamp %1593, %int-128, %int127 : !torch.vtensor<[4,9216,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,9216,1280],f16>
%1595 = torch.aten.sub.Tensor %1594, %1590, %int1 : !torch.vtensor<[4,9216,1280],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,9216,1280],f16>
%1596 = torch.aten.mul.Tensor %1595, %1588 : !torch.vtensor<[4,9216,1280],f16>, !torch.vtensor<[4,9216,1280],f16> -> !torch.vtensor<[4,9216,1280],f16>
%1597 = torch.prim.ListConstruct %int320, %int80, %int16 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1598 = torch.aten.broadcast_to %891, %1597 : !torch.vtensor<[320,80,1],f16>, !torch.list<int> -> !torch.vtensor<[320,80,16],f16>
%1599 = torch.aten.clone %1598, %int0 : !torch.vtensor<[320,80,16],f16>, !torch.int -> !torch.vtensor<[320,80,16],f16>
%1600 = torch.prim.ListConstruct %int320, %int1280 : (!torch.int, !torch.int) -> !torch.list<int>
%1601 = torch.aten.view %1599, %1600 : !torch.vtensor<[320,80,16],f16>, !torch.list<int> -> !torch.vtensor<[320,1280],f16>
%1602 = torch.aten.mul.Tensor %890, %1601 : !torch.vtensor<[320,1280],si8>, !torch.vtensor<[320,1280],f16> -> !torch.vtensor<[320,1280],f16>
%1603 = torch.aten.transpose.int %1602, %int0, %int1 : !torch.vtensor<[320,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,320],f16>
%1604 = torch.prim.ListConstruct %int36864, %int1280 : (!torch.int, !torch.int) -> !torch.list<int>
%1605 = torch.aten.view %1596, %1604 : !torch.vtensor<[4,9216,1280],f16>, !torch.list<int> -> !torch.vtensor<[36864,1280],f16>
%1606 = torch.aten.mm %1605, %1603 : !torch.vtensor<[36864,1280],f16>, !torch.vtensor<[1280,320],f16> -> !torch.vtensor<[36864,320],f16>
%1607 = torch.aten.mul.Scalar %889, %int1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320],f16>
%1608 = torch.aten.add.Tensor %1607, %1606, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[36864,320],f16>, !torch.int -> !torch.vtensor<[36864,320],f16>
%1609 = torch.aten.view %1608, %1165 : !torch.vtensor<[36864,320],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,320],f16>
%1610 = torch.aten.add.Tensor %1609, %1530, %int1 : !torch.vtensor<[4,9216,320],f16>, !torch.vtensor<[4,9216,320],f16>, !torch.int -> !torch.vtensor<[4,9216,320],f16>
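// Transformer block epilogue: quantize/dequantize, apply a 320x320 linear
// (presumably proj_out) with bias %886, reshape [4,9216,320] back to NCHW
// [4,320,96,96], and add the pre-transformer feature map %1124 as a
// residual.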
%1611 = torch.aten.view %1610, %1168 : !torch.vtensor<[4,9216,320],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,20,16],f16>
%1612 = torch.aten.abs %1611 : !torch.vtensor<[4,9216,20,16],f16> -> !torch.vtensor<[4,9216,20,16],f16>
%values_28, %indices_29 = torch.aten.max.dim %1612, %int3, %true : !torch.vtensor<[4,9216,20,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[4,9216,20,1],f16>, !torch.vtensor<[4,9216,20,1],si64>
%1613 = torch.aten.view %values_28, %1171 : !torch.vtensor<[4,9216,20,1],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,20,1],f16>
%1614 = torch.aten.broadcast_to %1613, %1168 : !torch.vtensor<[4,9216,20,1],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,20,16],f16>
%1615 = torch.aten.clone %1614, %int0 : !torch.vtensor<[4,9216,20,16],f16>, !torch.int -> !torch.vtensor<[4,9216,20,16],f16>
%1616 = torch.aten.view %1615, %1165 : !torch.vtensor<[4,9216,20,16],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,320],f16>
%1617 = torch.aten.div.Scalar %1616, %int128 : !torch.vtensor<[4,9216,320],f16>, !torch.int -> !torch.vtensor<[4,9216,320],f16>
%1618 = torch.aten.detach %936 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%1619 = torch.aten.detach %1618 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%1620 = torch.aten.div.Tensor %1610, %1617 : !torch.vtensor<[4,9216,320],f16>, !torch.vtensor<[4,9216,320],f16> -> !torch.vtensor<[4,9216,320],f16>
%1621 = torch.aten.add.Tensor %1620, %1619, %int1 : !torch.vtensor<[4,9216,320],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,9216,320],f16>
%1622 = torch.aten.round %1621 : !torch.vtensor<[4,9216,320],f16> -> !torch.vtensor<[4,9216,320],f16>
%1623 = torch.aten.clamp %1622, %int-128, %int127 : !torch.vtensor<[4,9216,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,9216,320],f16>
%1624 = torch.aten.sub.Tensor %1623, %1619, %int1 : !torch.vtensor<[4,9216,320],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,9216,320],f16>
%1625 = torch.aten.mul.Tensor %1624, %1617 : !torch.vtensor<[4,9216,320],f16>, !torch.vtensor<[4,9216,320],f16> -> !torch.vtensor<[4,9216,320],f16>
%1626 = torch.aten.broadcast_to %888, %1185 : !torch.vtensor<[320,20,1],f16>, !torch.list<int> -> !torch.vtensor<[320,20,16],f16>
%1627 = torch.aten.clone %1626, %int0 : !torch.vtensor<[320,20,16],f16>, !torch.int -> !torch.vtensor<[320,20,16],f16>
%1628 = torch.aten.view %1627, %1188 : !torch.vtensor<[320,20,16],f16>, !torch.list<int> -> !torch.vtensor<[320,320],f16>
%1629 = torch.aten.mul.Tensor %887, %1628 : !torch.vtensor<[320,320],si8>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
%1630 = torch.aten.transpose.int %1629, %int0, %int1 : !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,320],f16>
%1631 = torch.aten.view %1625, %1192 : !torch.vtensor<[4,9216,320],f16>, !torch.list<int> -> !torch.vtensor<[36864,320],f16>
%1632 = torch.aten.mm %1631, %1630 : !torch.vtensor<[36864,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[36864,320],f16>
%1633 = torch.aten.mul.Scalar %886, %int1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320],f16>
%1634 = torch.aten.add.Tensor %1633, %1632, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[36864,320],f16>, !torch.int -> !torch.vtensor<[36864,320],f16>
%1635 = torch.aten.view %1634, %1165 : !torch.vtensor<[36864,320],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,320],f16>
%1636 = torch.prim.ListConstruct %int4, %int96, %int96, %int320 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1637 = torch.aten.view %1635, %1636 : !torch.vtensor<[4,9216,320],f16>, !torch.list<int> -> !torch.vtensor<[4,96,96,320],f16>
%1638 = torch.prim.ListConstruct %int0, %int3, %int1, %int2 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1639 = torch.aten.permute %1637, %1638 : !torch.vtensor<[4,96,96,320],f16>, !torch.list<int> -> !torch.vtensor<[4,320,96,96],f16>
%1640 = torch.aten.clone %1639, %int0 : !torch.vtensor<[4,320,96,96],f16>, !torch.int -> !torch.vtensor<[4,320,96,96],f16>
%1641 = torch.aten.add.Tensor %1640, %1124, %int1 : !torch.vtensor<[4,320,96,96],f16>, !torch.vtensor<[4,320,96,96],f16>, !torch.int -> !torch.vtensor<[4,320,96,96],f16>
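// ResNet block, first half: GroupNorm with 32 groups (view to
// [4,32,10,9216]; mean and variance accumulated in f64 over 92160 elements
// per group, eps %939), affine weight %884 and bias %885, then SiLU
// (x * sigmoid(x)).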
%1642 = torch.aten.view %1641, %987 : !torch.vtensor<[4,320,96,96],f16>, !torch.list<int> -> !torch.vtensor<[4,32,10,9216],f16>
%1643 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%1644 = torch.aten.to.dtype %1643, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%1645 = torch.prim.ListConstruct %int4, %int32, %int10, %int9216 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1646 = torch.aten.broadcast_to %1644, %1645 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[4,32,10,9216],f32>
%1647 = torch.aten.to.dtype %1642, %int6, %false, %false, %none : !torch.vtensor<[4,32,10,9216],f16>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,32,10,9216],f32>
%1648 = torch.prim.ListConstruct %int4, %int32, %int10, %int9216 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1649 = torch.aten.broadcast_to %1647, %1648 : !torch.vtensor<[4,32,10,9216],f32>, !torch.list<int> -> !torch.vtensor<[4,32,10,9216],f32>
%1650 = torch.aten.to.dtype %1649, %int7, %false, %false, %none : !torch.vtensor<[4,32,10,9216],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,32,10,9216],f64>
%1651 = torch.aten.sum.dim_IntList %1650, %996, %true, %none : !torch.vtensor<[4,32,10,9216],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f64>
%1652 = torch.aten.div.Scalar %1651, %int92160 : !torch.vtensor<[4,32,1,1],f64>, !torch.int -> !torch.vtensor<[4,32,1,1],f64>
%1653 = torch.aten.sub.Tensor %1650, %1652, %float1.000000e00 : !torch.vtensor<[4,32,10,9216],f64>, !torch.vtensor<[4,32,1,1],f64>, !torch.float -> !torch.vtensor<[4,32,10,9216],f64>
%1654 = torch.aten.mul.Tensor %1653, %1653 : !torch.vtensor<[4,32,10,9216],f64>, !torch.vtensor<[4,32,10,9216],f64> -> !torch.vtensor<[4,32,10,9216],f64>
%1655 = torch.aten.sum.dim_IntList %1654, %996, %true, %none : !torch.vtensor<[4,32,10,9216],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f64>
%1656 = torch.aten.div.Scalar %1655, %int92160 : !torch.vtensor<[4,32,1,1],f64>, !torch.int -> !torch.vtensor<[4,32,1,1],f64>
%1657 = torch.aten.to.dtype %1656, %int6, %false, %false, %none : !torch.vtensor<[4,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f32>
%1658 = torch.aten.sum.dim_IntList %1649, %996, %true, %none : !torch.vtensor<[4,32,10,9216],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f32>
%1659 = torch.aten.div.Scalar %1658, %int92160 : !torch.vtensor<[4,32,1,1],f32>, !torch.int -> !torch.vtensor<[4,32,1,1],f32>
%1660 = torch.aten.add.Tensor %1657, %939, %int1 : !torch.vtensor<[4,32,1,1],f32>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,32,1,1],f32>
%1661 = torch.aten.rsqrt %1660 : !torch.vtensor<[4,32,1,1],f32> -> !torch.vtensor<[4,32,1,1],f32>
%1662 = torch.aten.sub.Tensor %1642, %1659, %int1 : !torch.vtensor<[4,32,10,9216],f16>, !torch.vtensor<[4,32,1,1],f32>, !torch.int -> !torch.vtensor<[4,32,10,9216],f32>
%1663 = torch.aten.mul.Tensor %1662, %1661 : !torch.vtensor<[4,32,10,9216],f32>, !torch.vtensor<[4,32,1,1],f32> -> !torch.vtensor<[4,32,10,9216],f32>
%1664 = torch.aten.view %1663, %1011 : !torch.vtensor<[4,32,10,9216],f32>, !torch.list<int> -> !torch.vtensor<[4,320,96,96],f32>
%1665 = torch.aten.unsqueeze %885, %int0 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[1,320],f16>
%1666 = torch.aten.unsqueeze %1665, %int2 : !torch.vtensor<[1,320],f16>, !torch.int -> !torch.vtensor<[1,320,1],f16>
%1667 = torch.aten.unsqueeze %1666, %int3 : !torch.vtensor<[1,320,1],f16>, !torch.int -> !torch.vtensor<[1,320,1,1],f16>
%1668 = torch.aten.unsqueeze %884, %int0 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[1,320],f16>
%1669 = torch.aten.unsqueeze %1668, %int2 : !torch.vtensor<[1,320],f16>, !torch.int -> !torch.vtensor<[1,320,1],f16>
%1670 = torch.aten.unsqueeze %1669, %int3 : !torch.vtensor<[1,320,1],f16>, !torch.int -> !torch.vtensor<[1,320,1,1],f16>
%1671 = torch.aten.mul.Tensor %1664, %1670 : !torch.vtensor<[4,320,96,96],f32>, !torch.vtensor<[1,320,1,1],f16> -> !torch.vtensor<[4,320,96,96],f32>
%1672 = torch.aten.add.Tensor %1671, %1667, %int1 : !torch.vtensor<[4,320,96,96],f32>, !torch.vtensor<[1,320,1,1],f16>, !torch.int -> !torch.vtensor<[4,320,96,96],f32>
%1673 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%1674 = torch.aten.to.dtype %1673, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%1675 = torch.prim.ListConstruct %int4, %int320, %int96, %int96 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1676 = torch.aten.broadcast_to %1674, %1675 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[4,320,96,96],f16>
%1677 = torch.aten.to.dtype %1672, %int5, %false, %false, %none : !torch.vtensor<[4,320,96,96],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,320,96,96],f16>
%1678 = torch.prim.ListConstruct %int4, %int320, %int96, %int96 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1679 = torch.aten.broadcast_to %1677, %1678 : !torch.vtensor<[4,320,96,96],f16>, !torch.list<int> -> !torch.vtensor<[4,320,96,96],f16>
%1680 = torch.aten.sigmoid %1679 : !torch.vtensor<[4,320,96,96],f16> -> !torch.vtensor<[4,320,96,96],f16>
%1681 = torch.aten.mul.Tensor %1680, %1679 : !torch.vtensor<[4,320,96,96],f16>, !torch.vtensor<[4,320,96,96],f16> -> !torch.vtensor<[4,320,96,96],f16>
%1682 = torch.aten.view %1681, %1030 : !torch.vtensor<[4,320,96,96],f16>, !torch.list<int> -> !torch.vtensor<[4,20,16,96,96],f16>
%1683 = torch.aten.abs %1682 : !torch.vtensor<[4,20,16,96,96],f16> -> !torch.vtensor<[4,20,16,96,96],f16>
%values_30, %indices_31 = torch.aten.max.dim %1683, %int2, %true : !torch.vtensor<[4,20,16,96,96],f16>, !torch.int, !torch.bool -> !torch.vtensor<[4,20,1,96,96],f16>, !torch.vtensor<[4,20,1,96,96],si64>
%1684 = torch.aten.view %values_30, %1033 : !torch.vtensor<[4,20,1,96,96],f16>, !torch.list<int> -> !torch.vtensor<[4,20,1,96,96],f16>
%1685 = torch.aten.broadcast_to %1684, %1030 : !torch.vtensor<[4,20,1,96,96],f16>, !torch.list<int> -> !torch.vtensor<[4,20,16,96,96],f16>
%1686 = torch.aten.clone %1685, %int0 : !torch.vtensor<[4,20,16,96,96],f16>, !torch.int -> !torch.vtensor<[4,20,16,96,96],f16>
%1687 = torch.aten.view %1686, %1011 : !torch.vtensor<[4,20,16,96,96],f16>, !torch.list<int> -> !torch.vtensor<[4,320,96,96],f16>
%1688 = torch.aten.div.Scalar %1687, %int128 : !torch.vtensor<[4,320,96,96],f16>, !torch.int -> !torch.vtensor<[4,320,96,96],f16>
%1689 = torch.aten.detach %936 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%1690 = torch.aten.detach %1689 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%1691 = torch.aten.div.Tensor %1681, %1688 : !torch.vtensor<[4,320,96,96],f16>, !torch.vtensor<[4,320,96,96],f16> -> !torch.vtensor<[4,320,96,96],f16>
%1692 = torch.aten.add.Tensor %1691, %1690, %int1 : !torch.vtensor<[4,320,96,96],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,320,96,96],f16>
%1693 = torch.aten.round %1692 : !torch.vtensor<[4,320,96,96],f16> -> !torch.vtensor<[4,320,96,96],f16>
%1694 = torch.aten.clamp %1693, %int-128, %int127 : !torch.vtensor<[4,320,96,96],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,320,96,96],f16>
%1695 = torch.aten.sub.Tensor %1694, %1690, %int1 : !torch.vtensor<[4,320,96,96],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,320,96,96],f16>
%1696 = torch.aten.mul.Tensor %1695, %1688 : !torch.vtensor<[4,320,96,96],f16>, !torch.vtensor<[4,320,96,96],f16> -> !torch.vtensor<[4,320,96,96],f16>
%1697 = torch.aten.broadcast_to %883, %1047 : !torch.vtensor<[320,20,1,3,3],f16>, !torch.list<int> -> !torch.vtensor<[320,20,16,3,3],f16>
%1698 = torch.aten.clone %1697, %int0 : !torch.vtensor<[320,20,16,3,3],f16>, !torch.int -> !torch.vtensor<[320,20,16,3,3],f16>
%1699 = torch.aten.view %1698, %1050 : !torch.vtensor<[320,20,16,3,3],f16>, !torch.list<int> -> !torch.vtensor<[320,320,3,3],f16>
%1700 = torch.aten.mul.Tensor %882, %1699 : !torch.vtensor<[320,320,3,3],si8>, !torch.vtensor<[320,320,3,3],f16> -> !torch.vtensor<[320,320,3,3],f16>
%1701 = torch.aten.convolution %1696, %1700, %881, %984, %984, %984, %false, %985, %int1 : !torch.vtensor<[4,320,96,96],f16>, !torch.vtensor<[320,320,3,3],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[4,320,96,96],f16>
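// Time-embedding injection (assuming %983 holds the [4,1280] timestep
// embedding): SiLU, a 1280->320 linear with bias %879, then broadcast as a
// per-channel bias over the 96x96 feature map.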
%1702 = torch.aten.sigmoid %983 : !torch.vtensor<[4,1280],f16> -> !torch.vtensor<[4,1280],f16>
%1703 = torch.aten.mul.Tensor %1702, %983 : !torch.vtensor<[4,1280],f16>, !torch.vtensor<[4,1280],f16> -> !torch.vtensor<[4,1280],f16>
%1704 = torch.aten.transpose.int %880, %int0, %int1 : !torch.vtensor<[320,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,320],f16>
%1705 = torch.aten.mm %1703, %1704 : !torch.vtensor<[4,1280],f16>, !torch.vtensor<[1280,320],f16> -> !torch.vtensor<[4,320],f16>
%1706 = torch.aten.mul.Scalar %879, %int1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320],f16>
%1707 = torch.aten.add.Tensor %1706, %1705, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[4,320],f16>, !torch.int -> !torch.vtensor<[4,320],f16>
%1708 = torch.aten.unsqueeze %1707, %int2 : !torch.vtensor<[4,320],f16>, !torch.int -> !torch.vtensor<[4,320,1],f16>
%1709 = torch.aten.unsqueeze %1708, %int3 : !torch.vtensor<[4,320,1],f16>, !torch.int -> !torch.vtensor<[4,320,1,1],f16>
%1710 = torch.aten.add.Tensor %1701, %1709, %int1 : !torch.vtensor<[4,320,96,96],f16>, !torch.vtensor<[4,320,1,1],f16>, !torch.int -> !torch.vtensor<[4,320,96,96],f16>
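// Second half of the ResNet block: the same GroupNorm/SiLU/fake-quant/3x3
// convolution sequence, with its own affine parameters and weights.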
%1711 = torch.aten.view %1710, %987 : !torch.vtensor<[4,320,96,96],f16>, !torch.list<int> -> !torch.vtensor<[4,32,10,9216],f16>
%1712 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%1713 = torch.aten.to.dtype %1712, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%1714 = torch.prim.ListConstruct %int4, %int32, %int10, %int9216 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1715 = torch.aten.broadcast_to %1713, %1714 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[4,32,10,9216],f32>
%1716 = torch.aten.to.dtype %1711, %int6, %false, %false, %none : !torch.vtensor<[4,32,10,9216],f16>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,32,10,9216],f32>
%1717 = torch.prim.ListConstruct %int4, %int32, %int10, %int9216 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1718 = torch.aten.broadcast_to %1716, %1717 : !torch.vtensor<[4,32,10,9216],f32>, !torch.list<int> -> !torch.vtensor<[4,32,10,9216],f32>
%1719 = torch.aten.to.dtype %1718, %int7, %false, %false, %none : !torch.vtensor<[4,32,10,9216],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,32,10,9216],f64>
%1720 = torch.aten.sum.dim_IntList %1719, %996, %true, %none : !torch.vtensor<[4,32,10,9216],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f64>
%1721 = torch.aten.div.Scalar %1720, %int92160 : !torch.vtensor<[4,32,1,1],f64>, !torch.int -> !torch.vtensor<[4,32,1,1],f64>
%1722 = torch.aten.sub.Tensor %1719, %1721, %float1.000000e00 : !torch.vtensor<[4,32,10,9216],f64>, !torch.vtensor<[4,32,1,1],f64>, !torch.float -> !torch.vtensor<[4,32,10,9216],f64>
%1723 = torch.aten.mul.Tensor %1722, %1722 : !torch.vtensor<[4,32,10,9216],f64>, !torch.vtensor<[4,32,10,9216],f64> -> !torch.vtensor<[4,32,10,9216],f64>
%1724 = torch.aten.sum.dim_IntList %1723, %996, %true, %none : !torch.vtensor<[4,32,10,9216],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f64>
%1725 = torch.aten.div.Scalar %1724, %int92160 : !torch.vtensor<[4,32,1,1],f64>, !torch.int -> !torch.vtensor<[4,32,1,1],f64>
%1726 = torch.aten.to.dtype %1725, %int6, %false, %false, %none : !torch.vtensor<[4,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f32>
%1727 = torch.aten.sum.dim_IntList %1718, %996, %true, %none : !torch.vtensor<[4,32,10,9216],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f32>
%1728 = torch.aten.div.Scalar %1727, %int92160 : !torch.vtensor<[4,32,1,1],f32>, !torch.int -> !torch.vtensor<[4,32,1,1],f32>
%1729 = torch.aten.add.Tensor %1726, %939, %int1 : !torch.vtensor<[4,32,1,1],f32>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,32,1,1],f32>
%1730 = torch.aten.rsqrt %1729 : !torch.vtensor<[4,32,1,1],f32> -> !torch.vtensor<[4,32,1,1],f32>
%1731 = torch.aten.sub.Tensor %1711, %1728, %int1 : !torch.vtensor<[4,32,10,9216],f16>, !torch.vtensor<[4,32,1,1],f32>, !torch.int -> !torch.vtensor<[4,32,10,9216],f32>
%1732 = torch.aten.mul.Tensor %1731, %1730 : !torch.vtensor<[4,32,10,9216],f32>, !torch.vtensor<[4,32,1,1],f32> -> !torch.vtensor<[4,32,10,9216],f32>
%1733 = torch.aten.view %1732, %1011 : !torch.vtensor<[4,32,10,9216],f32>, !torch.list<int> -> !torch.vtensor<[4,320,96,96],f32>
%1734 = torch.aten.unsqueeze %878, %int0 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[1,320],f16>
%1735 = torch.aten.unsqueeze %1734, %int2 : !torch.vtensor<[1,320],f16>, !torch.int -> !torch.vtensor<[1,320,1],f16>
%1736 = torch.aten.unsqueeze %1735, %int3 : !torch.vtensor<[1,320,1],f16>, !torch.int -> !torch.vtensor<[1,320,1,1],f16>
%1737 = torch.aten.unsqueeze %877, %int0 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[1,320],f16>
%1738 = torch.aten.unsqueeze %1737, %int2 : !torch.vtensor<[1,320],f16>, !torch.int -> !torch.vtensor<[1,320,1],f16>
%1739 = torch.aten.unsqueeze %1738, %int3 : !torch.vtensor<[1,320,1],f16>, !torch.int -> !torch.vtensor<[1,320,1,1],f16>
%1740 = torch.aten.mul.Tensor %1733, %1739 : !torch.vtensor<[4,320,96,96],f32>, !torch.vtensor<[1,320,1,1],f16> -> !torch.vtensor<[4,320,96,96],f32>
%1741 = torch.aten.add.Tensor %1740, %1736, %int1 : !torch.vtensor<[4,320,96,96],f32>, !torch.vtensor<[1,320,1,1],f16>, !torch.int -> !torch.vtensor<[4,320,96,96],f32>
%1742 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%1743 = torch.aten.to.dtype %1742, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%1744 = torch.prim.ListConstruct %int4, %int320, %int96, %int96 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1745 = torch.aten.broadcast_to %1743, %1744 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[4,320,96,96],f16>
%1746 = torch.aten.to.dtype %1741, %int5, %false, %false, %none : !torch.vtensor<[4,320,96,96],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,320,96,96],f16>
%1747 = torch.prim.ListConstruct %int4, %int320, %int96, %int96 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1748 = torch.aten.broadcast_to %1746, %1747 : !torch.vtensor<[4,320,96,96],f16>, !torch.list<int> -> !torch.vtensor<[4,320,96,96],f16>
%1749 = torch.aten.sigmoid %1748 : !torch.vtensor<[4,320,96,96],f16> -> !torch.vtensor<[4,320,96,96],f16>
%1750 = torch.aten.mul.Tensor %1749, %1748 : !torch.vtensor<[4,320,96,96],f16>, !torch.vtensor<[4,320,96,96],f16> -> !torch.vtensor<[4,320,96,96],f16>
%1751 = torch.aten.view %1750, %1030 : !torch.vtensor<[4,320,96,96],f16>, !torch.list<int> -> !torch.vtensor<[4,20,16,96,96],f16>
%1752 = torch.aten.abs %1751 : !torch.vtensor<[4,20,16,96,96],f16> -> !torch.vtensor<[4,20,16,96,96],f16>
%values_32, %indices_33 = torch.aten.max.dim %1752, %int2, %true : !torch.vtensor<[4,20,16,96,96],f16>, !torch.int, !torch.bool -> !torch.vtensor<[4,20,1,96,96],f16>, !torch.vtensor<[4,20,1,96,96],si64>
%1753 = torch.aten.view %values_32, %1033 : !torch.vtensor<[4,20,1,96,96],f16>, !torch.list<int> -> !torch.vtensor<[4,20,1,96,96],f16>
%1754 = torch.aten.broadcast_to %1753, %1030 : !torch.vtensor<[4,20,1,96,96],f16>, !torch.list<int> -> !torch.vtensor<[4,20,16,96,96],f16>
%1755 = torch.aten.clone %1754, %int0 : !torch.vtensor<[4,20,16,96,96],f16>, !torch.int -> !torch.vtensor<[4,20,16,96,96],f16>
%1756 = torch.aten.view %1755, %1011 : !torch.vtensor<[4,20,16,96,96],f16>, !torch.list<int> -> !torch.vtensor<[4,320,96,96],f16>
%1757 = torch.aten.div.Scalar %1756, %int128 : !torch.vtensor<[4,320,96,96],f16>, !torch.int -> !torch.vtensor<[4,320,96,96],f16>
%1758 = torch.aten.detach %936 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%1759 = torch.aten.detach %1758 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%1760 = torch.aten.div.Tensor %1750, %1757 : !torch.vtensor<[4,320,96,96],f16>, !torch.vtensor<[4,320,96,96],f16> -> !torch.vtensor<[4,320,96,96],f16>
%1761 = torch.aten.add.Tensor %1760, %1759, %int1 : !torch.vtensor<[4,320,96,96],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,320,96,96],f16>
%1762 = torch.aten.round %1761 : !torch.vtensor<[4,320,96,96],f16> -> !torch.vtensor<[4,320,96,96],f16>
%1763 = torch.aten.clamp %1762, %int-128, %int127 : !torch.vtensor<[4,320,96,96],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,320,96,96],f16>
%1764 = torch.aten.sub.Tensor %1763, %1759, %int1 : !torch.vtensor<[4,320,96,96],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,320,96,96],f16>
%1765 = torch.aten.mul.Tensor %1764, %1757 : !torch.vtensor<[4,320,96,96],f16>, !torch.vtensor<[4,320,96,96],f16> -> !torch.vtensor<[4,320,96,96],f16>
%1766 = torch.aten.broadcast_to %876, %1047 : !torch.vtensor<[320,20,1,3,3],f16>, !torch.list<int> -> !torch.vtensor<[320,20,16,3,3],f16>
%1767 = torch.aten.clone %1766, %int0 : !torch.vtensor<[320,20,16,3,3],f16>, !torch.int -> !torch.vtensor<[320,20,16,3,3],f16>
%1768 = torch.aten.view %1767, %1050 : !torch.vtensor<[320,20,16,3,3],f16>, !torch.list<int> -> !torch.vtensor<[320,320,3,3],f16>
%1769 = torch.aten.mul.Tensor %875, %1768 : !torch.vtensor<[320,320,3,3],si8>, !torch.vtensor<[320,320,3,3],f16> -> !torch.vtensor<[320,320,3,3],f16>
%1770 = torch.aten.convolution %1765, %1769, %874, %984, %984, %984, %false, %985, %int1 : !torch.vtensor<[4,320,96,96],f16>, !torch.vtensor<[320,320,3,3],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[4,320,96,96],f16>
%1771 = torch.aten.add.Tensor %1641, %1770, %int1 : !torch.vtensor<[4,320,96,96],f16>, !torch.vtensor<[4,320,96,96],f16>, !torch.int -> !torch.vtensor<[4,320,96,96],f16>
%1772 = torch.aten.div.Tensor %1771, %925 : !torch.vtensor<[4,320,96,96],f16>, !torch.vtensor<[],f16> -> !torch.vtensor<[4,320,96,96],f16>
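// Skip connection (%1641) plus the conv output, scaled by dividing by the
// scalar %925, presumably the block's output scale factor. What follows is
// the GroupNorm and flattening ([4,320,96,96] -> [4,9216,320]) that feed
// the next transformer block's quantized proj_in linear.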
%1773 = torch.aten.view %1772, %987 : !torch.vtensor<[4,320,96,96],f16>, !torch.list<int> -> !torch.vtensor<[4,32,10,9216],f16>
%1774 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%1775 = torch.aten.to.dtype %1774, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%1776 = torch.prim.ListConstruct %int4, %int32, %int10, %int9216 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1777 = torch.aten.broadcast_to %1775, %1776 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[4,32,10,9216],f32>
%1778 = torch.aten.to.dtype %1773, %int6, %false, %false, %none : !torch.vtensor<[4,32,10,9216],f16>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,32,10,9216],f32>
%1779 = torch.prim.ListConstruct %int4, %int32, %int10, %int9216 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1780 = torch.aten.broadcast_to %1778, %1779 : !torch.vtensor<[4,32,10,9216],f32>, !torch.list<int> -> !torch.vtensor<[4,32,10,9216],f32>
%1781 = torch.aten.to.dtype %1780, %int7, %false, %false, %none : !torch.vtensor<[4,32,10,9216],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,32,10,9216],f64>
%1782 = torch.aten.sum.dim_IntList %1781, %996, %true, %none : !torch.vtensor<[4,32,10,9216],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f64>
%1783 = torch.aten.div.Scalar %1782, %int92160 : !torch.vtensor<[4,32,1,1],f64>, !torch.int -> !torch.vtensor<[4,32,1,1],f64>
%1784 = torch.aten.sub.Tensor %1781, %1783, %float1.000000e00 : !torch.vtensor<[4,32,10,9216],f64>, !torch.vtensor<[4,32,1,1],f64>, !torch.float -> !torch.vtensor<[4,32,10,9216],f64>
%1785 = torch.aten.mul.Tensor %1784, %1784 : !torch.vtensor<[4,32,10,9216],f64>, !torch.vtensor<[4,32,10,9216],f64> -> !torch.vtensor<[4,32,10,9216],f64>
%1786 = torch.aten.sum.dim_IntList %1785, %996, %true, %none : !torch.vtensor<[4,32,10,9216],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f64>
%1787 = torch.aten.div.Scalar %1786, %int92160 : !torch.vtensor<[4,32,1,1],f64>, !torch.int -> !torch.vtensor<[4,32,1,1],f64>
%1788 = torch.aten.to.dtype %1787, %int6, %false, %false, %none : !torch.vtensor<[4,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f32>
%1789 = torch.aten.sum.dim_IntList %1780, %996, %true, %none : !torch.vtensor<[4,32,10,9216],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f32>
%1790 = torch.aten.div.Scalar %1789, %int92160 : !torch.vtensor<[4,32,1,1],f32>, !torch.int -> !torch.vtensor<[4,32,1,1],f32>
%1791 = torch.aten.add.Tensor %1788, %924, %int1 : !torch.vtensor<[4,32,1,1],f32>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,32,1,1],f32>
%1792 = torch.aten.rsqrt %1791 : !torch.vtensor<[4,32,1,1],f32> -> !torch.vtensor<[4,32,1,1],f32>
%1793 = torch.aten.sub.Tensor %1773, %1790, %int1 : !torch.vtensor<[4,32,10,9216],f16>, !torch.vtensor<[4,32,1,1],f32>, !torch.int -> !torch.vtensor<[4,32,10,9216],f32>
%1794 = torch.aten.mul.Tensor %1793, %1792 : !torch.vtensor<[4,32,10,9216],f32>, !torch.vtensor<[4,32,1,1],f32> -> !torch.vtensor<[4,32,10,9216],f32>
%1795 = torch.aten.view %1794, %1011 : !torch.vtensor<[4,32,10,9216],f32>, !torch.list<int> -> !torch.vtensor<[4,320,96,96],f32>
%1796 = torch.aten.unsqueeze %873, %int0 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[1,320],f16>
%1797 = torch.aten.unsqueeze %1796, %int2 : !torch.vtensor<[1,320],f16>, !torch.int -> !torch.vtensor<[1,320,1],f16>
%1798 = torch.aten.unsqueeze %1797, %int3 : !torch.vtensor<[1,320,1],f16>, !torch.int -> !torch.vtensor<[1,320,1,1],f16>
%1799 = torch.aten.unsqueeze %872, %int0 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[1,320],f16>
%1800 = torch.aten.unsqueeze %1799, %int2 : !torch.vtensor<[1,320],f16>, !torch.int -> !torch.vtensor<[1,320,1],f16>
%1801 = torch.aten.unsqueeze %1800, %int3 : !torch.vtensor<[1,320,1],f16>, !torch.int -> !torch.vtensor<[1,320,1,1],f16>
%1802 = torch.aten.mul.Tensor %1795, %1801 : !torch.vtensor<[4,320,96,96],f32>, !torch.vtensor<[1,320,1,1],f16> -> !torch.vtensor<[4,320,96,96],f32>
%1803 = torch.aten.add.Tensor %1802, %1798, %int1 : !torch.vtensor<[4,320,96,96],f32>, !torch.vtensor<[1,320,1,1],f16>, !torch.int -> !torch.vtensor<[4,320,96,96],f32>
%1804 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%1805 = torch.aten.to.dtype %1804, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%1806 = torch.prim.ListConstruct %int4, %int320, %int96, %int96 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1807 = torch.aten.broadcast_to %1805, %1806 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[4,320,96,96],f16>
%1808 = torch.aten.to.dtype %1803, %int5, %false, %false, %none : !torch.vtensor<[4,320,96,96],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,320,96,96],f16>
%1809 = torch.prim.ListConstruct %int4, %int320, %int96, %int96 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1810 = torch.aten.broadcast_to %1808, %1809 : !torch.vtensor<[4,320,96,96],f16>, !torch.list<int> -> !torch.vtensor<[4,320,96,96],f16>
%1811 = torch.aten.permute %1810, %1163 : !torch.vtensor<[4,320,96,96],f16>, !torch.list<int> -> !torch.vtensor<[4,96,96,320],f16>
%1812 = torch.aten.view %1811, %1165 : !torch.vtensor<[4,96,96,320],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,320],f16>
%1813 = torch.aten.clone %1812, %int0 : !torch.vtensor<[4,9216,320],f16>, !torch.int -> !torch.vtensor<[4,9216,320],f16>
%1814 = torch.aten.view %1813, %1168 : !torch.vtensor<[4,9216,320],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,20,16],f16>
%1815 = torch.aten.abs %1814 : !torch.vtensor<[4,9216,20,16],f16> -> !torch.vtensor<[4,9216,20,16],f16>
%values_34, %indices_35 = torch.aten.max.dim %1815, %int3, %true : !torch.vtensor<[4,9216,20,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[4,9216,20,1],f16>, !torch.vtensor<[4,9216,20,1],si64>
%1816 = torch.aten.view %values_34, %1171 : !torch.vtensor<[4,9216,20,1],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,20,1],f16>
%1817 = torch.aten.broadcast_to %1816, %1168 : !torch.vtensor<[4,9216,20,1],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,20,16],f16>
%1818 = torch.aten.clone %1817, %int0 : !torch.vtensor<[4,9216,20,16],f16>, !torch.int -> !torch.vtensor<[4,9216,20,16],f16>
%1819 = torch.aten.view %1818, %1165 : !torch.vtensor<[4,9216,20,16],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,320],f16>
%1820 = torch.aten.div.Scalar %1819, %int128 : !torch.vtensor<[4,9216,320],f16>, !torch.int -> !torch.vtensor<[4,9216,320],f16>
%1821 = torch.aten.detach %936 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%1822 = torch.aten.detach %1821 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%1823 = torch.aten.div.Tensor %1813, %1820 : !torch.vtensor<[4,9216,320],f16>, !torch.vtensor<[4,9216,320],f16> -> !torch.vtensor<[4,9216,320],f16>
%1824 = torch.aten.add.Tensor %1823, %1822, %int1 : !torch.vtensor<[4,9216,320],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,9216,320],f16>
%1825 = torch.aten.round %1824 : !torch.vtensor<[4,9216,320],f16> -> !torch.vtensor<[4,9216,320],f16>
%1826 = torch.aten.clamp %1825, %int-128, %int127 : !torch.vtensor<[4,9216,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,9216,320],f16>
%1827 = torch.aten.sub.Tensor %1826, %1822, %int1 : !torch.vtensor<[4,9216,320],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,9216,320],f16>
%1828 = torch.aten.mul.Tensor %1827, %1820 : !torch.vtensor<[4,9216,320],f16>, !torch.vtensor<[4,9216,320],f16> -> !torch.vtensor<[4,9216,320],f16>
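// Weight dequantization for the first linear: per-group scales %871
// ([320,20,1], groups of 16) are broadcast onto the si8 weight %870, the
// result is transposed for x @ W^T, and bias %869 is added (the mul.Scalar by
// 1 looks like an addmm-decomposition artifact with beta = 1).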
%1829 = torch.aten.broadcast_to %871, %1185 : !torch.vtensor<[320,20,1],f16>, !torch.list<int> -> !torch.vtensor<[320,20,16],f16>
%1830 = torch.aten.clone %1829, %int0 : !torch.vtensor<[320,20,16],f16>, !torch.int -> !torch.vtensor<[320,20,16],f16>
%1831 = torch.aten.view %1830, %1188 : !torch.vtensor<[320,20,16],f16>, !torch.list<int> -> !torch.vtensor<[320,320],f16>
%1832 = torch.aten.mul.Tensor %870, %1831 : !torch.vtensor<[320,320],si8>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
%1833 = torch.aten.transpose.int %1832, %int0, %int1 : !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,320],f16>
%1834 = torch.aten.view %1828, %1192 : !torch.vtensor<[4,9216,320],f16>, !torch.list<int> -> !torch.vtensor<[36864,320],f16>
%1835 = torch.aten.mm %1834, %1833 : !torch.vtensor<[36864,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[36864,320],f16>
%1836 = torch.aten.mul.Scalar %869, %int1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320],f16>
%1837 = torch.aten.add.Tensor %1836, %1835, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[36864,320],f16>, !torch.int -> !torch.vtensor<[36864,320],f16>
%1838 = torch.aten.view %1837, %1165 : !torch.vtensor<[36864,320],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,320],f16>
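// Decomposed LayerNorm over the 320-dim channel: mean and variance via
// sum/320, rsqrt(var + 1e-05), then affine with %868 (weight) and %867 (bias).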
%1839 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%1840 = torch.aten.sum.dim_IntList %1838, %1839, %true, %none : !torch.vtensor<[4,9216,320],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,9216,1],f16>
%1841 = torch.aten.div.Scalar %1840, %int320 : !torch.vtensor<[4,9216,1],f16>, !torch.int -> !torch.vtensor<[4,9216,1],f16>
%1842 = torch.prim.ListConstruct %int4, %int9216, %int320 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1843 = torch.aten.broadcast_to %1841, %1842 : !torch.vtensor<[4,9216,1],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,320],f16>
%1844 = torch.aten.sub.Tensor %1838, %1843, %int1 : !torch.vtensor<[4,9216,320],f16>, !torch.vtensor<[4,9216,320],f16>, !torch.int -> !torch.vtensor<[4,9216,320],f16>
%1845 = torch.aten.mul.Tensor %1844, %1844 : !torch.vtensor<[4,9216,320],f16>, !torch.vtensor<[4,9216,320],f16> -> !torch.vtensor<[4,9216,320],f16>
%1846 = torch.aten.sum.dim_IntList %1845, %1839, %true, %none : !torch.vtensor<[4,9216,320],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,9216,1],f16>
%1847 = torch.aten.div.Scalar %1846, %int320 : !torch.vtensor<[4,9216,1],f16>, !torch.int -> !torch.vtensor<[4,9216,1],f16>
%1848 = torch.aten.add.Scalar %1847, %float1.000000e-05, %int1 : !torch.vtensor<[4,9216,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[4,9216,1],f16>
%1849 = torch.aten.rsqrt %1848 : !torch.vtensor<[4,9216,1],f16> -> !torch.vtensor<[4,9216,1],f16>
%1850 = torch.prim.ListConstruct %int4, %int9216, %int320 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1851 = torch.aten.broadcast_to %1849, %1850 : !torch.vtensor<[4,9216,1],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,320],f16>
%1852 = torch.aten.mul.Tensor %1844, %1851 : !torch.vtensor<[4,9216,320],f16>, !torch.vtensor<[4,9216,320],f16> -> !torch.vtensor<[4,9216,320],f16>
%1853 = torch.aten.mul.Tensor %1852, %868 : !torch.vtensor<[4,9216,320],f16>, !torch.vtensor<[320],f16> -> !torch.vtensor<[4,9216,320],f16>
%1854 = torch.aten.add.Tensor %1853, %867, %int1 : !torch.vtensor<[4,9216,320],f16>, !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[4,9216,320],f16>
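// Self-attention projections: the normalized tokens %1854 feed three QDQ +
// matmul groups — Q (%1881), K (%1931), and V (%1935) — each viewed as
// [4, 9216, 5, 64] and folded to [20, 9216, 64] (5 heads of dim 64 per batch);
// none of the three adds a bias.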
%1855 = torch.aten.view %1854, %1168 : !torch.vtensor<[4,9216,320],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,20,16],f16>
%1856 = torch.aten.abs %1855 : !torch.vtensor<[4,9216,20,16],f16> -> !torch.vtensor<[4,9216,20,16],f16>
%values_36, %indices_37 = torch.aten.max.dim %1856, %int3, %true : !torch.vtensor<[4,9216,20,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[4,9216,20,1],f16>, !torch.vtensor<[4,9216,20,1],si64>
%1857 = torch.aten.view %values_36, %1171 : !torch.vtensor<[4,9216,20,1],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,20,1],f16>
%1858 = torch.aten.broadcast_to %1857, %1168 : !torch.vtensor<[4,9216,20,1],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,20,16],f16>
%1859 = torch.aten.clone %1858, %int0 : !torch.vtensor<[4,9216,20,16],f16>, !torch.int -> !torch.vtensor<[4,9216,20,16],f16>
%1860 = torch.aten.view %1859, %1165 : !torch.vtensor<[4,9216,20,16],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,320],f16>
%1861 = torch.aten.div.Scalar %1860, %int128 : !torch.vtensor<[4,9216,320],f16>, !torch.int -> !torch.vtensor<[4,9216,320],f16>
%1862 = torch.aten.detach %936 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%1863 = torch.aten.detach %1862 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%1864 = torch.aten.div.Tensor %1854, %1861 : !torch.vtensor<[4,9216,320],f16>, !torch.vtensor<[4,9216,320],f16> -> !torch.vtensor<[4,9216,320],f16>
%1865 = torch.aten.add.Tensor %1864, %1863, %int1 : !torch.vtensor<[4,9216,320],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,9216,320],f16>
%1866 = torch.aten.round %1865 : !torch.vtensor<[4,9216,320],f16> -> !torch.vtensor<[4,9216,320],f16>
%1867 = torch.aten.clamp %1866, %int-128, %int127 : !torch.vtensor<[4,9216,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,9216,320],f16>
%1868 = torch.aten.sub.Tensor %1867, %1863, %int1 : !torch.vtensor<[4,9216,320],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,9216,320],f16>
%1869 = torch.aten.mul.Tensor %1868, %1861 : !torch.vtensor<[4,9216,320],f16>, !torch.vtensor<[4,9216,320],f16> -> !torch.vtensor<[4,9216,320],f16>
%1870 = torch.aten.broadcast_to %866, %1185 : !torch.vtensor<[320,20,1],f16>, !torch.list<int> -> !torch.vtensor<[320,20,16],f16>
%1871 = torch.aten.clone %1870, %int0 : !torch.vtensor<[320,20,16],f16>, !torch.int -> !torch.vtensor<[320,20,16],f16>
%1872 = torch.aten.view %1871, %1188 : !torch.vtensor<[320,20,16],f16>, !torch.list<int> -> !torch.vtensor<[320,320],f16>
%1873 = torch.aten.mul.Tensor %865, %1872 : !torch.vtensor<[320,320],si8>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
%1874 = torch.aten.transpose.int %1873, %int0, %int1 : !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,320],f16>
%1875 = torch.aten.view %1869, %1192 : !torch.vtensor<[4,9216,320],f16>, !torch.list<int> -> !torch.vtensor<[36864,320],f16>
%1876 = torch.aten.mm %1875, %1874 : !torch.vtensor<[36864,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[36864,320],f16>
%1877 = torch.aten.view %1876, %1165 : !torch.vtensor<[36864,320],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,320],f16>
%1878 = torch.aten.view %1877, %1237 : !torch.vtensor<[4,9216,320],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,5,64],f16>
%1879 = torch.aten.permute %1878, %1239 : !torch.vtensor<[4,9216,5,64],f16>, !torch.list<int> -> !torch.vtensor<[4,5,9216,64],f16>
%1880 = torch.aten.clone %1879, %int0 : !torch.vtensor<[4,5,9216,64],f16>, !torch.int -> !torch.vtensor<[4,5,9216,64],f16>
%1881 = torch.aten.view %1880, %1242 : !torch.vtensor<[4,5,9216,64],f16>, !torch.list<int> -> !torch.vtensor<[20,9216,64],f16>
%1882 = torch.aten.view %1854, %1168 : !torch.vtensor<[4,9216,320],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,20,16],f16>
%1883 = torch.aten.abs %1882 : !torch.vtensor<[4,9216,20,16],f16> -> !torch.vtensor<[4,9216,20,16],f16>
%values_38, %indices_39 = torch.aten.max.dim %1883, %int3, %true : !torch.vtensor<[4,9216,20,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[4,9216,20,1],f16>, !torch.vtensor<[4,9216,20,1],si64>
%1884 = torch.aten.view %values_38, %1171 : !torch.vtensor<[4,9216,20,1],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,20,1],f16>
%1885 = torch.aten.broadcast_to %1884, %1168 : !torch.vtensor<[4,9216,20,1],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,20,16],f16>
%1886 = torch.aten.clone %1885, %int0 : !torch.vtensor<[4,9216,20,16],f16>, !torch.int -> !torch.vtensor<[4,9216,20,16],f16>
%1887 = torch.aten.view %1886, %1165 : !torch.vtensor<[4,9216,20,16],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,320],f16>
%1888 = torch.aten.div.Scalar %1887, %int128 : !torch.vtensor<[4,9216,320],f16>, !torch.int -> !torch.vtensor<[4,9216,320],f16>
%1889 = torch.aten.detach %936 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%1890 = torch.aten.detach %1889 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%1891 = torch.aten.div.Tensor %1854, %1888 : !torch.vtensor<[4,9216,320],f16>, !torch.vtensor<[4,9216,320],f16> -> !torch.vtensor<[4,9216,320],f16>
%1892 = torch.aten.add.Tensor %1891, %1890, %int1 : !torch.vtensor<[4,9216,320],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,9216,320],f16>
%1893 = torch.aten.round %1892 : !torch.vtensor<[4,9216,320],f16> -> !torch.vtensor<[4,9216,320],f16>
%1894 = torch.aten.clamp %1893, %int-128, %int127 : !torch.vtensor<[4,9216,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,9216,320],f16>
%1895 = torch.aten.sub.Tensor %1894, %1890, %int1 : !torch.vtensor<[4,9216,320],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,9216,320],f16>
%1896 = torch.aten.mul.Tensor %1895, %1888 : !torch.vtensor<[4,9216,320],f16>, !torch.vtensor<[4,9216,320],f16> -> !torch.vtensor<[4,9216,320],f16>
%1897 = torch.aten.broadcast_to %864, %1185 : !torch.vtensor<[320,20,1],f16>, !torch.list<int> -> !torch.vtensor<[320,20,16],f16>
%1898 = torch.aten.clone %1897, %int0 : !torch.vtensor<[320,20,16],f16>, !torch.int -> !torch.vtensor<[320,20,16],f16>
%1899 = torch.aten.view %1898, %1188 : !torch.vtensor<[320,20,16],f16>, !torch.list<int> -> !torch.vtensor<[320,320],f16>
%1900 = torch.aten.mul.Tensor %863, %1899 : !torch.vtensor<[320,320],si8>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
%1901 = torch.aten.transpose.int %1900, %int0, %int1 : !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,320],f16>
%1902 = torch.aten.view %1896, %1192 : !torch.vtensor<[4,9216,320],f16>, !torch.list<int> -> !torch.vtensor<[36864,320],f16>
%1903 = torch.aten.mm %1902, %1901 : !torch.vtensor<[36864,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[36864,320],f16>
%1904 = torch.aten.view %1903, %1165 : !torch.vtensor<[36864,320],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,320],f16>
%1905 = torch.aten.view %1854, %1168 : !torch.vtensor<[4,9216,320],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,20,16],f16>
%1906 = torch.aten.abs %1905 : !torch.vtensor<[4,9216,20,16],f16> -> !torch.vtensor<[4,9216,20,16],f16>
%values_40, %indices_41 = torch.aten.max.dim %1906, %int3, %true : !torch.vtensor<[4,9216,20,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[4,9216,20,1],f16>, !torch.vtensor<[4,9216,20,1],si64>
%1907 = torch.aten.view %values_40, %1171 : !torch.vtensor<[4,9216,20,1],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,20,1],f16>
%1908 = torch.aten.broadcast_to %1907, %1168 : !torch.vtensor<[4,9216,20,1],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,20,16],f16>
%1909 = torch.aten.clone %1908, %int0 : !torch.vtensor<[4,9216,20,16],f16>, !torch.int -> !torch.vtensor<[4,9216,20,16],f16>
%1910 = torch.aten.view %1909, %1165 : !torch.vtensor<[4,9216,20,16],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,320],f16>
%1911 = torch.aten.div.Scalar %1910, %int128 : !torch.vtensor<[4,9216,320],f16>, !torch.int -> !torch.vtensor<[4,9216,320],f16>
%1912 = torch.aten.detach %936 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%1913 = torch.aten.detach %1912 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%1914 = torch.aten.div.Tensor %1854, %1911 : !torch.vtensor<[4,9216,320],f16>, !torch.vtensor<[4,9216,320],f16> -> !torch.vtensor<[4,9216,320],f16>
%1915 = torch.aten.add.Tensor %1914, %1913, %int1 : !torch.vtensor<[4,9216,320],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,9216,320],f16>
%1916 = torch.aten.round %1915 : !torch.vtensor<[4,9216,320],f16> -> !torch.vtensor<[4,9216,320],f16>
%1917 = torch.aten.clamp %1916, %int-128, %int127 : !torch.vtensor<[4,9216,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,9216,320],f16>
%1918 = torch.aten.sub.Tensor %1917, %1913, %int1 : !torch.vtensor<[4,9216,320],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,9216,320],f16>
%1919 = torch.aten.mul.Tensor %1918, %1911 : !torch.vtensor<[4,9216,320],f16>, !torch.vtensor<[4,9216,320],f16> -> !torch.vtensor<[4,9216,320],f16>
%1920 = torch.aten.broadcast_to %862, %1185 : !torch.vtensor<[320,20,1],f16>, !torch.list<int> -> !torch.vtensor<[320,20,16],f16>
%1921 = torch.aten.clone %1920, %int0 : !torch.vtensor<[320,20,16],f16>, !torch.int -> !torch.vtensor<[320,20,16],f16>
%1922 = torch.aten.view %1921, %1188 : !torch.vtensor<[320,20,16],f16>, !torch.list<int> -> !torch.vtensor<[320,320],f16>
%1923 = torch.aten.mul.Tensor %861, %1922 : !torch.vtensor<[320,320],si8>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
%1924 = torch.aten.transpose.int %1923, %int0, %int1 : !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,320],f16>
%1925 = torch.aten.view %1919, %1192 : !torch.vtensor<[4,9216,320],f16>, !torch.list<int> -> !torch.vtensor<[36864,320],f16>
%1926 = torch.aten.mm %1925, %1924 : !torch.vtensor<[36864,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[36864,320],f16>
%1927 = torch.aten.view %1926, %1165 : !torch.vtensor<[36864,320],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,320],f16>
%1928 = torch.aten.view %1904, %1237 : !torch.vtensor<[4,9216,320],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,5,64],f16>
%1929 = torch.aten.permute %1928, %1239 : !torch.vtensor<[4,9216,5,64],f16>, !torch.list<int> -> !torch.vtensor<[4,5,9216,64],f16>
%1930 = torch.aten.clone %1929, %int0 : !torch.vtensor<[4,5,9216,64],f16>, !torch.int -> !torch.vtensor<[4,5,9216,64],f16>
%1931 = torch.aten.view %1930, %1242 : !torch.vtensor<[4,5,9216,64],f16>, !torch.list<int> -> !torch.vtensor<[20,9216,64],f16>
%1932 = torch.aten.view %1927, %1237 : !torch.vtensor<[4,9216,320],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,5,64],f16>
%1933 = torch.aten.permute %1932, %1239 : !torch.vtensor<[4,9216,5,64],f16>, !torch.list<int> -> !torch.vtensor<[4,5,9216,64],f16>
%1934 = torch.aten.clone %1933, %int0 : !torch.vtensor<[4,5,9216,64],f16>, !torch.int -> !torch.vtensor<[4,5,9216,64],f16>
%1935 = torch.aten.view %1934, %1242 : !torch.vtensor<[4,5,9216,64],f16>, !torch.list<int> -> !torch.vtensor<[20,9216,64],f16>
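// Attention scores in f32: Q @ K^T scaled by 0.125 (= 1/sqrt(64)), then a
// numerically stable softmax (max-subtract, exp, normalize). The
// empty.memory_format tensor added with alpha 0 (%1950 -> %1954) contributes
// nothing and looks like the beta = 0 term of a baddbmm decomposition.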
%1936 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%1937 = torch.aten.to.dtype %1936, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%1938 = torch.prim.ListConstruct %int20, %int9216, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1939 = torch.aten.broadcast_to %1937, %1938 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[20,9216,64],f32>
%1940 = torch.aten.to.dtype %1881, %int6, %false, %false, %none : !torch.vtensor<[20,9216,64],f16>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[20,9216,64],f32>
%1941 = torch.prim.ListConstruct %int20, %int9216, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1942 = torch.aten.broadcast_to %1940, %1941 : !torch.vtensor<[20,9216,64],f32>, !torch.list<int> -> !torch.vtensor<[20,9216,64],f32>
%1943 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%1944 = torch.aten.to.dtype %1943, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%1945 = torch.prim.ListConstruct %int20, %int9216, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1946 = torch.aten.broadcast_to %1944, %1945 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[20,9216,64],f32>
%1947 = torch.aten.to.dtype %1931, %int6, %false, %false, %none : !torch.vtensor<[20,9216,64],f16>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[20,9216,64],f32>
%1948 = torch.prim.ListConstruct %int20, %int9216, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1949 = torch.aten.broadcast_to %1947, %1948 : !torch.vtensor<[20,9216,64],f32>, !torch.list<int> -> !torch.vtensor<[20,9216,64],f32>
%1950 = torch.aten.empty.memory_format %1312, %int6, %none, %cuda3A0, %false, %none : !torch.list<int>, !torch.int, !torch.none, !torch.Device, !torch.bool, !torch.none -> !torch.vtensor<[20,9216,9216],f32>
%1951 = torch.aten.transpose.int %1949, %int-1, %int-2 : !torch.vtensor<[20,9216,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[20,64,9216],f32>
%1952 = torch.aten.bmm %1942, %1951 : !torch.vtensor<[20,9216,64],f32>, !torch.vtensor<[20,64,9216],f32> -> !torch.vtensor<[20,9216,9216],f32>
%1953 = torch.aten.mul.Scalar %1952, %float1.250000e-01 : !torch.vtensor<[20,9216,9216],f32>, !torch.float -> !torch.vtensor<[20,9216,9216],f32>
%1954 = torch.aten.add.Tensor %1953, %1950, %int0 : !torch.vtensor<[20,9216,9216],f32>, !torch.vtensor<[20,9216,9216],f32>, !torch.int -> !torch.vtensor<[20,9216,9216],f32>
%values_42, %indices_43 = torch.aten.max.dim %1954, %int-1, %true : !torch.vtensor<[20,9216,9216],f32>, !torch.int, !torch.bool -> !torch.vtensor<[20,9216,1],f32>, !torch.vtensor<[20,9216,1],si64>
%1955 = torch.aten.sub.Tensor %1954, %values_42, %float1.000000e00 : !torch.vtensor<[20,9216,9216],f32>, !torch.vtensor<[20,9216,1],f32>, !torch.float -> !torch.vtensor<[20,9216,9216],f32>
%1956 = torch.aten.exp %1955 : !torch.vtensor<[20,9216,9216],f32> -> !torch.vtensor<[20,9216,9216],f32>
%1957 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%1958 = torch.aten.sum.dim_IntList %1956, %1957, %true, %none : !torch.vtensor<[20,9216,9216],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[20,9216,1],f32>
%1959 = torch.aten.div.Tensor %1956, %1958 : !torch.vtensor<[20,9216,9216],f32>, !torch.vtensor<[20,9216,1],f32> -> !torch.vtensor<[20,9216,9216],f32>
%1960 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%1961 = torch.aten.to.dtype %1960, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%1962 = torch.prim.ListConstruct %int20, %int9216, %int9216 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1963 = torch.aten.broadcast_to %1961, %1962 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[20,9216,9216],f16>
%1964 = torch.aten.to.dtype %1959, %int5, %false, %false, %none : !torch.vtensor<[20,9216,9216],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[20,9216,9216],f16>
%1965 = torch.prim.ListConstruct %int20, %int9216, %int9216 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1966 = torch.aten.broadcast_to %1964, %1965 : !torch.vtensor<[20,9216,9216],f16>, !torch.list<int> -> !torch.vtensor<[20,9216,9216],f16>
%1967 = torch.aten.bmm %1966, %1935 : !torch.vtensor<[20,9216,9216],f16>, !torch.vtensor<[20,9216,64],f16> -> !torch.vtensor<[20,9216,64],f16>
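// softmax @ V is done in f16; heads are merged back to [4, 9216, 320], then
// QDQ and the attention output projection (weights %860/%859, bias %858),
// ending with a residual add onto the earlier projection output %1838.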
%1968 = torch.aten.view %1967, %1331 : !torch.vtensor<[20,9216,64],f16>, !torch.list<int> -> !torch.vtensor<[4,5,9216,64],f16>
%1969 = torch.aten.permute %1968, %1239 : !torch.vtensor<[4,5,9216,64],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,5,64],f16>
%1970 = torch.aten.clone %1969, %int0 : !torch.vtensor<[4,9216,5,64],f16>, !torch.int -> !torch.vtensor<[4,9216,5,64],f16>
%1971 = torch.aten.view %1970, %1165 : !torch.vtensor<[4,9216,5,64],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,320],f16>
%1972 = torch.aten.view %1971, %1168 : !torch.vtensor<[4,9216,320],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,20,16],f16>
%1973 = torch.aten.abs %1972 : !torch.vtensor<[4,9216,20,16],f16> -> !torch.vtensor<[4,9216,20,16],f16>
%values_44, %indices_45 = torch.aten.max.dim %1973, %int3, %true : !torch.vtensor<[4,9216,20,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[4,9216,20,1],f16>, !torch.vtensor<[4,9216,20,1],si64>
%1974 = torch.aten.view %values_44, %1171 : !torch.vtensor<[4,9216,20,1],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,20,1],f16>
%1975 = torch.aten.broadcast_to %1974, %1168 : !torch.vtensor<[4,9216,20,1],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,20,16],f16>
%1976 = torch.aten.clone %1975, %int0 : !torch.vtensor<[4,9216,20,16],f16>, !torch.int -> !torch.vtensor<[4,9216,20,16],f16>
%1977 = torch.aten.view %1976, %1165 : !torch.vtensor<[4,9216,20,16],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,320],f16>
%1978 = torch.aten.div.Scalar %1977, %int128 : !torch.vtensor<[4,9216,320],f16>, !torch.int -> !torch.vtensor<[4,9216,320],f16>
%1979 = torch.aten.detach %936 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%1980 = torch.aten.detach %1979 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%1981 = torch.aten.div.Tensor %1971, %1978 : !torch.vtensor<[4,9216,320],f16>, !torch.vtensor<[4,9216,320],f16> -> !torch.vtensor<[4,9216,320],f16>
%1982 = torch.aten.add.Tensor %1981, %1980, %int1 : !torch.vtensor<[4,9216,320],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,9216,320],f16>
%1983 = torch.aten.round %1982 : !torch.vtensor<[4,9216,320],f16> -> !torch.vtensor<[4,9216,320],f16>
%1984 = torch.aten.clamp %1983, %int-128, %int127 : !torch.vtensor<[4,9216,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,9216,320],f16>
%1985 = torch.aten.sub.Tensor %1984, %1980, %int1 : !torch.vtensor<[4,9216,320],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,9216,320],f16>
%1986 = torch.aten.mul.Tensor %1985, %1978 : !torch.vtensor<[4,9216,320],f16>, !torch.vtensor<[4,9216,320],f16> -> !torch.vtensor<[4,9216,320],f16>
%1987 = torch.aten.broadcast_to %860, %1185 : !torch.vtensor<[320,20,1],f16>, !torch.list<int> -> !torch.vtensor<[320,20,16],f16>
%1988 = torch.aten.clone %1987, %int0 : !torch.vtensor<[320,20,16],f16>, !torch.int -> !torch.vtensor<[320,20,16],f16>
%1989 = torch.aten.view %1988, %1188 : !torch.vtensor<[320,20,16],f16>, !torch.list<int> -> !torch.vtensor<[320,320],f16>
%1990 = torch.aten.mul.Tensor %859, %1989 : !torch.vtensor<[320,320],si8>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
%1991 = torch.aten.transpose.int %1990, %int0, %int1 : !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,320],f16>
%1992 = torch.aten.view %1986, %1192 : !torch.vtensor<[4,9216,320],f16>, !torch.list<int> -> !torch.vtensor<[36864,320],f16>
%1993 = torch.aten.mm %1992, %1991 : !torch.vtensor<[36864,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[36864,320],f16>
%1994 = torch.aten.mul.Scalar %858, %int1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320],f16>
%1995 = torch.aten.add.Tensor %1994, %1993, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[36864,320],f16>, !torch.int -> !torch.vtensor<[36864,320],f16>
%1996 = torch.aten.view %1995, %1165 : !torch.vtensor<[36864,320],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,320],f16>
%1997 = torch.aten.add.Tensor %1996, %1838, %int1 : !torch.vtensor<[4,9216,320],f16>, !torch.vtensor<[4,9216,320],f16>, !torch.int -> !torch.vtensor<[4,9216,320],f16>
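// Second decomposed LayerNorm (same mean/variance/rsqrt pattern as above),
// ahead of what appears to be a cross-attention sub-block.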
%1998 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%1999 = torch.aten.sum.dim_IntList %1997, %1998, %true, %none : !torch.vtensor<[4,9216,320],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,9216,1],f16>
%2000 = torch.aten.div.Scalar %1999, %int320 : !torch.vtensor<[4,9216,1],f16>, !torch.int -> !torch.vtensor<[4,9216,1],f16>
%2001 = torch.prim.ListConstruct %int4, %int9216, %int320 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2002 = torch.aten.broadcast_to %2000, %2001 : !torch.vtensor<[4,9216,1],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,320],f16>
%2003 = torch.aten.sub.Tensor %1997, %2002, %int1 : !torch.vtensor<[4,9216,320],f16>, !torch.vtensor<[4,9216,320],f16>, !torch.int -> !torch.vtensor<[4,9216,320],f16>
%2004 = torch.aten.mul.Tensor %2003, %2003 : !torch.vtensor<[4,9216,320],f16>, !torch.vtensor<[4,9216,320],f16> -> !torch.vtensor<[4,9216,320],f16>
%2005 = torch.aten.sum.dim_IntList %2004, %1998, %true, %none : !torch.vtensor<[4,9216,320],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,9216,1],f16>
%2006 = torch.aten.div.Scalar %2005, %int320 : !torch.vtensor<[4,9216,1],f16>, !torch.int -> !torch.vtensor<[4,9216,1],f16>
%2007 = torch.aten.add.Scalar %2006, %float1.000000e-05, %int1 : !torch.vtensor<[4,9216,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[4,9216,1],f16>
%2008 = torch.aten.rsqrt %2007 : !torch.vtensor<[4,9216,1],f16> -> !torch.vtensor<[4,9216,1],f16>
%2009 = torch.prim.ListConstruct %int4, %int9216, %int320 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2010 = torch.aten.broadcast_to %2008, %2009 : !torch.vtensor<[4,9216,1],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,320],f16>
%2011 = torch.aten.mul.Tensor %2003, %2010 : !torch.vtensor<[4,9216,320],f16>, !torch.vtensor<[4,9216,320],f16> -> !torch.vtensor<[4,9216,320],f16>
%2012 = torch.aten.mul.Tensor %2011, %857 : !torch.vtensor<[4,9216,320],f16>, !torch.vtensor<[320],f16> -> !torch.vtensor<[4,9216,320],f16>
%2013 = torch.aten.add.Tensor %2012, %856, %int1 : !torch.vtensor<[4,9216,320],f16>, !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[4,9216,320],f16>
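// Cross-attention Q projection from the normalized hidden states (QDQ, then
// weights %855/%854, no bias).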
%2014 = torch.aten.view %2013, %1168 : !torch.vtensor<[4,9216,320],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,20,16],f16>
%2015 = torch.aten.abs %2014 : !torch.vtensor<[4,9216,20,16],f16> -> !torch.vtensor<[4,9216,20,16],f16>
%values_46, %indices_47 = torch.aten.max.dim %2015, %int3, %true : !torch.vtensor<[4,9216,20,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[4,9216,20,1],f16>, !torch.vtensor<[4,9216,20,1],si64>
%2016 = torch.aten.view %values_46, %1171 : !torch.vtensor<[4,9216,20,1],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,20,1],f16>
%2017 = torch.aten.broadcast_to %2016, %1168 : !torch.vtensor<[4,9216,20,1],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,20,16],f16>
%2018 = torch.aten.clone %2017, %int0 : !torch.vtensor<[4,9216,20,16],f16>, !torch.int -> !torch.vtensor<[4,9216,20,16],f16>
%2019 = torch.aten.view %2018, %1165 : !torch.vtensor<[4,9216,20,16],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,320],f16>
%2020 = torch.aten.div.Scalar %2019, %int128 : !torch.vtensor<[4,9216,320],f16>, !torch.int -> !torch.vtensor<[4,9216,320],f16>
%2021 = torch.aten.detach %936 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%2022 = torch.aten.detach %2021 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%2023 = torch.aten.div.Tensor %2013, %2020 : !torch.vtensor<[4,9216,320],f16>, !torch.vtensor<[4,9216,320],f16> -> !torch.vtensor<[4,9216,320],f16>
%2024 = torch.aten.add.Tensor %2023, %2022, %int1 : !torch.vtensor<[4,9216,320],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,9216,320],f16>
%2025 = torch.aten.round %2024 : !torch.vtensor<[4,9216,320],f16> -> !torch.vtensor<[4,9216,320],f16>
%2026 = torch.aten.clamp %2025, %int-128, %int127 : !torch.vtensor<[4,9216,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,9216,320],f16>
%2027 = torch.aten.sub.Tensor %2026, %2022, %int1 : !torch.vtensor<[4,9216,320],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,9216,320],f16>
%2028 = torch.aten.mul.Tensor %2027, %2020 : !torch.vtensor<[4,9216,320],f16>, !torch.vtensor<[4,9216,320],f16> -> !torch.vtensor<[4,9216,320],f16>
%2029 = torch.aten.broadcast_to %855, %1185 : !torch.vtensor<[320,20,1],f16>, !torch.list<int> -> !torch.vtensor<[320,20,16],f16>
%2030 = torch.aten.clone %2029, %int0 : !torch.vtensor<[320,20,16],f16>, !torch.int -> !torch.vtensor<[320,20,16],f16>
%2031 = torch.aten.view %2030, %1188 : !torch.vtensor<[320,20,16],f16>, !torch.list<int> -> !torch.vtensor<[320,320],f16>
%2032 = torch.aten.mul.Tensor %854, %2031 : !torch.vtensor<[320,320],si8>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
%2033 = torch.aten.transpose.int %2032, %int0, %int1 : !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,320],f16>
%2034 = torch.aten.view %2028, %1192 : !torch.vtensor<[4,9216,320],f16>, !torch.list<int> -> !torch.vtensor<[36864,320],f16>
%2035 = torch.aten.mm %2034, %2033 : !torch.vtensor<[36864,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[36864,320],f16>
%2036 = torch.aten.view %2035, %1165 : !torch.vtensor<[36864,320],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,320],f16>
%2037 = torch.aten.view %2036, %1237 : !torch.vtensor<[4,9216,320],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,5,64],f16>
%2038 = torch.aten.permute %2037, %1239 : !torch.vtensor<[4,9216,5,64],f16>, !torch.list<int> -> !torch.vtensor<[4,5,9216,64],f16>
%2039 = torch.aten.clone %2038, %int0 : !torch.vtensor<[4,5,9216,64],f16>, !torch.int -> !torch.vtensor<[4,5,9216,64],f16>
%2040 = torch.aten.view %2039, %1242 : !torch.vtensor<[4,5,9216,64],f16>, !torch.list<int> -> !torch.vtensor<[20,9216,64],f16>
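// K and V are projected from the encoder hidden states %arg2 ([4, 77, 1024],
// presumably text embeddings): QDQ with 64 groups of 16 along the 1024 dim,
// si8 weights %852 / %850 dequantized via [320,64,1] scales, mm with no bias,
// and the usual reshape to [20, 77, 64] heads.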
%2041 = torch.aten.view %arg2, %1405 : !torch.vtensor<[4,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,77,64,16],f16>
%2042 = torch.aten.abs %2041 : !torch.vtensor<[4,77,64,16],f16> -> !torch.vtensor<[4,77,64,16],f16>
%values_48, %indices_49 = torch.aten.max.dim %2042, %int3, %true : !torch.vtensor<[4,77,64,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[4,77,64,1],f16>, !torch.vtensor<[4,77,64,1],si64>
%2043 = torch.aten.view %values_48, %1408 : !torch.vtensor<[4,77,64,1],f16>, !torch.list<int> -> !torch.vtensor<[4,77,64,1],f16>
%2044 = torch.aten.broadcast_to %2043, %1405 : !torch.vtensor<[4,77,64,1],f16>, !torch.list<int> -> !torch.vtensor<[4,77,64,16],f16>
%2045 = torch.aten.clone %2044, %int0 : !torch.vtensor<[4,77,64,16],f16>, !torch.int -> !torch.vtensor<[4,77,64,16],f16>
%2046 = torch.aten.view %2045, %1412 : !torch.vtensor<[4,77,64,16],f16>, !torch.list<int> -> !torch.vtensor<[4,77,1024],f16>
%2047 = torch.aten.div.Scalar %2046, %int128 : !torch.vtensor<[4,77,1024],f16>, !torch.int -> !torch.vtensor<[4,77,1024],f16>
%2048 = torch.aten.detach %936 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%2049 = torch.aten.detach %2048 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%2050 = torch.aten.div.Tensor %arg2, %2047 : !torch.vtensor<[4,77,1024],f16>, !torch.vtensor<[4,77,1024],f16> -> !torch.vtensor<[4,77,1024],f16>
%2051 = torch.aten.add.Tensor %2050, %2049, %int1 : !torch.vtensor<[4,77,1024],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,77,1024],f16>
%2052 = torch.aten.round %2051 : !torch.vtensor<[4,77,1024],f16> -> !torch.vtensor<[4,77,1024],f16>
%2053 = torch.aten.clamp %2052, %int-128, %int127 : !torch.vtensor<[4,77,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,77,1024],f16>
%2054 = torch.aten.sub.Tensor %2053, %2049, %int1 : !torch.vtensor<[4,77,1024],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,77,1024],f16>
%2055 = torch.aten.mul.Tensor %2054, %2047 : !torch.vtensor<[4,77,1024],f16>, !torch.vtensor<[4,77,1024],f16> -> !torch.vtensor<[4,77,1024],f16>
%2056 = torch.aten.broadcast_to %853, %1423 : !torch.vtensor<[320,64,1],f16>, !torch.list<int> -> !torch.vtensor<[320,64,16],f16>
%2057 = torch.aten.clone %2056, %int0 : !torch.vtensor<[320,64,16],f16>, !torch.int -> !torch.vtensor<[320,64,16],f16>
%2058 = torch.aten.view %2057, %1426 : !torch.vtensor<[320,64,16],f16>, !torch.list<int> -> !torch.vtensor<[320,1024],f16>
%2059 = torch.aten.mul.Tensor %852, %2058 : !torch.vtensor<[320,1024],si8>, !torch.vtensor<[320,1024],f16> -> !torch.vtensor<[320,1024],f16>
%2060 = torch.aten.transpose.int %2059, %int0, %int1 : !torch.vtensor<[320,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[1024,320],f16>
%2061 = torch.aten.view %2055, %1430 : !torch.vtensor<[4,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[308,1024],f16>
%2062 = torch.aten.mm %2061, %2060 : !torch.vtensor<[308,1024],f16>, !torch.vtensor<[1024,320],f16> -> !torch.vtensor<[308,320],f16>
%2063 = torch.aten.view %2062, %1433 : !torch.vtensor<[308,320],f16>, !torch.list<int> -> !torch.vtensor<[4,77,320],f16>
%2064 = torch.aten.view %arg2, %1405 : !torch.vtensor<[4,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,77,64,16],f16>
%2065 = torch.aten.abs %2064 : !torch.vtensor<[4,77,64,16],f16> -> !torch.vtensor<[4,77,64,16],f16>
%values_50, %indices_51 = torch.aten.max.dim %2065, %int3, %true : !torch.vtensor<[4,77,64,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[4,77,64,1],f16>, !torch.vtensor<[4,77,64,1],si64>
%2066 = torch.aten.view %values_50, %1408 : !torch.vtensor<[4,77,64,1],f16>, !torch.list<int> -> !torch.vtensor<[4,77,64,1],f16>
%2067 = torch.aten.broadcast_to %2066, %1405 : !torch.vtensor<[4,77,64,1],f16>, !torch.list<int> -> !torch.vtensor<[4,77,64,16],f16>
%2068 = torch.aten.clone %2067, %int0 : !torch.vtensor<[4,77,64,16],f16>, !torch.int -> !torch.vtensor<[4,77,64,16],f16>
%2069 = torch.aten.view %2068, %1412 : !torch.vtensor<[4,77,64,16],f16>, !torch.list<int> -> !torch.vtensor<[4,77,1024],f16>
%2070 = torch.aten.div.Scalar %2069, %int128 : !torch.vtensor<[4,77,1024],f16>, !torch.int -> !torch.vtensor<[4,77,1024],f16>
%2071 = torch.aten.detach %936 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%2072 = torch.aten.detach %2071 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%2073 = torch.aten.div.Tensor %arg2, %2070 : !torch.vtensor<[4,77,1024],f16>, !torch.vtensor<[4,77,1024],f16> -> !torch.vtensor<[4,77,1024],f16>
%2074 = torch.aten.add.Tensor %2073, %2072, %int1 : !torch.vtensor<[4,77,1024],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,77,1024],f16>
%2075 = torch.aten.round %2074 : !torch.vtensor<[4,77,1024],f16> -> !torch.vtensor<[4,77,1024],f16>
%2076 = torch.aten.clamp %2075, %int-128, %int127 : !torch.vtensor<[4,77,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,77,1024],f16>
%2077 = torch.aten.sub.Tensor %2076, %2072, %int1 : !torch.vtensor<[4,77,1024],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,77,1024],f16>
%2078 = torch.aten.mul.Tensor %2077, %2070 : !torch.vtensor<[4,77,1024],f16>, !torch.vtensor<[4,77,1024],f16> -> !torch.vtensor<[4,77,1024],f16>
%2079 = torch.aten.broadcast_to %851, %1423 : !torch.vtensor<[320,64,1],f16>, !torch.list<int> -> !torch.vtensor<[320,64,16],f16>
%2080 = torch.aten.clone %2079, %int0 : !torch.vtensor<[320,64,16],f16>, !torch.int -> !torch.vtensor<[320,64,16],f16>
%2081 = torch.aten.view %2080, %1426 : !torch.vtensor<[320,64,16],f16>, !torch.list<int> -> !torch.vtensor<[320,1024],f16>
%2082 = torch.aten.mul.Tensor %850, %2081 : !torch.vtensor<[320,1024],si8>, !torch.vtensor<[320,1024],f16> -> !torch.vtensor<[320,1024],f16>
%2083 = torch.aten.transpose.int %2082, %int0, %int1 : !torch.vtensor<[320,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[1024,320],f16>
%2084 = torch.aten.view %2078, %1430 : !torch.vtensor<[4,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[308,1024],f16>
%2085 = torch.aten.mm %2084, %2083 : !torch.vtensor<[308,1024],f16>, !torch.vtensor<[1024,320],f16> -> !torch.vtensor<[308,320],f16>
%2086 = torch.aten.view %2085, %1433 : !torch.vtensor<[308,320],f16>, !torch.list<int> -> !torch.vtensor<[4,77,320],f16>
%2087 = torch.aten.view %2063, %1458 : !torch.vtensor<[4,77,320],f16>, !torch.list<int> -> !torch.vtensor<[4,77,5,64],f16>
%2088 = torch.aten.permute %2087, %1239 : !torch.vtensor<[4,77,5,64],f16>, !torch.list<int> -> !torch.vtensor<[4,5,77,64],f16>
%2089 = torch.aten.clone %2088, %int0 : !torch.vtensor<[4,5,77,64],f16>, !torch.int -> !torch.vtensor<[4,5,77,64],f16>
%2090 = torch.aten.view %2089, %1462 : !torch.vtensor<[4,5,77,64],f16>, !torch.list<int> -> !torch.vtensor<[20,77,64],f16>
%2091 = torch.aten.view %2086, %1458 : !torch.vtensor<[4,77,320],f16>, !torch.list<int> -> !torch.vtensor<[4,77,5,64],f16>
%2092 = torch.aten.permute %2091, %1239 : !torch.vtensor<[4,77,5,64],f16>, !torch.list<int> -> !torch.vtensor<[4,5,77,64],f16>
%2093 = torch.aten.clone %2092, %int0 : !torch.vtensor<[4,5,77,64],f16>, !torch.int -> !torch.vtensor<[4,5,77,64],f16>
%2094 = torch.aten.view %2093, %1462 : !torch.vtensor<[4,5,77,64],f16>, !torch.list<int> -> !torch.vtensor<[20,77,64],f16>
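// Cross-attention math, again in f32: scores [20, 9216, 77] = Q @ K^T * 0.125,
// stable softmax over the 77 key positions, then cast back to f16.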
%2095 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%2096 = torch.aten.to.dtype %2095, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%2097 = torch.prim.ListConstruct %int20, %int9216, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2098 = torch.aten.broadcast_to %2096, %2097 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[20,9216,64],f32>
%2099 = torch.aten.to.dtype %2040, %int6, %false, %false, %none : !torch.vtensor<[20,9216,64],f16>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[20,9216,64],f32>
%2100 = torch.prim.ListConstruct %int20, %int9216, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2101 = torch.aten.broadcast_to %2099, %2100 : !torch.vtensor<[20,9216,64],f32>, !torch.list<int> -> !torch.vtensor<[20,9216,64],f32>
%2102 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%2103 = torch.aten.to.dtype %2102, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%2104 = torch.prim.ListConstruct %int20, %int77, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2105 = torch.aten.broadcast_to %2103, %2104 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[20,77,64],f32>
%2106 = torch.aten.to.dtype %2090, %int6, %false, %false, %none : !torch.vtensor<[20,77,64],f16>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[20,77,64],f32>
%2107 = torch.prim.ListConstruct %int20, %int77, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2108 = torch.aten.broadcast_to %2106, %2107 : !torch.vtensor<[20,77,64],f32>, !torch.list<int> -> !torch.vtensor<[20,77,64],f32>
%2109 = torch.aten.empty.memory_format %1482, %int6, %none, %cuda3A0, %false, %none : !torch.list<int>, !torch.int, !torch.none, !torch.Device, !torch.bool, !torch.none -> !torch.vtensor<[20,9216,77],f32>
%2110 = torch.aten.transpose.int %2108, %int-1, %int-2 : !torch.vtensor<[20,77,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[20,64,77],f32>
%2111 = torch.aten.bmm %2101, %2110 : !torch.vtensor<[20,9216,64],f32>, !torch.vtensor<[20,64,77],f32> -> !torch.vtensor<[20,9216,77],f32>
%2112 = torch.aten.mul.Scalar %2111, %float1.250000e-01 : !torch.vtensor<[20,9216,77],f32>, !torch.float -> !torch.vtensor<[20,9216,77],f32>
%2113 = torch.aten.add.Tensor %2112, %2109, %int0 : !torch.vtensor<[20,9216,77],f32>, !torch.vtensor<[20,9216,77],f32>, !torch.int -> !torch.vtensor<[20,9216,77],f32>
%values_52, %indices_53 = torch.aten.max.dim %2113, %int-1, %true : !torch.vtensor<[20,9216,77],f32>, !torch.int, !torch.bool -> !torch.vtensor<[20,9216,1],f32>, !torch.vtensor<[20,9216,1],si64>
%2114 = torch.aten.sub.Tensor %2113, %values_52, %float1.000000e00 : !torch.vtensor<[20,9216,77],f32>, !torch.vtensor<[20,9216,1],f32>, !torch.float -> !torch.vtensor<[20,9216,77],f32>
%2115 = torch.aten.exp %2114 : !torch.vtensor<[20,9216,77],f32> -> !torch.vtensor<[20,9216,77],f32>
%2116 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%2117 = torch.aten.sum.dim_IntList %2115, %2116, %true, %none : !torch.vtensor<[20,9216,77],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[20,9216,1],f32>
%2118 = torch.aten.div.Tensor %2115, %2117 : !torch.vtensor<[20,9216,77],f32>, !torch.vtensor<[20,9216,1],f32> -> !torch.vtensor<[20,9216,77],f32>
%2119 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%2120 = torch.aten.to.dtype %2119, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%2121 = torch.prim.ListConstruct %int20, %int9216, %int77 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2122 = torch.aten.broadcast_to %2120, %2121 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[20,9216,77],f16>
%2123 = torch.aten.to.dtype %2118, %int5, %false, %false, %none : !torch.vtensor<[20,9216,77],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[20,9216,77],f16>
%2124 = torch.prim.ListConstruct %int20, %int9216, %int77 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2125 = torch.aten.broadcast_to %2123, %2124 : !torch.vtensor<[20,9216,77],f16>, !torch.list<int> -> !torch.vtensor<[20,9216,77],f16>
%2126 = torch.aten.bmm %2125, %2094 : !torch.vtensor<[20,9216,77],f16>, !torch.vtensor<[20,77,64],f16> -> !torch.vtensor<[20,9216,64],f16>
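// Merge heads, QDQ, and the cross-attention output projection (weights
// %849/%848, bias %847), followed by the residual add onto %1997.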
%2127 = torch.aten.view %2126, %1331 : !torch.vtensor<[20,9216,64],f16>, !torch.list<int> -> !torch.vtensor<[4,5,9216,64],f16>
%2128 = torch.aten.permute %2127, %1239 : !torch.vtensor<[4,5,9216,64],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,5,64],f16>
%2129 = torch.aten.clone %2128, %int0 : !torch.vtensor<[4,9216,5,64],f16>, !torch.int -> !torch.vtensor<[4,9216,5,64],f16>
%2130 = torch.aten.view %2129, %1165 : !torch.vtensor<[4,9216,5,64],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,320],f16>
%2131 = torch.aten.view %2130, %1168 : !torch.vtensor<[4,9216,320],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,20,16],f16>
%2132 = torch.aten.abs %2131 : !torch.vtensor<[4,9216,20,16],f16> -> !torch.vtensor<[4,9216,20,16],f16>
%values_54, %indices_55 = torch.aten.max.dim %2132, %int3, %true : !torch.vtensor<[4,9216,20,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[4,9216,20,1],f16>, !torch.vtensor<[4,9216,20,1],si64>
%2133 = torch.aten.view %values_54, %1171 : !torch.vtensor<[4,9216,20,1],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,20,1],f16>
%2134 = torch.aten.broadcast_to %2133, %1168 : !torch.vtensor<[4,9216,20,1],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,20,16],f16>
%2135 = torch.aten.clone %2134, %int0 : !torch.vtensor<[4,9216,20,16],f16>, !torch.int -> !torch.vtensor<[4,9216,20,16],f16>
%2136 = torch.aten.view %2135, %1165 : !torch.vtensor<[4,9216,20,16],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,320],f16>
%2137 = torch.aten.div.Scalar %2136, %int128 : !torch.vtensor<[4,9216,320],f16>, !torch.int -> !torch.vtensor<[4,9216,320],f16>
%2138 = torch.aten.detach %936 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%2139 = torch.aten.detach %2138 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%2140 = torch.aten.div.Tensor %2130, %2137 : !torch.vtensor<[4,9216,320],f16>, !torch.vtensor<[4,9216,320],f16> -> !torch.vtensor<[4,9216,320],f16>
%2141 = torch.aten.add.Tensor %2140, %2139, %int1 : !torch.vtensor<[4,9216,320],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,9216,320],f16>
%2142 = torch.aten.round %2141 : !torch.vtensor<[4,9216,320],f16> -> !torch.vtensor<[4,9216,320],f16>
%2143 = torch.aten.clamp %2142, %int-128, %int127 : !torch.vtensor<[4,9216,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,9216,320],f16>
%2144 = torch.aten.sub.Tensor %2143, %2139, %int1 : !torch.vtensor<[4,9216,320],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,9216,320],f16>
%2145 = torch.aten.mul.Tensor %2144, %2137 : !torch.vtensor<[4,9216,320],f16>, !torch.vtensor<[4,9216,320],f16> -> !torch.vtensor<[4,9216,320],f16>
%2146 = torch.aten.broadcast_to %849, %1185 : !torch.vtensor<[320,20,1],f16>, !torch.list<int> -> !torch.vtensor<[320,20,16],f16>
%2147 = torch.aten.clone %2146, %int0 : !torch.vtensor<[320,20,16],f16>, !torch.int -> !torch.vtensor<[320,20,16],f16>
%2148 = torch.aten.view %2147, %1188 : !torch.vtensor<[320,20,16],f16>, !torch.list<int> -> !torch.vtensor<[320,320],f16>
%2149 = torch.aten.mul.Tensor %848, %2148 : !torch.vtensor<[320,320],si8>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
%2150 = torch.aten.transpose.int %2149, %int0, %int1 : !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,320],f16>
%2151 = torch.aten.view %2145, %1192 : !torch.vtensor<[4,9216,320],f16>, !torch.list<int> -> !torch.vtensor<[36864,320],f16>
%2152 = torch.aten.mm %2151, %2150 : !torch.vtensor<[36864,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[36864,320],f16>
%2153 = torch.aten.mul.Scalar %847, %int1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320],f16>
%2154 = torch.aten.add.Tensor %2153, %2152, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[36864,320],f16>, !torch.int -> !torch.vtensor<[36864,320],f16>
%2155 = torch.aten.view %2154, %1165 : !torch.vtensor<[36864,320],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,320],f16>
%2156 = torch.aten.add.Tensor %2155, %1997, %int1 : !torch.vtensor<[4,9216,320],f16>, !torch.vtensor<[4,9216,320],f16>, !torch.int -> !torch.vtensor<[4,9216,320],f16>
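// Third decomposed LayerNorm, ahead of the feed-forward.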
%2157 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%2158 = torch.aten.sum.dim_IntList %2156, %2157, %true, %none : !torch.vtensor<[4,9216,320],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,9216,1],f16>
%2159 = torch.aten.div.Scalar %2158, %int320 : !torch.vtensor<[4,9216,1],f16>, !torch.int -> !torch.vtensor<[4,9216,1],f16>
%2160 = torch.prim.ListConstruct %int4, %int9216, %int320 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2161 = torch.aten.broadcast_to %2159, %2160 : !torch.vtensor<[4,9216,1],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,320],f16>
%2162 = torch.aten.sub.Tensor %2156, %2161, %int1 : !torch.vtensor<[4,9216,320],f16>, !torch.vtensor<[4,9216,320],f16>, !torch.int -> !torch.vtensor<[4,9216,320],f16>
%2163 = torch.aten.mul.Tensor %2162, %2162 : !torch.vtensor<[4,9216,320],f16>, !torch.vtensor<[4,9216,320],f16> -> !torch.vtensor<[4,9216,320],f16>
%2164 = torch.aten.sum.dim_IntList %2163, %2157, %true, %none : !torch.vtensor<[4,9216,320],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,9216,1],f16>
%2165 = torch.aten.div.Scalar %2164, %int320 : !torch.vtensor<[4,9216,1],f16>, !torch.int -> !torch.vtensor<[4,9216,1],f16>
%2166 = torch.aten.add.Scalar %2165, %float1.000000e-05, %int1 : !torch.vtensor<[4,9216,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[4,9216,1],f16>
%2167 = torch.aten.rsqrt %2166 : !torch.vtensor<[4,9216,1],f16> -> !torch.vtensor<[4,9216,1],f16>
%2168 = torch.prim.ListConstruct %int4, %int9216, %int320 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2169 = torch.aten.broadcast_to %2167, %2168 : !torch.vtensor<[4,9216,1],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,320],f16>
%2170 = torch.aten.mul.Tensor %2162, %2169 : !torch.vtensor<[4,9216,320],f16>, !torch.vtensor<[4,9216,320],f16> -> !torch.vtensor<[4,9216,320],f16>
%2171 = torch.aten.mul.Tensor %2170, %846 : !torch.vtensor<[4,9216,320],f16>, !torch.vtensor<[320],f16> -> !torch.vtensor<[4,9216,320],f16>
%2172 = torch.aten.add.Tensor %2171, %845, %int1 : !torch.vtensor<[4,9216,320],f16>, !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[4,9216,320],f16>
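// GEGLU-style feed-forward, first half: QDQ, then a 320 -> 2560 linear (si8
// weight %843 with [2560,20,1] scales, bias %842); the 2560-wide output is
// split into two 1280 halves, with gelu applied to the second half as the gate.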
%2173 = torch.aten.view %2172, %1168 : !torch.vtensor<[4,9216,320],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,20,16],f16>
%2174 = torch.aten.abs %2173 : !torch.vtensor<[4,9216,20,16],f16> -> !torch.vtensor<[4,9216,20,16],f16>
%values_56, %indices_57 = torch.aten.max.dim %2174, %int3, %true : !torch.vtensor<[4,9216,20,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[4,9216,20,1],f16>, !torch.vtensor<[4,9216,20,1],si64>
%2175 = torch.aten.view %values_56, %1171 : !torch.vtensor<[4,9216,20,1],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,20,1],f16>
%2176 = torch.aten.broadcast_to %2175, %1168 : !torch.vtensor<[4,9216,20,1],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,20,16],f16>
%2177 = torch.aten.clone %2176, %int0 : !torch.vtensor<[4,9216,20,16],f16>, !torch.int -> !torch.vtensor<[4,9216,20,16],f16>
%2178 = torch.aten.view %2177, %1165 : !torch.vtensor<[4,9216,20,16],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,320],f16>
%2179 = torch.aten.div.Scalar %2178, %int128 : !torch.vtensor<[4,9216,320],f16>, !torch.int -> !torch.vtensor<[4,9216,320],f16>
%2180 = torch.aten.detach %936 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%2181 = torch.aten.detach %2180 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%2182 = torch.aten.div.Tensor %2172, %2179 : !torch.vtensor<[4,9216,320],f16>, !torch.vtensor<[4,9216,320],f16> -> !torch.vtensor<[4,9216,320],f16>
%2183 = torch.aten.add.Tensor %2182, %2181, %int1 : !torch.vtensor<[4,9216,320],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,9216,320],f16>
%2184 = torch.aten.round %2183 : !torch.vtensor<[4,9216,320],f16> -> !torch.vtensor<[4,9216,320],f16>
%2185 = torch.aten.clamp %2184, %int-128, %int127 : !torch.vtensor<[4,9216,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,9216,320],f16>
%2186 = torch.aten.sub.Tensor %2185, %2181, %int1 : !torch.vtensor<[4,9216,320],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,9216,320],f16>
%2187 = torch.aten.mul.Tensor %2186, %2179 : !torch.vtensor<[4,9216,320],f16>, !torch.vtensor<[4,9216,320],f16> -> !torch.vtensor<[4,9216,320],f16>
%2188 = torch.aten.broadcast_to %844, %1562 : !torch.vtensor<[2560,20,1],f16>, !torch.list<int> -> !torch.vtensor<[2560,20,16],f16>
%2189 = torch.aten.clone %2188, %int0 : !torch.vtensor<[2560,20,16],f16>, !torch.int -> !torch.vtensor<[2560,20,16],f16>
%2190 = torch.aten.view %2189, %1565 : !torch.vtensor<[2560,20,16],f16>, !torch.list<int> -> !torch.vtensor<[2560,320],f16>
%2191 = torch.aten.mul.Tensor %843, %2190 : !torch.vtensor<[2560,320],si8>, !torch.vtensor<[2560,320],f16> -> !torch.vtensor<[2560,320],f16>
%2192 = torch.aten.transpose.int %2191, %int0, %int1 : !torch.vtensor<[2560,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,2560],f16>
%2193 = torch.aten.view %2187, %1192 : !torch.vtensor<[4,9216,320],f16>, !torch.list<int> -> !torch.vtensor<[36864,320],f16>
%2194 = torch.aten.mm %2193, %2192 : !torch.vtensor<[36864,320],f16>, !torch.vtensor<[320,2560],f16> -> !torch.vtensor<[36864,2560],f16>
%2195 = torch.aten.mul.Scalar %842, %int1 : !torch.vtensor<[2560],f16>, !torch.int -> !torch.vtensor<[2560],f16>
%2196 = torch.aten.add.Tensor %2195, %2194, %int1 : !torch.vtensor<[2560],f16>, !torch.vtensor<[36864,2560],f16>, !torch.int -> !torch.vtensor<[36864,2560],f16>
%2197 = torch.aten.view %2196, %1573 : !torch.vtensor<[36864,2560],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,2560],f16>
%2198 = torch.aten.slice.Tensor %2197, %int-1, %int0, %int1280, %int1 : !torch.vtensor<[4,9216,2560],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[4,9216,1280],f16>
%2199 = torch.aten.slice.Tensor %2197, %int-1, %int1280, %int2560, %int1 : !torch.vtensor<[4,9216,2560],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[4,9216,1280],f16>
%2200 = torch.aten.gelu %2199, %str : !torch.vtensor<[4,9216,1280],f16>, !torch.str -> !torch.vtensor<[4,9216,1280],f16>
%2201 = torch.aten.mul.Tensor %2198, %2200 : !torch.vtensor<[4,9216,1280],f16>, !torch.vtensor<[4,9216,1280],f16> -> !torch.vtensor<[4,9216,1280],f16>
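// Feed-forward down projection: QDQ of the [4, 9216, 1280] gated output
// (80 groups of 16), a 1280 -> 320 linear (weights %841/%840, bias %839), and
// a residual add onto %2156.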
%2202 = torch.aten.view %2201, %1579 : !torch.vtensor<[4,9216,1280],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,80,16],f16>
%2203 = torch.aten.abs %2202 : !torch.vtensor<[4,9216,80,16],f16> -> !torch.vtensor<[4,9216,80,16],f16>
%values_58, %indices_59 = torch.aten.max.dim %2203, %int3, %true : !torch.vtensor<[4,9216,80,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[4,9216,80,1],f16>, !torch.vtensor<[4,9216,80,1],si64>
%2204 = torch.aten.view %values_58, %1582 : !torch.vtensor<[4,9216,80,1],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,80,1],f16>
%2205 = torch.aten.broadcast_to %2204, %1579 : !torch.vtensor<[4,9216,80,1],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,80,16],f16>
%2206 = torch.aten.clone %2205, %int0 : !torch.vtensor<[4,9216,80,16],f16>, !torch.int -> !torch.vtensor<[4,9216,80,16],f16>
%2207 = torch.aten.view %2206, %1586 : !torch.vtensor<[4,9216,80,16],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,1280],f16>
%2208 = torch.aten.div.Scalar %2207, %int128 : !torch.vtensor<[4,9216,1280],f16>, !torch.int -> !torch.vtensor<[4,9216,1280],f16>
%2209 = torch.aten.detach %936 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%2210 = torch.aten.detach %2209 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%2211 = torch.aten.div.Tensor %2201, %2208 : !torch.vtensor<[4,9216,1280],f16>, !torch.vtensor<[4,9216,1280],f16> -> !torch.vtensor<[4,9216,1280],f16>
%2212 = torch.aten.add.Tensor %2211, %2210, %int1 : !torch.vtensor<[4,9216,1280],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,9216,1280],f16>
%2213 = torch.aten.round %2212 : !torch.vtensor<[4,9216,1280],f16> -> !torch.vtensor<[4,9216,1280],f16>
%2214 = torch.aten.clamp %2213, %int-128, %int127 : !torch.vtensor<[4,9216,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,9216,1280],f16>
%2215 = torch.aten.sub.Tensor %2214, %2210, %int1 : !torch.vtensor<[4,9216,1280],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,9216,1280],f16>
%2216 = torch.aten.mul.Tensor %2215, %2208 : !torch.vtensor<[4,9216,1280],f16>, !torch.vtensor<[4,9216,1280],f16> -> !torch.vtensor<[4,9216,1280],f16>
%2217 = torch.aten.broadcast_to %841, %1597 : !torch.vtensor<[320,80,1],f16>, !torch.list<int> -> !torch.vtensor<[320,80,16],f16>
%2218 = torch.aten.clone %2217, %int0 : !torch.vtensor<[320,80,16],f16>, !torch.int -> !torch.vtensor<[320,80,16],f16>
%2219 = torch.aten.view %2218, %1600 : !torch.vtensor<[320,80,16],f16>, !torch.list<int> -> !torch.vtensor<[320,1280],f16>
%2220 = torch.aten.mul.Tensor %840, %2219 : !torch.vtensor<[320,1280],si8>, !torch.vtensor<[320,1280],f16> -> !torch.vtensor<[320,1280],f16>
%2221 = torch.aten.transpose.int %2220, %int0, %int1 : !torch.vtensor<[320,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,320],f16>
%2222 = torch.aten.view %2216, %1604 : !torch.vtensor<[4,9216,1280],f16>, !torch.list<int> -> !torch.vtensor<[36864,1280],f16>
%2223 = torch.aten.mm %2222, %2221 : !torch.vtensor<[36864,1280],f16>, !torch.vtensor<[1280,320],f16> -> !torch.vtensor<[36864,320],f16>
%2224 = torch.aten.mul.Scalar %839, %int1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320],f16>
%2225 = torch.aten.add.Tensor %2224, %2223, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[36864,320],f16>, !torch.int -> !torch.vtensor<[36864,320],f16>
%2226 = torch.aten.view %2225, %1165 : !torch.vtensor<[36864,320],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,320],f16>
%2227 = torch.aten.add.Tensor %2226, %2156, %int1 : !torch.vtensor<[4,9216,320],f16>, !torch.vtensor<[4,9216,320],f16>, !torch.int -> !torch.vtensor<[4,9216,320],f16>
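// What appears to be the block's output projection (weights %838/%837,
// bias %836), using the same QDQ pattern as above.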
%2228 = torch.aten.view %2227, %1168 : !torch.vtensor<[4,9216,320],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,20,16],f16>
%2229 = torch.aten.abs %2228 : !torch.vtensor<[4,9216,20,16],f16> -> !torch.vtensor<[4,9216,20,16],f16>
%values_60, %indices_61 = torch.aten.max.dim %2229, %int3, %true : !torch.vtensor<[4,9216,20,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[4,9216,20,1],f16>, !torch.vtensor<[4,9216,20,1],si64>
%2230 = torch.aten.view %values_60, %1171 : !torch.vtensor<[4,9216,20,1],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,20,1],f16>
%2231 = torch.aten.broadcast_to %2230, %1168 : !torch.vtensor<[4,9216,20,1],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,20,16],f16>
%2232 = torch.aten.clone %2231, %int0 : !torch.vtensor<[4,9216,20,16],f16>, !torch.int -> !torch.vtensor<[4,9216,20,16],f16>
%2233 = torch.aten.view %2232, %1165 : !torch.vtensor<[4,9216,20,16],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,320],f16>
%2234 = torch.aten.div.Scalar %2233, %int128 : !torch.vtensor<[4,9216,320],f16>, !torch.int -> !torch.vtensor<[4,9216,320],f16>
%2235 = torch.aten.detach %936 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%2236 = torch.aten.detach %2235 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%2237 = torch.aten.div.Tensor %2227, %2234 : !torch.vtensor<[4,9216,320],f16>, !torch.vtensor<[4,9216,320],f16> -> !torch.vtensor<[4,9216,320],f16>
%2238 = torch.aten.add.Tensor %2237, %2236, %int1 : !torch.vtensor<[4,9216,320],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,9216,320],f16>
%2239 = torch.aten.round %2238 : !torch.vtensor<[4,9216,320],f16> -> !torch.vtensor<[4,9216,320],f16>
%2240 = torch.aten.clamp %2239, %int-128, %int127 : !torch.vtensor<[4,9216,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,9216,320],f16>
%2241 = torch.aten.sub.Tensor %2240, %2236, %int1 : !torch.vtensor<[4,9216,320],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,9216,320],f16>
%2242 = torch.aten.mul.Tensor %2241, %2234 : !torch.vtensor<[4,9216,320],f16>, !torch.vtensor<[4,9216,320],f16> -> !torch.vtensor<[4,9216,320],f16>
%2243 = torch.aten.broadcast_to %838, %1185 : !torch.vtensor<[320,20,1],f16>, !torch.list<int> -> !torch.vtensor<[320,20,16],f16>
%2244 = torch.aten.clone %2243, %int0 : !torch.vtensor<[320,20,16],f16>, !torch.int -> !torch.vtensor<[320,20,16],f16>
%2245 = torch.aten.view %2244, %1188 : !torch.vtensor<[320,20,16],f16>, !torch.list<int> -> !torch.vtensor<[320,320],f16>
%2246 = torch.aten.mul.Tensor %837, %2245 : !torch.vtensor<[320,320],si8>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
%2247 = torch.aten.transpose.int %2246, %int0, %int1 : !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,320],f16>
%2248 = torch.aten.view %2242, %1192 : !torch.vtensor<[4,9216,320],f16>, !torch.list<int> -> !torch.vtensor<[36864,320],f16>
%2249 = torch.aten.mm %2248, %2247 : !torch.vtensor<[36864,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[36864,320],f16>
%2250 = torch.aten.mul.Scalar %836, %int1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320],f16>
%2251 = torch.aten.add.Tensor %2250, %2249, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[36864,320],f16>, !torch.int -> !torch.vtensor<[36864,320],f16>
%2252 = torch.aten.view %2251, %1165 : !torch.vtensor<[36864,320],f16>, !torch.list<int> -> !torch.vtensor<[4,9216,320],f16>
%2253 = torch.aten.view %2252, %1636 : !torch.vtensor<[4,9216,320],f16>, !torch.list<int> -> !torch.vtensor<[4,96,96,320],f16>
%2254 = torch.aten.permute %2253, %1638 : !torch.vtensor<[4,96,96,320],f16>, !torch.list<int> -> !torch.vtensor<[4,320,96,96],f16>
%2255 = torch.aten.clone %2254, %int0 : !torch.vtensor<[4,320,96,96],f16>, !torch.int -> !torch.vtensor<[4,320,96,96],f16>
%2256 = torch.aten.add.Tensor %2255, %1772, %int1 : !torch.vtensor<[4,320,96,96],f16>, !torch.vtensor<[4,320,96,96],f16>, !torch.int -> !torch.vtensor<[4,320,96,96],f16>
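    // The [4,9216,320] token sequence is reshaped back to NHWC (96x96 spatial), permuted
    // to NCHW, and added to the residual %1772, consistent with the tail of a spatial
    // transformer block. The result is fake-quantized below for the next convolution.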
%2257 = torch.aten.view %2256, %1030 : !torch.vtensor<[4,320,96,96],f16>, !torch.list<int> -> !torch.vtensor<[4,20,16,96,96],f16>
%2258 = torch.aten.abs %2257 : !torch.vtensor<[4,20,16,96,96],f16> -> !torch.vtensor<[4,20,16,96,96],f16>
%values_62, %indices_63 = torch.aten.max.dim %2258, %int2, %true : !torch.vtensor<[4,20,16,96,96],f16>, !torch.int, !torch.bool -> !torch.vtensor<[4,20,1,96,96],f16>, !torch.vtensor<[4,20,1,96,96],si64>
%2259 = torch.aten.view %values_62, %1033 : !torch.vtensor<[4,20,1,96,96],f16>, !torch.list<int> -> !torch.vtensor<[4,20,1,96,96],f16>
%2260 = torch.aten.broadcast_to %2259, %1030 : !torch.vtensor<[4,20,1,96,96],f16>, !torch.list<int> -> !torch.vtensor<[4,20,16,96,96],f16>
%2261 = torch.aten.clone %2260, %int0 : !torch.vtensor<[4,20,16,96,96],f16>, !torch.int -> !torch.vtensor<[4,20,16,96,96],f16>
%2262 = torch.aten.view %2261, %1011 : !torch.vtensor<[4,20,16,96,96],f16>, !torch.list<int> -> !torch.vtensor<[4,320,96,96],f16>
%2263 = torch.aten.div.Scalar %2262, %int128 : !torch.vtensor<[4,320,96,96],f16>, !torch.int -> !torch.vtensor<[4,320,96,96],f16>
%2264 = torch.aten.detach %936 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%2265 = torch.aten.detach %2264 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%2266 = torch.aten.div.Tensor %2256, %2263 : !torch.vtensor<[4,320,96,96],f16>, !torch.vtensor<[4,320,96,96],f16> -> !torch.vtensor<[4,320,96,96],f16>
%2267 = torch.aten.add.Tensor %2266, %2265, %int1 : !torch.vtensor<[4,320,96,96],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,320,96,96],f16>
%2268 = torch.aten.round %2267 : !torch.vtensor<[4,320,96,96],f16> -> !torch.vtensor<[4,320,96,96],f16>
%2269 = torch.aten.clamp %2268, %int-128, %int127 : !torch.vtensor<[4,320,96,96],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,320,96,96],f16>
%2270 = torch.aten.sub.Tensor %2269, %2265, %int1 : !torch.vtensor<[4,320,96,96],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,320,96,96],f16>
%2271 = torch.aten.mul.Tensor %2270, %2263 : !torch.vtensor<[4,320,96,96],f16>, !torch.vtensor<[4,320,96,96],f16> -> !torch.vtensor<[4,320,96,96],f16>
%2272 = torch.aten.broadcast_to %835, %1047 : !torch.vtensor<[320,20,1,3,3],f16>, !torch.list<int> -> !torch.vtensor<[320,20,16,3,3],f16>
%2273 = torch.aten.clone %2272, %int0 : !torch.vtensor<[320,20,16,3,3],f16>, !torch.int -> !torch.vtensor<[320,20,16,3,3],f16>
%2274 = torch.aten.view %2273, %1050 : !torch.vtensor<[320,20,16,3,3],f16>, !torch.list<int> -> !torch.vtensor<[320,320,3,3],f16>
%2275 = torch.aten.mul.Tensor %834, %2274 : !torch.vtensor<[320,320,3,3],si8>, !torch.vtensor<[320,320,3,3],f16> -> !torch.vtensor<[320,320,3,3],f16>
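    // The convolution below uses stride [2, 2] (%2276): a 3x3 downsampling conv taking the
    // 96x96 feature map to 48x48, i.e. the transition to the next UNet resolution level
    // (inferred from the shapes).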
%2276 = torch.prim.ListConstruct %int2, %int2 : (!torch.int, !torch.int) -> !torch.list<int>
%2277 = torch.aten.convolution %2271, %2275, %833, %2276, %984, %984, %false, %985, %int1 : !torch.vtensor<[4,320,96,96],f16>, !torch.vtensor<[320,320,3,3],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[4,320,48,48],f16>
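    // GroupNorm with 32 groups over 320 channels: the view to [4,32,10,2304] puts 10
    // channels x 2304 positions (23040 elements) in each group; mean and variance are
    // accumulated in f64, then rsqrt(var + eps) is applied in f32.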
%2278 = torch.prim.ListConstruct %int4, %int32, %int10, %int2304 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2279 = torch.aten.view %2277, %2278 : !torch.vtensor<[4,320,48,48],f16>, !torch.list<int> -> !torch.vtensor<[4,32,10,2304],f16>
%2280 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%2281 = torch.aten.to.dtype %2280, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%2282 = torch.prim.ListConstruct %int4, %int32, %int10, %int2304 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2283 = torch.aten.broadcast_to %2281, %2282 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[4,32,10,2304],f32>
%2284 = torch.aten.to.dtype %2279, %int6, %false, %false, %none : !torch.vtensor<[4,32,10,2304],f16>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,32,10,2304],f32>
%2285 = torch.prim.ListConstruct %int4, %int32, %int10, %int2304 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2286 = torch.aten.broadcast_to %2284, %2285 : !torch.vtensor<[4,32,10,2304],f32>, !torch.list<int> -> !torch.vtensor<[4,32,10,2304],f32>
%2287 = torch.aten.to.dtype %2286, %int7, %false, %false, %none : !torch.vtensor<[4,32,10,2304],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,32,10,2304],f64>
%2288 = torch.aten.sum.dim_IntList %2287, %996, %true, %none : !torch.vtensor<[4,32,10,2304],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f64>
%2289 = torch.aten.div.Scalar %2288, %int23040 : !torch.vtensor<[4,32,1,1],f64>, !torch.int -> !torch.vtensor<[4,32,1,1],f64>
%2290 = torch.aten.sub.Tensor %2287, %2289, %float1.000000e00 : !torch.vtensor<[4,32,10,2304],f64>, !torch.vtensor<[4,32,1,1],f64>, !torch.float -> !torch.vtensor<[4,32,10,2304],f64>
%2291 = torch.aten.mul.Tensor %2290, %2290 : !torch.vtensor<[4,32,10,2304],f64>, !torch.vtensor<[4,32,10,2304],f64> -> !torch.vtensor<[4,32,10,2304],f64>
%2292 = torch.aten.sum.dim_IntList %2291, %996, %true, %none : !torch.vtensor<[4,32,10,2304],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f64>
%2293 = torch.aten.div.Scalar %2292, %int23040 : !torch.vtensor<[4,32,1,1],f64>, !torch.int -> !torch.vtensor<[4,32,1,1],f64>
%2294 = torch.aten.to.dtype %2293, %int6, %false, %false, %none : !torch.vtensor<[4,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f32>
%2295 = torch.aten.sum.dim_IntList %2286, %996, %true, %none : !torch.vtensor<[4,32,10,2304],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f32>
%2296 = torch.aten.div.Scalar %2295, %int23040 : !torch.vtensor<[4,32,1,1],f32>, !torch.int -> !torch.vtensor<[4,32,1,1],f32>
%2297 = torch.aten.add.Tensor %2294, %939, %int1 : !torch.vtensor<[4,32,1,1],f32>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,32,1,1],f32>
%2298 = torch.aten.rsqrt %2297 : !torch.vtensor<[4,32,1,1],f32> -> !torch.vtensor<[4,32,1,1],f32>
%2299 = torch.aten.sub.Tensor %2279, %2296, %int1 : !torch.vtensor<[4,32,10,2304],f16>, !torch.vtensor<[4,32,1,1],f32>, !torch.int -> !torch.vtensor<[4,32,10,2304],f32>
%2300 = torch.aten.mul.Tensor %2299, %2298 : !torch.vtensor<[4,32,10,2304],f32>, !torch.vtensor<[4,32,1,1],f32> -> !torch.vtensor<[4,32,10,2304],f32>
%2301 = torch.prim.ListConstruct %int4, %int320, %int48, %int48 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2302 = torch.aten.view %2300, %2301 : !torch.vtensor<[4,32,10,2304],f32>, !torch.list<int> -> !torch.vtensor<[4,320,48,48],f32>
%2303 = torch.aten.unsqueeze %832, %int0 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[1,320],f16>
%2304 = torch.aten.unsqueeze %2303, %int2 : !torch.vtensor<[1,320],f16>, !torch.int -> !torch.vtensor<[1,320,1],f16>
%2305 = torch.aten.unsqueeze %2304, %int3 : !torch.vtensor<[1,320,1],f16>, !torch.int -> !torch.vtensor<[1,320,1,1],f16>
%2306 = torch.aten.unsqueeze %831, %int0 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[1,320],f16>
%2307 = torch.aten.unsqueeze %2306, %int2 : !torch.vtensor<[1,320],f16>, !torch.int -> !torch.vtensor<[1,320,1],f16>
%2308 = torch.aten.unsqueeze %2307, %int3 : !torch.vtensor<[1,320,1],f16>, !torch.int -> !torch.vtensor<[1,320,1,1],f16>
%2309 = torch.aten.mul.Tensor %2302, %2308 : !torch.vtensor<[4,320,48,48],f32>, !torch.vtensor<[1,320,1,1],f16> -> !torch.vtensor<[4,320,48,48],f32>
%2310 = torch.aten.add.Tensor %2309, %2305, %int1 : !torch.vtensor<[4,320,48,48],f32>, !torch.vtensor<[1,320,1,1],f16>, !torch.int -> !torch.vtensor<[4,320,48,48],f32>
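    // Per-channel affine (weight %831, bias %832) completes the GroupNorm. The lines
    // below cast back to f16 and apply SiLU: silu(x) = x * sigmoid(x).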
%2311 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%2312 = torch.aten.to.dtype %2311, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%2313 = torch.prim.ListConstruct %int4, %int320, %int48, %int48 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2314 = torch.aten.broadcast_to %2312, %2313 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[4,320,48,48],f16>
%2315 = torch.aten.to.dtype %2310, %int5, %false, %false, %none : !torch.vtensor<[4,320,48,48],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,320,48,48],f16>
%2316 = torch.prim.ListConstruct %int4, %int320, %int48, %int48 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2317 = torch.aten.broadcast_to %2315, %2316 : !torch.vtensor<[4,320,48,48],f16>, !torch.list<int> -> !torch.vtensor<[4,320,48,48],f16>
%2318 = torch.aten.sigmoid %2317 : !torch.vtensor<[4,320,48,48],f16> -> !torch.vtensor<[4,320,48,48],f16>
%2319 = torch.aten.mul.Tensor %2318, %2317 : !torch.vtensor<[4,320,48,48],f16>, !torch.vtensor<[4,320,48,48],f16> -> !torch.vtensor<[4,320,48,48],f16>
%2320 = torch.prim.ListConstruct %int4, %int20, %int16, %int48, %int48 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2321 = torch.aten.view %2319, %2320 : !torch.vtensor<[4,320,48,48],f16>, !torch.list<int> -> !torch.vtensor<[4,20,16,48,48],f16>
%2322 = torch.aten.abs %2321 : !torch.vtensor<[4,20,16,48,48],f16> -> !torch.vtensor<[4,20,16,48,48],f16>
%values_64, %indices_65 = torch.aten.max.dim %2322, %int2, %true : !torch.vtensor<[4,20,16,48,48],f16>, !torch.int, !torch.bool -> !torch.vtensor<[4,20,1,48,48],f16>, !torch.vtensor<[4,20,1,48,48],si64>
%2323 = torch.prim.ListConstruct %int4, %int20, %int1, %int48, %int48 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2324 = torch.aten.view %values_64, %2323 : !torch.vtensor<[4,20,1,48,48],f16>, !torch.list<int> -> !torch.vtensor<[4,20,1,48,48],f16>
%2325 = torch.aten.broadcast_to %2324, %2320 : !torch.vtensor<[4,20,1,48,48],f16>, !torch.list<int> -> !torch.vtensor<[4,20,16,48,48],f16>
%2326 = torch.aten.clone %2325, %int0 : !torch.vtensor<[4,20,16,48,48],f16>, !torch.int -> !torch.vtensor<[4,20,16,48,48],f16>
%2327 = torch.aten.view %2326, %2301 : !torch.vtensor<[4,20,16,48,48],f16>, !torch.list<int> -> !torch.vtensor<[4,320,48,48],f16>
%2328 = torch.aten.div.Scalar %2327, %int128 : !torch.vtensor<[4,320,48,48],f16>, !torch.int -> !torch.vtensor<[4,320,48,48],f16>
%2329 = torch.aten.detach %936 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%2330 = torch.aten.detach %2329 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%2331 = torch.aten.div.Tensor %2319, %2328 : !torch.vtensor<[4,320,48,48],f16>, !torch.vtensor<[4,320,48,48],f16> -> !torch.vtensor<[4,320,48,48],f16>
%2332 = torch.aten.add.Tensor %2331, %2330, %int1 : !torch.vtensor<[4,320,48,48],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,320,48,48],f16>
%2333 = torch.aten.round %2332 : !torch.vtensor<[4,320,48,48],f16> -> !torch.vtensor<[4,320,48,48],f16>
%2334 = torch.aten.clamp %2333, %int-128, %int127 : !torch.vtensor<[4,320,48,48],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,320,48,48],f16>
%2335 = torch.aten.sub.Tensor %2334, %2330, %int1 : !torch.vtensor<[4,320,48,48],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,320,48,48],f16>
%2336 = torch.aten.mul.Tensor %2335, %2328 : !torch.vtensor<[4,320,48,48],f16>, !torch.vtensor<[4,320,48,48],f16> -> !torch.vtensor<[4,320,48,48],f16>
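    // The fake-quantized input feeds a 3x3 convolution raising channels 320 -> 640,
    // which looks like the first conv of a ResNet block at the new resolution
    // (an assumption based on the shapes).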
%2337 = torch.prim.ListConstruct %int640, %int20, %int16, %int3, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2338 = torch.aten.broadcast_to %830, %2337 : !torch.vtensor<[640,20,1,3,3],f16>, !torch.list<int> -> !torch.vtensor<[640,20,16,3,3],f16>
%2339 = torch.aten.clone %2338, %int0 : !torch.vtensor<[640,20,16,3,3],f16>, !torch.int -> !torch.vtensor<[640,20,16,3,3],f16>
%2340 = torch.prim.ListConstruct %int640, %int320, %int3, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2341 = torch.aten.view %2339, %2340 : !torch.vtensor<[640,20,16,3,3],f16>, !torch.list<int> -> !torch.vtensor<[640,320,3,3],f16>
%2342 = torch.aten.mul.Tensor %829, %2341 : !torch.vtensor<[640,320,3,3],si8>, !torch.vtensor<[640,320,3,3],f16> -> !torch.vtensor<[640,320,3,3],f16>
%2343 = torch.aten.convolution %2336, %2342, %828, %984, %984, %984, %false, %985, %int1 : !torch.vtensor<[4,320,48,48],f16>, !torch.vtensor<[640,320,3,3],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[4,640,48,48],f16>
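    // Consistent with a timestep-embedding path: SiLU on the shared [4,1280] embedding
    // %983, an unquantized f16 linear 1280 -> 640 (weight %827, bias %826), then a
    // broadcast add as a per-channel bias over the 48x48 feature map.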
%2344 = torch.aten.sigmoid %983 : !torch.vtensor<[4,1280],f16> -> !torch.vtensor<[4,1280],f16>
%2345 = torch.aten.mul.Tensor %2344, %983 : !torch.vtensor<[4,1280],f16>, !torch.vtensor<[4,1280],f16> -> !torch.vtensor<[4,1280],f16>
%2346 = torch.aten.transpose.int %827, %int0, %int1 : !torch.vtensor<[640,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,640],f16>
%2347 = torch.aten.mm %2345, %2346 : !torch.vtensor<[4,1280],f16>, !torch.vtensor<[1280,640],f16> -> !torch.vtensor<[4,640],f16>
%2348 = torch.aten.mul.Scalar %826, %int1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640],f16>
%2349 = torch.aten.add.Tensor %2348, %2347, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[4,640],f16>, !torch.int -> !torch.vtensor<[4,640],f16>
%2350 = torch.aten.unsqueeze %2349, %int2 : !torch.vtensor<[4,640],f16>, !torch.int -> !torch.vtensor<[4,640,1],f16>
%2351 = torch.aten.unsqueeze %2350, %int3 : !torch.vtensor<[4,640,1],f16>, !torch.int -> !torch.vtensor<[4,640,1,1],f16>
%2352 = torch.aten.add.Tensor %2343, %2351, %int1 : !torch.vtensor<[4,640,48,48],f16>, !torch.vtensor<[4,640,1,1],f16>, !torch.int -> !torch.vtensor<[4,640,48,48],f16>
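    // Same GroupNorm-32 pattern as above, now over 640 channels: 20 channels per group,
    // 20 x 2304 = 46080 elements per group, statistics again accumulated in f64.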
%2353 = torch.prim.ListConstruct %int4, %int32, %int20, %int2304 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2354 = torch.aten.view %2352, %2353 : !torch.vtensor<[4,640,48,48],f16>, !torch.list<int> -> !torch.vtensor<[4,32,20,2304],f16>
%2355 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%2356 = torch.aten.to.dtype %2355, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%2357 = torch.prim.ListConstruct %int4, %int32, %int20, %int2304 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2358 = torch.aten.broadcast_to %2356, %2357 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[4,32,20,2304],f32>
%2359 = torch.aten.to.dtype %2354, %int6, %false, %false, %none : !torch.vtensor<[4,32,20,2304],f16>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,32,20,2304],f32>
%2360 = torch.prim.ListConstruct %int4, %int32, %int20, %int2304 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2361 = torch.aten.broadcast_to %2359, %2360 : !torch.vtensor<[4,32,20,2304],f32>, !torch.list<int> -> !torch.vtensor<[4,32,20,2304],f32>
%2362 = torch.aten.to.dtype %2361, %int7, %false, %false, %none : !torch.vtensor<[4,32,20,2304],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,32,20,2304],f64>
%2363 = torch.aten.sum.dim_IntList %2362, %996, %true, %none : !torch.vtensor<[4,32,20,2304],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f64>
%2364 = torch.aten.div.Scalar %2363, %int46080 : !torch.vtensor<[4,32,1,1],f64>, !torch.int -> !torch.vtensor<[4,32,1,1],f64>
%2365 = torch.aten.sub.Tensor %2362, %2364, %float1.000000e00 : !torch.vtensor<[4,32,20,2304],f64>, !torch.vtensor<[4,32,1,1],f64>, !torch.float -> !torch.vtensor<[4,32,20,2304],f64>
%2366 = torch.aten.mul.Tensor %2365, %2365 : !torch.vtensor<[4,32,20,2304],f64>, !torch.vtensor<[4,32,20,2304],f64> -> !torch.vtensor<[4,32,20,2304],f64>
%2367 = torch.aten.sum.dim_IntList %2366, %996, %true, %none : !torch.vtensor<[4,32,20,2304],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f64>
%2368 = torch.aten.div.Scalar %2367, %int46080 : !torch.vtensor<[4,32,1,1],f64>, !torch.int -> !torch.vtensor<[4,32,1,1],f64>
%2369 = torch.aten.to.dtype %2368, %int6, %false, %false, %none : !torch.vtensor<[4,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f32>
%2370 = torch.aten.sum.dim_IntList %2361, %996, %true, %none : !torch.vtensor<[4,32,20,2304],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f32>
%2371 = torch.aten.div.Scalar %2370, %int46080 : !torch.vtensor<[4,32,1,1],f32>, !torch.int -> !torch.vtensor<[4,32,1,1],f32>
%2372 = torch.aten.add.Tensor %2369, %939, %int1 : !torch.vtensor<[4,32,1,1],f32>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,32,1,1],f32>
%2373 = torch.aten.rsqrt %2372 : !torch.vtensor<[4,32,1,1],f32> -> !torch.vtensor<[4,32,1,1],f32>
%2374 = torch.aten.sub.Tensor %2354, %2371, %int1 : !torch.vtensor<[4,32,20,2304],f16>, !torch.vtensor<[4,32,1,1],f32>, !torch.int -> !torch.vtensor<[4,32,20,2304],f32>
%2375 = torch.aten.mul.Tensor %2374, %2373 : !torch.vtensor<[4,32,20,2304],f32>, !torch.vtensor<[4,32,1,1],f32> -> !torch.vtensor<[4,32,20,2304],f32>
%2376 = torch.prim.ListConstruct %int4, %int640, %int48, %int48 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2377 = torch.aten.view %2375, %2376 : !torch.vtensor<[4,32,20,2304],f32>, !torch.list<int> -> !torch.vtensor<[4,640,48,48],f32>
%2378 = torch.aten.unsqueeze %825, %int0 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[1,640],f16>
%2379 = torch.aten.unsqueeze %2378, %int2 : !torch.vtensor<[1,640],f16>, !torch.int -> !torch.vtensor<[1,640,1],f16>
%2380 = torch.aten.unsqueeze %2379, %int3 : !torch.vtensor<[1,640,1],f16>, !torch.int -> !torch.vtensor<[1,640,1,1],f16>
%2381 = torch.aten.unsqueeze %824, %int0 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[1,640],f16>
%2382 = torch.aten.unsqueeze %2381, %int2 : !torch.vtensor<[1,640],f16>, !torch.int -> !torch.vtensor<[1,640,1],f16>
%2383 = torch.aten.unsqueeze %2382, %int3 : !torch.vtensor<[1,640,1],f16>, !torch.int -> !torch.vtensor<[1,640,1,1],f16>
%2384 = torch.aten.mul.Tensor %2377, %2383 : !torch.vtensor<[4,640,48,48],f32>, !torch.vtensor<[1,640,1,1],f16> -> !torch.vtensor<[4,640,48,48],f32>
%2385 = torch.aten.add.Tensor %2384, %2380, %int1 : !torch.vtensor<[4,640,48,48],f32>, !torch.vtensor<[1,640,1,1],f16>, !torch.int -> !torch.vtensor<[4,640,48,48],f32>
%2386 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%2387 = torch.aten.to.dtype %2386, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%2388 = torch.prim.ListConstruct %int4, %int640, %int48, %int48 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2389 = torch.aten.broadcast_to %2387, %2388 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[4,640,48,48],f16>
%2390 = torch.aten.to.dtype %2385, %int5, %false, %false, %none : !torch.vtensor<[4,640,48,48],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,640,48,48],f16>
%2391 = torch.prim.ListConstruct %int4, %int640, %int48, %int48 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2392 = torch.aten.broadcast_to %2390, %2391 : !torch.vtensor<[4,640,48,48],f16>, !torch.list<int> -> !torch.vtensor<[4,640,48,48],f16>
%2393 = torch.aten.sigmoid %2392 : !torch.vtensor<[4,640,48,48],f16> -> !torch.vtensor<[4,640,48,48],f16>
%2394 = torch.aten.mul.Tensor %2393, %2392 : !torch.vtensor<[4,640,48,48],f16>, !torch.vtensor<[4,640,48,48],f16> -> !torch.vtensor<[4,640,48,48],f16>
%2395 = torch.prim.ListConstruct %int4, %int40, %int16, %int48, %int48 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2396 = torch.aten.view %2394, %2395 : !torch.vtensor<[4,640,48,48],f16>, !torch.list<int> -> !torch.vtensor<[4,40,16,48,48],f16>
%2397 = torch.aten.abs %2396 : !torch.vtensor<[4,40,16,48,48],f16> -> !torch.vtensor<[4,40,16,48,48],f16>
%values_66, %indices_67 = torch.aten.max.dim %2397, %int2, %true : !torch.vtensor<[4,40,16,48,48],f16>, !torch.int, !torch.bool -> !torch.vtensor<[4,40,1,48,48],f16>, !torch.vtensor<[4,40,1,48,48],si64>
%2398 = torch.prim.ListConstruct %int4, %int40, %int1, %int48, %int48 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2399 = torch.aten.view %values_66, %2398 : !torch.vtensor<[4,40,1,48,48],f16>, !torch.list<int> -> !torch.vtensor<[4,40,1,48,48],f16>
%2400 = torch.aten.broadcast_to %2399, %2395 : !torch.vtensor<[4,40,1,48,48],f16>, !torch.list<int> -> !torch.vtensor<[4,40,16,48,48],f16>
%2401 = torch.aten.clone %2400, %int0 : !torch.vtensor<[4,40,16,48,48],f16>, !torch.int -> !torch.vtensor<[4,40,16,48,48],f16>
%2402 = torch.aten.view %2401, %2376 : !torch.vtensor<[4,40,16,48,48],f16>, !torch.list<int> -> !torch.vtensor<[4,640,48,48],f16>
%2403 = torch.aten.div.Scalar %2402, %int128 : !torch.vtensor<[4,640,48,48],f16>, !torch.int -> !torch.vtensor<[4,640,48,48],f16>
%2404 = torch.aten.detach %936 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%2405 = torch.aten.detach %2404 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%2406 = torch.aten.div.Tensor %2394, %2403 : !torch.vtensor<[4,640,48,48],f16>, !torch.vtensor<[4,640,48,48],f16> -> !torch.vtensor<[4,640,48,48],f16>
%2407 = torch.aten.add.Tensor %2406, %2405, %int1 : !torch.vtensor<[4,640,48,48],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,640,48,48],f16>
%2408 = torch.aten.round %2407 : !torch.vtensor<[4,640,48,48],f16> -> !torch.vtensor<[4,640,48,48],f16>
%2409 = torch.aten.clamp %2408, %int-128, %int127 : !torch.vtensor<[4,640,48,48],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,640,48,48],f16>
%2410 = torch.aten.sub.Tensor %2409, %2405, %int1 : !torch.vtensor<[4,640,48,48],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,640,48,48],f16>
%2411 = torch.aten.mul.Tensor %2410, %2403 : !torch.vtensor<[4,640,48,48],f16>, !torch.vtensor<[4,640,48,48],f16> -> !torch.vtensor<[4,640,48,48],f16>
%2412 = torch.prim.ListConstruct %int640, %int40, %int16, %int3, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2413 = torch.aten.broadcast_to %823, %2412 : !torch.vtensor<[640,40,1,3,3],f16>, !torch.list<int> -> !torch.vtensor<[640,40,16,3,3],f16>
%2414 = torch.aten.clone %2413, %int0 : !torch.vtensor<[640,40,16,3,3],f16>, !torch.int -> !torch.vtensor<[640,40,16,3,3],f16>
%2415 = torch.prim.ListConstruct %int640, %int640, %int3, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2416 = torch.aten.view %2414, %2415 : !torch.vtensor<[640,40,16,3,3],f16>, !torch.list<int> -> !torch.vtensor<[640,640,3,3],f16>
%2417 = torch.aten.mul.Tensor %822, %2416 : !torch.vtensor<[640,640,3,3],si8>, !torch.vtensor<[640,640,3,3],f16> -> !torch.vtensor<[640,640,3,3],f16>
%2418 = torch.aten.convolution %2411, %2417, %821, %984, %984, %984, %false, %985, %int1 : !torch.vtensor<[4,640,48,48],f16>, !torch.vtensor<[640,640,3,3],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[4,640,48,48],f16>
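    // Main branch done (second 3x3 conv, 640 -> 640). The skip path below fake-quantizes
    // the pre-block activation %2277 (320 channels) and lifts it to 640 channels with a
    // quantized 1x1 convolution, the standard ResNet shortcut when widths differ.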
%2419 = torch.aten.view %2277, %2320 : !torch.vtensor<[4,320,48,48],f16>, !torch.list<int> -> !torch.vtensor<[4,20,16,48,48],f16>
%2420 = torch.aten.abs %2419 : !torch.vtensor<[4,20,16,48,48],f16> -> !torch.vtensor<[4,20,16,48,48],f16>
%values_68, %indices_69 = torch.aten.max.dim %2420, %int2, %true : !torch.vtensor<[4,20,16,48,48],f16>, !torch.int, !torch.bool -> !torch.vtensor<[4,20,1,48,48],f16>, !torch.vtensor<[4,20,1,48,48],si64>
%2421 = torch.aten.view %values_68, %2323 : !torch.vtensor<[4,20,1,48,48],f16>, !torch.list<int> -> !torch.vtensor<[4,20,1,48,48],f16>
%2422 = torch.aten.broadcast_to %2421, %2320 : !torch.vtensor<[4,20,1,48,48],f16>, !torch.list<int> -> !torch.vtensor<[4,20,16,48,48],f16>
%2423 = torch.aten.clone %2422, %int0 : !torch.vtensor<[4,20,16,48,48],f16>, !torch.int -> !torch.vtensor<[4,20,16,48,48],f16>
%2424 = torch.aten.view %2423, %2301 : !torch.vtensor<[4,20,16,48,48],f16>, !torch.list<int> -> !torch.vtensor<[4,320,48,48],f16>
%2425 = torch.aten.div.Scalar %2424, %int128 : !torch.vtensor<[4,320,48,48],f16>, !torch.int -> !torch.vtensor<[4,320,48,48],f16>
%2426 = torch.aten.detach %936 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%2427 = torch.aten.detach %2426 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%2428 = torch.aten.div.Tensor %2277, %2425 : !torch.vtensor<[4,320,48,48],f16>, !torch.vtensor<[4,320,48,48],f16> -> !torch.vtensor<[4,320,48,48],f16>
%2429 = torch.aten.add.Tensor %2428, %2427, %int1 : !torch.vtensor<[4,320,48,48],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,320,48,48],f16>
%2430 = torch.aten.round %2429 : !torch.vtensor<[4,320,48,48],f16> -> !torch.vtensor<[4,320,48,48],f16>
%2431 = torch.aten.clamp %2430, %int-128, %int127 : !torch.vtensor<[4,320,48,48],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,320,48,48],f16>
%2432 = torch.aten.sub.Tensor %2431, %2427, %int1 : !torch.vtensor<[4,320,48,48],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,320,48,48],f16>
%2433 = torch.aten.mul.Tensor %2432, %2425 : !torch.vtensor<[4,320,48,48],f16>, !torch.vtensor<[4,320,48,48],f16> -> !torch.vtensor<[4,320,48,48],f16>
%2434 = torch.prim.ListConstruct %int640, %int20, %int16, %int1, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2435 = torch.aten.broadcast_to %820, %2434 : !torch.vtensor<[640,20,1,1,1],f16>, !torch.list<int> -> !torch.vtensor<[640,20,16,1,1],f16>
%2436 = torch.aten.clone %2435, %int0 : !torch.vtensor<[640,20,16,1,1],f16>, !torch.int -> !torch.vtensor<[640,20,16,1,1],f16>
%2437 = torch.prim.ListConstruct %int640, %int320, %int1, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2438 = torch.aten.view %2436, %2437 : !torch.vtensor<[640,20,16,1,1],f16>, !torch.list<int> -> !torch.vtensor<[640,320,1,1],f16>
%2439 = torch.aten.mul.Tensor %819, %2438 : !torch.vtensor<[640,320,1,1],si8>, !torch.vtensor<[640,320,1,1],f16> -> !torch.vtensor<[640,320,1,1],f16>
%2440 = torch.aten.convolution %2433, %2439, %818, %984, %985, %984, %false, %985, %int1 : !torch.vtensor<[4,320,48,48],f16>, !torch.vtensor<[640,320,1,1],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[4,640,48,48],f16>
%2441 = torch.aten.add.Tensor %2440, %2418, %int1 : !torch.vtensor<[4,640,48,48],f16>, !torch.vtensor<[4,640,48,48],f16>, !torch.int -> !torch.vtensor<[4,640,48,48],f16>
%2442 = torch.aten.div.Tensor %2441, %925 : !torch.vtensor<[4,640,48,48],f16>, !torch.vtensor<[],f16> -> !torch.vtensor<[4,640,48,48],f16>
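    // Residual add, then division by %925, consistent with a ResNet-block
    // output_scale_factor (assumption). The GroupNorm below prepares the input of the
    // next spatial transformer.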
%2443 = torch.aten.view %2442, %2353 : !torch.vtensor<[4,640,48,48],f16>, !torch.list<int> -> !torch.vtensor<[4,32,20,2304],f16>
%2444 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%2445 = torch.aten.to.dtype %2444, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%2446 = torch.prim.ListConstruct %int4, %int32, %int20, %int2304 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2447 = torch.aten.broadcast_to %2445, %2446 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[4,32,20,2304],f32>
%2448 = torch.aten.to.dtype %2443, %int6, %false, %false, %none : !torch.vtensor<[4,32,20,2304],f16>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,32,20,2304],f32>
%2449 = torch.prim.ListConstruct %int4, %int32, %int20, %int2304 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2450 = torch.aten.broadcast_to %2448, %2449 : !torch.vtensor<[4,32,20,2304],f32>, !torch.list<int> -> !torch.vtensor<[4,32,20,2304],f32>
%2451 = torch.aten.to.dtype %2450, %int7, %false, %false, %none : !torch.vtensor<[4,32,20,2304],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,32,20,2304],f64>
%2452 = torch.aten.sum.dim_IntList %2451, %996, %true, %none : !torch.vtensor<[4,32,20,2304],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f64>
%2453 = torch.aten.div.Scalar %2452, %int46080 : !torch.vtensor<[4,32,1,1],f64>, !torch.int -> !torch.vtensor<[4,32,1,1],f64>
%2454 = torch.aten.sub.Tensor %2451, %2453, %float1.000000e00 : !torch.vtensor<[4,32,20,2304],f64>, !torch.vtensor<[4,32,1,1],f64>, !torch.float -> !torch.vtensor<[4,32,20,2304],f64>
%2455 = torch.aten.mul.Tensor %2454, %2454 : !torch.vtensor<[4,32,20,2304],f64>, !torch.vtensor<[4,32,20,2304],f64> -> !torch.vtensor<[4,32,20,2304],f64>
%2456 = torch.aten.sum.dim_IntList %2455, %996, %true, %none : !torch.vtensor<[4,32,20,2304],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f64>
%2457 = torch.aten.div.Scalar %2456, %int46080 : !torch.vtensor<[4,32,1,1],f64>, !torch.int -> !torch.vtensor<[4,32,1,1],f64>
%2458 = torch.aten.to.dtype %2457, %int6, %false, %false, %none : !torch.vtensor<[4,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f32>
%2459 = torch.aten.sum.dim_IntList %2450, %996, %true, %none : !torch.vtensor<[4,32,20,2304],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f32>
%2460 = torch.aten.div.Scalar %2459, %int46080 : !torch.vtensor<[4,32,1,1],f32>, !torch.int -> !torch.vtensor<[4,32,1,1],f32>
%2461 = torch.aten.add.Tensor %2458, %924, %int1 : !torch.vtensor<[4,32,1,1],f32>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,32,1,1],f32>
%2462 = torch.aten.rsqrt %2461 : !torch.vtensor<[4,32,1,1],f32> -> !torch.vtensor<[4,32,1,1],f32>
%2463 = torch.aten.sub.Tensor %2443, %2460, %int1 : !torch.vtensor<[4,32,20,2304],f16>, !torch.vtensor<[4,32,1,1],f32>, !torch.int -> !torch.vtensor<[4,32,20,2304],f32>
%2464 = torch.aten.mul.Tensor %2463, %2462 : !torch.vtensor<[4,32,20,2304],f32>, !torch.vtensor<[4,32,1,1],f32> -> !torch.vtensor<[4,32,20,2304],f32>
%2465 = torch.aten.view %2464, %2376 : !torch.vtensor<[4,32,20,2304],f32>, !torch.list<int> -> !torch.vtensor<[4,640,48,48],f32>
%2466 = torch.aten.unsqueeze %817, %int0 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[1,640],f16>
%2467 = torch.aten.unsqueeze %2466, %int2 : !torch.vtensor<[1,640],f16>, !torch.int -> !torch.vtensor<[1,640,1],f16>
%2468 = torch.aten.unsqueeze %2467, %int3 : !torch.vtensor<[1,640,1],f16>, !torch.int -> !torch.vtensor<[1,640,1,1],f16>
%2469 = torch.aten.unsqueeze %816, %int0 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[1,640],f16>
%2470 = torch.aten.unsqueeze %2469, %int2 : !torch.vtensor<[1,640],f16>, !torch.int -> !torch.vtensor<[1,640,1],f16>
%2471 = torch.aten.unsqueeze %2470, %int3 : !torch.vtensor<[1,640,1],f16>, !torch.int -> !torch.vtensor<[1,640,1,1],f16>
%2472 = torch.aten.mul.Tensor %2465, %2471 : !torch.vtensor<[4,640,48,48],f32>, !torch.vtensor<[1,640,1,1],f16> -> !torch.vtensor<[4,640,48,48],f32>
%2473 = torch.aten.add.Tensor %2472, %2468, %int1 : !torch.vtensor<[4,640,48,48],f32>, !torch.vtensor<[1,640,1,1],f16>, !torch.int -> !torch.vtensor<[4,640,48,48],f32>
%2474 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%2475 = torch.aten.to.dtype %2474, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%2476 = torch.prim.ListConstruct %int4, %int640, %int48, %int48 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2477 = torch.aten.broadcast_to %2475, %2476 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[4,640,48,48],f16>
%2478 = torch.aten.to.dtype %2473, %int5, %false, %false, %none : !torch.vtensor<[4,640,48,48],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,640,48,48],f16>
%2479 = torch.prim.ListConstruct %int4, %int640, %int48, %int48 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2480 = torch.aten.broadcast_to %2478, %2479 : !torch.vtensor<[4,640,48,48],f16>, !torch.list<int> -> !torch.vtensor<[4,640,48,48],f16>
%2481 = torch.aten.permute %2480, %1163 : !torch.vtensor<[4,640,48,48],f16>, !torch.list<int> -> !torch.vtensor<[4,48,48,640],f16>
%2482 = torch.prim.ListConstruct %int4, %int2304, %int640 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2483 = torch.aten.view %2481, %2482 : !torch.vtensor<[4,48,48,640],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,640],f16>
%2484 = torch.aten.clone %2483, %int0 : !torch.vtensor<[4,2304,640],f16>, !torch.int -> !torch.vtensor<[4,2304,640],f16>
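    // Transformer entry: NCHW -> NHWC, then the 48x48 positions are flattened into a
    // sequence, giving [batch=4, tokens=2304, channels=640]. The tokens are
    // fake-quantized below for the projection that follows.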
%2485 = torch.prim.ListConstruct %int4, %int2304, %int40, %int16 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2486 = torch.aten.view %2484, %2485 : !torch.vtensor<[4,2304,640],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,40,16],f16>
%2487 = torch.aten.abs %2486 : !torch.vtensor<[4,2304,40,16],f16> -> !torch.vtensor<[4,2304,40,16],f16>
%values_70, %indices_71 = torch.aten.max.dim %2487, %int3, %true : !torch.vtensor<[4,2304,40,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[4,2304,40,1],f16>, !torch.vtensor<[4,2304,40,1],si64>
%2488 = torch.prim.ListConstruct %int4, %int2304, %int40, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2489 = torch.aten.view %values_70, %2488 : !torch.vtensor<[4,2304,40,1],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,40,1],f16>
%2490 = torch.aten.broadcast_to %2489, %2485 : !torch.vtensor<[4,2304,40,1],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,40,16],f16>
%2491 = torch.aten.clone %2490, %int0 : !torch.vtensor<[4,2304,40,16],f16>, !torch.int -> !torch.vtensor<[4,2304,40,16],f16>
%2492 = torch.aten.view %2491, %2482 : !torch.vtensor<[4,2304,40,16],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,640],f16>
%2493 = torch.aten.div.Scalar %2492, %int128 : !torch.vtensor<[4,2304,640],f16>, !torch.int -> !torch.vtensor<[4,2304,640],f16>
%2494 = torch.aten.detach %936 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%2495 = torch.aten.detach %2494 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%2496 = torch.aten.div.Tensor %2484, %2493 : !torch.vtensor<[4,2304,640],f16>, !torch.vtensor<[4,2304,640],f16> -> !torch.vtensor<[4,2304,640],f16>
%2497 = torch.aten.add.Tensor %2496, %2495, %int1 : !torch.vtensor<[4,2304,640],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,2304,640],f16>
%2498 = torch.aten.round %2497 : !torch.vtensor<[4,2304,640],f16> -> !torch.vtensor<[4,2304,640],f16>
%2499 = torch.aten.clamp %2498, %int-128, %int127 : !torch.vtensor<[4,2304,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,2304,640],f16>
%2500 = torch.aten.sub.Tensor %2499, %2495, %int1 : !torch.vtensor<[4,2304,640],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,2304,640],f16>
%2501 = torch.aten.mul.Tensor %2500, %2493 : !torch.vtensor<[4,2304,640],f16>, !torch.vtensor<[4,2304,640],f16> -> !torch.vtensor<[4,2304,640],f16>
%2502 = torch.prim.ListConstruct %int640, %int40, %int16 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2503 = torch.aten.broadcast_to %815, %2502 : !torch.vtensor<[640,40,1],f16>, !torch.list<int> -> !torch.vtensor<[640,40,16],f16>
%2504 = torch.aten.clone %2503, %int0 : !torch.vtensor<[640,40,16],f16>, !torch.int -> !torch.vtensor<[640,40,16],f16>
%2505 = torch.prim.ListConstruct %int640, %int640 : (!torch.int, !torch.int) -> !torch.list<int>
%2506 = torch.aten.view %2504, %2505 : !torch.vtensor<[640,40,16],f16>, !torch.list<int> -> !torch.vtensor<[640,640],f16>
%2507 = torch.aten.mul.Tensor %814, %2506 : !torch.vtensor<[640,640],si8>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
%2508 = torch.aten.transpose.int %2507, %int0, %int1 : !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,640],f16>
%2509 = torch.prim.ListConstruct %int9216, %int640 : (!torch.int, !torch.int) -> !torch.list<int>
%2510 = torch.aten.view %2501, %2509 : !torch.vtensor<[4,2304,640],f16>, !torch.list<int> -> !torch.vtensor<[9216,640],f16>
%2511 = torch.aten.mm %2510, %2508 : !torch.vtensor<[9216,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[9216,640],f16>
%2512 = torch.aten.mul.Scalar %813, %int1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640],f16>
%2513 = torch.aten.add.Tensor %2512, %2511, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[9216,640],f16>, !torch.int -> !torch.vtensor<[9216,640],f16>
%2514 = torch.aten.view %2513, %2482 : !torch.vtensor<[9216,640],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,640],f16>
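    // Quantized 640 -> 640 projection over all 9216 token rows, with bias %813. The block
    // below is a LayerNorm over the channel dimension: mean and variance on dim 2,
    // eps = 1.0e-05, affine weight %812 and bias %811.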
%2515 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%2516 = torch.aten.sum.dim_IntList %2514, %2515, %true, %none : !torch.vtensor<[4,2304,640],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,2304,1],f16>
%2517 = torch.aten.div.Scalar %2516, %int640 : !torch.vtensor<[4,2304,1],f16>, !torch.int -> !torch.vtensor<[4,2304,1],f16>
%2518 = torch.prim.ListConstruct %int4, %int2304, %int640 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2519 = torch.aten.broadcast_to %2517, %2518 : !torch.vtensor<[4,2304,1],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,640],f16>
%2520 = torch.aten.sub.Tensor %2514, %2519, %int1 : !torch.vtensor<[4,2304,640],f16>, !torch.vtensor<[4,2304,640],f16>, !torch.int -> !torch.vtensor<[4,2304,640],f16>
%2521 = torch.aten.mul.Tensor %2520, %2520 : !torch.vtensor<[4,2304,640],f16>, !torch.vtensor<[4,2304,640],f16> -> !torch.vtensor<[4,2304,640],f16>
%2522 = torch.aten.sum.dim_IntList %2521, %2515, %true, %none : !torch.vtensor<[4,2304,640],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,2304,1],f16>
%2523 = torch.aten.div.Scalar %2522, %int640 : !torch.vtensor<[4,2304,1],f16>, !torch.int -> !torch.vtensor<[4,2304,1],f16>
%2524 = torch.aten.add.Scalar %2523, %float1.000000e-05, %int1 : !torch.vtensor<[4,2304,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[4,2304,1],f16>
%2525 = torch.aten.rsqrt %2524 : !torch.vtensor<[4,2304,1],f16> -> !torch.vtensor<[4,2304,1],f16>
%2526 = torch.prim.ListConstruct %int4, %int2304, %int640 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2527 = torch.aten.broadcast_to %2525, %2526 : !torch.vtensor<[4,2304,1],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,640],f16>
%2528 = torch.aten.mul.Tensor %2520, %2527 : !torch.vtensor<[4,2304,640],f16>, !torch.vtensor<[4,2304,640],f16> -> !torch.vtensor<[4,2304,640],f16>
%2529 = torch.aten.mul.Tensor %2528, %812 : !torch.vtensor<[4,2304,640],f16>, !torch.vtensor<[640],f16> -> !torch.vtensor<[4,2304,640],f16>
%2530 = torch.aten.add.Tensor %2529, %811, %int1 : !torch.vtensor<[4,2304,640],f16>, !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[4,2304,640],f16>
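    // Self-attention: the normalized tokens are fake-quantized and projected through three
    // quantized, bias-free 640 -> 640 linears, Q (%809), K (%807), and V (%805); each
    // result is reshaped to 10 heads of head-dim 64, i.e. [40, 2304, 64].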
%2531 = torch.aten.view %2530, %2485 : !torch.vtensor<[4,2304,640],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,40,16],f16>
%2532 = torch.aten.abs %2531 : !torch.vtensor<[4,2304,40,16],f16> -> !torch.vtensor<[4,2304,40,16],f16>
%values_72, %indices_73 = torch.aten.max.dim %2532, %int3, %true : !torch.vtensor<[4,2304,40,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[4,2304,40,1],f16>, !torch.vtensor<[4,2304,40,1],si64>
%2533 = torch.aten.view %values_72, %2488 : !torch.vtensor<[4,2304,40,1],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,40,1],f16>
%2534 = torch.aten.broadcast_to %2533, %2485 : !torch.vtensor<[4,2304,40,1],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,40,16],f16>
%2535 = torch.aten.clone %2534, %int0 : !torch.vtensor<[4,2304,40,16],f16>, !torch.int -> !torch.vtensor<[4,2304,40,16],f16>
%2536 = torch.aten.view %2535, %2482 : !torch.vtensor<[4,2304,40,16],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,640],f16>
%2537 = torch.aten.div.Scalar %2536, %int128 : !torch.vtensor<[4,2304,640],f16>, !torch.int -> !torch.vtensor<[4,2304,640],f16>
%2538 = torch.aten.detach %936 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%2539 = torch.aten.detach %2538 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%2540 = torch.aten.div.Tensor %2530, %2537 : !torch.vtensor<[4,2304,640],f16>, !torch.vtensor<[4,2304,640],f16> -> !torch.vtensor<[4,2304,640],f16>
%2541 = torch.aten.add.Tensor %2540, %2539, %int1 : !torch.vtensor<[4,2304,640],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,2304,640],f16>
%2542 = torch.aten.round %2541 : !torch.vtensor<[4,2304,640],f16> -> !torch.vtensor<[4,2304,640],f16>
%2543 = torch.aten.clamp %2542, %int-128, %int127 : !torch.vtensor<[4,2304,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,2304,640],f16>
%2544 = torch.aten.sub.Tensor %2543, %2539, %int1 : !torch.vtensor<[4,2304,640],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,2304,640],f16>
%2545 = torch.aten.mul.Tensor %2544, %2537 : !torch.vtensor<[4,2304,640],f16>, !torch.vtensor<[4,2304,640],f16> -> !torch.vtensor<[4,2304,640],f16>
%2546 = torch.aten.broadcast_to %810, %2502 : !torch.vtensor<[640,40,1],f16>, !torch.list<int> -> !torch.vtensor<[640,40,16],f16>
%2547 = torch.aten.clone %2546, %int0 : !torch.vtensor<[640,40,16],f16>, !torch.int -> !torch.vtensor<[640,40,16],f16>
%2548 = torch.aten.view %2547, %2505 : !torch.vtensor<[640,40,16],f16>, !torch.list<int> -> !torch.vtensor<[640,640],f16>
%2549 = torch.aten.mul.Tensor %809, %2548 : !torch.vtensor<[640,640],si8>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
%2550 = torch.aten.transpose.int %2549, %int0, %int1 : !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,640],f16>
%2551 = torch.aten.view %2545, %2509 : !torch.vtensor<[4,2304,640],f16>, !torch.list<int> -> !torch.vtensor<[9216,640],f16>
%2552 = torch.aten.mm %2551, %2550 : !torch.vtensor<[9216,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[9216,640],f16>
%2553 = torch.aten.view %2552, %2482 : !torch.vtensor<[9216,640],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,640],f16>
%2554 = torch.prim.ListConstruct %int4, %int2304, %int10, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2555 = torch.aten.view %2553, %2554 : !torch.vtensor<[4,2304,640],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,10,64],f16>
%2556 = torch.aten.permute %2555, %1239 : !torch.vtensor<[4,2304,10,64],f16>, !torch.list<int> -> !torch.vtensor<[4,10,2304,64],f16>
%2557 = torch.aten.clone %2556, %int0 : !torch.vtensor<[4,10,2304,64],f16>, !torch.int -> !torch.vtensor<[4,10,2304,64],f16>
%2558 = torch.prim.ListConstruct %int40, %int2304, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2559 = torch.aten.view %2557, %2558 : !torch.vtensor<[4,10,2304,64],f16>, !torch.list<int> -> !torch.vtensor<[40,2304,64],f16>
%2560 = torch.aten.view %2530, %2485 : !torch.vtensor<[4,2304,640],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,40,16],f16>
%2561 = torch.aten.abs %2560 : !torch.vtensor<[4,2304,40,16],f16> -> !torch.vtensor<[4,2304,40,16],f16>
%values_74, %indices_75 = torch.aten.max.dim %2561, %int3, %true : !torch.vtensor<[4,2304,40,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[4,2304,40,1],f16>, !torch.vtensor<[4,2304,40,1],si64>
%2562 = torch.aten.view %values_74, %2488 : !torch.vtensor<[4,2304,40,1],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,40,1],f16>
%2563 = torch.aten.broadcast_to %2562, %2485 : !torch.vtensor<[4,2304,40,1],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,40,16],f16>
%2564 = torch.aten.clone %2563, %int0 : !torch.vtensor<[4,2304,40,16],f16>, !torch.int -> !torch.vtensor<[4,2304,40,16],f16>
%2565 = torch.aten.view %2564, %2482 : !torch.vtensor<[4,2304,40,16],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,640],f16>
%2566 = torch.aten.div.Scalar %2565, %int128 : !torch.vtensor<[4,2304,640],f16>, !torch.int -> !torch.vtensor<[4,2304,640],f16>
%2567 = torch.aten.detach %936 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%2568 = torch.aten.detach %2567 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%2569 = torch.aten.div.Tensor %2530, %2566 : !torch.vtensor<[4,2304,640],f16>, !torch.vtensor<[4,2304,640],f16> -> !torch.vtensor<[4,2304,640],f16>
%2570 = torch.aten.add.Tensor %2569, %2568, %int1 : !torch.vtensor<[4,2304,640],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,2304,640],f16>
%2571 = torch.aten.round %2570 : !torch.vtensor<[4,2304,640],f16> -> !torch.vtensor<[4,2304,640],f16>
%2572 = torch.aten.clamp %2571, %int-128, %int127 : !torch.vtensor<[4,2304,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,2304,640],f16>
%2573 = torch.aten.sub.Tensor %2572, %2568, %int1 : !torch.vtensor<[4,2304,640],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,2304,640],f16>
%2574 = torch.aten.mul.Tensor %2573, %2566 : !torch.vtensor<[4,2304,640],f16>, !torch.vtensor<[4,2304,640],f16> -> !torch.vtensor<[4,2304,640],f16>
%2575 = torch.aten.broadcast_to %808, %2502 : !torch.vtensor<[640,40,1],f16>, !torch.list<int> -> !torch.vtensor<[640,40,16],f16>
%2576 = torch.aten.clone %2575, %int0 : !torch.vtensor<[640,40,16],f16>, !torch.int -> !torch.vtensor<[640,40,16],f16>
%2577 = torch.aten.view %2576, %2505 : !torch.vtensor<[640,40,16],f16>, !torch.list<int> -> !torch.vtensor<[640,640],f16>
%2578 = torch.aten.mul.Tensor %807, %2577 : !torch.vtensor<[640,640],si8>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
%2579 = torch.aten.transpose.int %2578, %int0, %int1 : !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,640],f16>
%2580 = torch.aten.view %2574, %2509 : !torch.vtensor<[4,2304,640],f16>, !torch.list<int> -> !torch.vtensor<[9216,640],f16>
%2581 = torch.aten.mm %2580, %2579 : !torch.vtensor<[9216,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[9216,640],f16>
%2582 = torch.aten.view %2581, %2482 : !torch.vtensor<[9216,640],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,640],f16>
%2583 = torch.aten.view %2530, %2485 : !torch.vtensor<[4,2304,640],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,40,16],f16>
%2584 = torch.aten.abs %2583 : !torch.vtensor<[4,2304,40,16],f16> -> !torch.vtensor<[4,2304,40,16],f16>
%values_76, %indices_77 = torch.aten.max.dim %2584, %int3, %true : !torch.vtensor<[4,2304,40,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[4,2304,40,1],f16>, !torch.vtensor<[4,2304,40,1],si64>
%2585 = torch.aten.view %values_76, %2488 : !torch.vtensor<[4,2304,40,1],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,40,1],f16>
%2586 = torch.aten.broadcast_to %2585, %2485 : !torch.vtensor<[4,2304,40,1],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,40,16],f16>
%2587 = torch.aten.clone %2586, %int0 : !torch.vtensor<[4,2304,40,16],f16>, !torch.int -> !torch.vtensor<[4,2304,40,16],f16>
%2588 = torch.aten.view %2587, %2482 : !torch.vtensor<[4,2304,40,16],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,640],f16>
%2589 = torch.aten.div.Scalar %2588, %int128 : !torch.vtensor<[4,2304,640],f16>, !torch.int -> !torch.vtensor<[4,2304,640],f16>
%2590 = torch.aten.detach %936 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%2591 = torch.aten.detach %2590 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%2592 = torch.aten.div.Tensor %2530, %2589 : !torch.vtensor<[4,2304,640],f16>, !torch.vtensor<[4,2304,640],f16> -> !torch.vtensor<[4,2304,640],f16>
%2593 = torch.aten.add.Tensor %2592, %2591, %int1 : !torch.vtensor<[4,2304,640],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,2304,640],f16>
%2594 = torch.aten.round %2593 : !torch.vtensor<[4,2304,640],f16> -> !torch.vtensor<[4,2304,640],f16>
%2595 = torch.aten.clamp %2594, %int-128, %int127 : !torch.vtensor<[4,2304,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,2304,640],f16>
%2596 = torch.aten.sub.Tensor %2595, %2591, %int1 : !torch.vtensor<[4,2304,640],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,2304,640],f16>
%2597 = torch.aten.mul.Tensor %2596, %2589 : !torch.vtensor<[4,2304,640],f16>, !torch.vtensor<[4,2304,640],f16> -> !torch.vtensor<[4,2304,640],f16>
%2598 = torch.aten.broadcast_to %806, %2502 : !torch.vtensor<[640,40,1],f16>, !torch.list<int> -> !torch.vtensor<[640,40,16],f16>
%2599 = torch.aten.clone %2598, %int0 : !torch.vtensor<[640,40,16],f16>, !torch.int -> !torch.vtensor<[640,40,16],f16>
%2600 = torch.aten.view %2599, %2505 : !torch.vtensor<[640,40,16],f16>, !torch.list<int> -> !torch.vtensor<[640,640],f16>
%2601 = torch.aten.mul.Tensor %805, %2600 : !torch.vtensor<[640,640],si8>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
%2602 = torch.aten.transpose.int %2601, %int0, %int1 : !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,640],f16>
%2603 = torch.aten.view %2597, %2509 : !torch.vtensor<[4,2304,640],f16>, !torch.list<int> -> !torch.vtensor<[9216,640],f16>
%2604 = torch.aten.mm %2603, %2602 : !torch.vtensor<[9216,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[9216,640],f16>
%2605 = torch.aten.view %2604, %2482 : !torch.vtensor<[9216,640],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,640],f16>
%2606 = torch.aten.view %2582, %2554 : !torch.vtensor<[4,2304,640],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,10,64],f16>
%2607 = torch.aten.permute %2606, %1239 : !torch.vtensor<[4,2304,10,64],f16>, !torch.list<int> -> !torch.vtensor<[4,10,2304,64],f16>
%2608 = torch.aten.clone %2607, %int0 : !torch.vtensor<[4,10,2304,64],f16>, !torch.int -> !torch.vtensor<[4,10,2304,64],f16>
%2609 = torch.aten.view %2608, %2558 : !torch.vtensor<[4,10,2304,64],f16>, !torch.list<int> -> !torch.vtensor<[40,2304,64],f16>
%2610 = torch.aten.view %2605, %2554 : !torch.vtensor<[4,2304,640],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,10,64],f16>
%2611 = torch.aten.permute %2610, %1239 : !torch.vtensor<[4,2304,10,64],f16>, !torch.list<int> -> !torch.vtensor<[4,10,2304,64],f16>
%2612 = torch.aten.clone %2611, %int0 : !torch.vtensor<[4,10,2304,64],f16>, !torch.int -> !torch.vtensor<[4,10,2304,64],f16>
%2613 = torch.aten.view %2612, %2558 : !torch.vtensor<[4,10,2304,64],f16>, !torch.list<int> -> !torch.vtensor<[40,2304,64],f16>
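    // Scaled dot-product attention, computed in f32: scores = (Q @ K^T) * 0.125, where
    // 0.125 = 1/sqrt(64), followed by a numerically stable softmax (row max subtracted
    // before exp). The empty tensor %2629 added with alpha 0 is the beta = 0 input left
    // over from PyTorch's baddbmm decomposition and contributes nothing. %cuda3A0 appears
    // to be the hex-escaped MLIR name for the device value "cuda:0".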
%2614 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%2615 = torch.aten.to.dtype %2614, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%2616 = torch.prim.ListConstruct %int40, %int2304, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2617 = torch.aten.broadcast_to %2615, %2616 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[40,2304,64],f32>
%2618 = torch.aten.to.dtype %2559, %int6, %false, %false, %none : !torch.vtensor<[40,2304,64],f16>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[40,2304,64],f32>
%2619 = torch.prim.ListConstruct %int40, %int2304, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2620 = torch.aten.broadcast_to %2618, %2619 : !torch.vtensor<[40,2304,64],f32>, !torch.list<int> -> !torch.vtensor<[40,2304,64],f32>
%2621 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%2622 = torch.aten.to.dtype %2621, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%2623 = torch.prim.ListConstruct %int40, %int2304, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2624 = torch.aten.broadcast_to %2622, %2623 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[40,2304,64],f32>
%2625 = torch.aten.to.dtype %2609, %int6, %false, %false, %none : !torch.vtensor<[40,2304,64],f16>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[40,2304,64],f32>
%2626 = torch.prim.ListConstruct %int40, %int2304, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2627 = torch.aten.broadcast_to %2625, %2626 : !torch.vtensor<[40,2304,64],f32>, !torch.list<int> -> !torch.vtensor<[40,2304,64],f32>
%2628 = torch.prim.ListConstruct %int40, %int2304, %int2304 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2629 = torch.aten.empty.memory_format %2628, %int6, %none, %cuda3A0, %false, %none : !torch.list<int>, !torch.int, !torch.none, !torch.Device, !torch.bool, !torch.none -> !torch.vtensor<[40,2304,2304],f32>
%2630 = torch.aten.transpose.int %2627, %int-1, %int-2 : !torch.vtensor<[40,2304,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[40,64,2304],f32>
%2631 = torch.aten.bmm %2620, %2630 : !torch.vtensor<[40,2304,64],f32>, !torch.vtensor<[40,64,2304],f32> -> !torch.vtensor<[40,2304,2304],f32>
%2632 = torch.aten.mul.Scalar %2631, %float1.250000e-01 : !torch.vtensor<[40,2304,2304],f32>, !torch.float -> !torch.vtensor<[40,2304,2304],f32>
%2633 = torch.aten.add.Tensor %2632, %2629, %int0 : !torch.vtensor<[40,2304,2304],f32>, !torch.vtensor<[40,2304,2304],f32>, !torch.int -> !torch.vtensor<[40,2304,2304],f32>
%values_78, %indices_79 = torch.aten.max.dim %2633, %int-1, %true : !torch.vtensor<[40,2304,2304],f32>, !torch.int, !torch.bool -> !torch.vtensor<[40,2304,1],f32>, !torch.vtensor<[40,2304,1],si64>
%2634 = torch.aten.sub.Tensor %2633, %values_78, %float1.000000e00 : !torch.vtensor<[40,2304,2304],f32>, !torch.vtensor<[40,2304,1],f32>, !torch.float -> !torch.vtensor<[40,2304,2304],f32>
%2635 = torch.aten.exp %2634 : !torch.vtensor<[40,2304,2304],f32> -> !torch.vtensor<[40,2304,2304],f32>
%2636 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%2637 = torch.aten.sum.dim_IntList %2635, %2636, %true, %none : !torch.vtensor<[40,2304,2304],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[40,2304,1],f32>
%2638 = torch.aten.div.Tensor %2635, %2637 : !torch.vtensor<[40,2304,2304],f32>, !torch.vtensor<[40,2304,1],f32> -> !torch.vtensor<[40,2304,2304],f32>
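    // editor note: the probabilities are cast back to f16 (again via a broadcast pattern
    // with an apparently dead zero tensor) and matmul'd against V (%2613), i.e. roughly
    //   out = attn_probs.half() @ v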
%2639 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%2640 = torch.aten.to.dtype %2639, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%2641 = torch.prim.ListConstruct %int40, %int2304, %int2304 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2642 = torch.aten.broadcast_to %2640, %2641 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[40,2304,2304],f16>
%2643 = torch.aten.to.dtype %2638, %int5, %false, %false, %none : !torch.vtensor<[40,2304,2304],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[40,2304,2304],f16>
%2644 = torch.prim.ListConstruct %int40, %int2304, %int2304 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2645 = torch.aten.broadcast_to %2643, %2644 : !torch.vtensor<[40,2304,2304],f16>, !torch.list<int> -> !torch.vtensor<[40,2304,2304],f16>
%2646 = torch.aten.bmm %2645, %2613 : !torch.vtensor<[40,2304,2304],f16>, !torch.vtensor<[40,2304,64],f16> -> !torch.vtensor<[40,2304,64],f16>
%2647 = torch.prim.ListConstruct %int4, %int10, %int2304, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2648 = torch.aten.view %2646, %2647 : !torch.vtensor<[40,2304,64],f16>, !torch.list<int> -> !torch.vtensor<[4,10,2304,64],f16>
%2649 = torch.aten.permute %2648, %1239 : !torch.vtensor<[4,10,2304,64],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,10,64],f16>
%2650 = torch.aten.clone %2649, %int0 : !torch.vtensor<[4,2304,10,64],f16>, !torch.int -> !torch.vtensor<[4,2304,10,64],f16>
%2651 = torch.aten.view %2650, %2482 : !torch.vtensor<[4,2304,10,64],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,640],f16>
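    // editor note: %2652-%2666 is a fake-quantization pattern that recurs throughout this
    // module: reshape to [4,2304,40,16] (groups of 16 channels), take the per-group absmax,
    // divide by 128 to get a scale, then round/clamp to [-128,127] and dequantize. %936
    // appears to act as a zero-point constant. PyTorch-style sketch (an assumption):
    //   scale = x.view(4, 2304, 40, 16).abs().amax(-1, keepdim=True) / 128
    //   q = (x / scale + zp).round().clamp(-128, 127)
    //   x_dq = (q - zp) * scale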
%2652 = torch.aten.view %2651, %2485 : !torch.vtensor<[4,2304,640],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,40,16],f16>
%2653 = torch.aten.abs %2652 : !torch.vtensor<[4,2304,40,16],f16> -> !torch.vtensor<[4,2304,40,16],f16>
%values_80, %indices_81 = torch.aten.max.dim %2653, %int3, %true : !torch.vtensor<[4,2304,40,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[4,2304,40,1],f16>, !torch.vtensor<[4,2304,40,1],si64>
%2654 = torch.aten.view %values_80, %2488 : !torch.vtensor<[4,2304,40,1],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,40,1],f16>
%2655 = torch.aten.broadcast_to %2654, %2485 : !torch.vtensor<[4,2304,40,1],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,40,16],f16>
%2656 = torch.aten.clone %2655, %int0 : !torch.vtensor<[4,2304,40,16],f16>, !torch.int -> !torch.vtensor<[4,2304,40,16],f16>
%2657 = torch.aten.view %2656, %2482 : !torch.vtensor<[4,2304,40,16],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,640],f16>
%2658 = torch.aten.div.Scalar %2657, %int128 : !torch.vtensor<[4,2304,640],f16>, !torch.int -> !torch.vtensor<[4,2304,640],f16>
%2659 = torch.aten.detach %936 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%2660 = torch.aten.detach %2659 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%2661 = torch.aten.div.Tensor %2651, %2658 : !torch.vtensor<[4,2304,640],f16>, !torch.vtensor<[4,2304,640],f16> -> !torch.vtensor<[4,2304,640],f16>
%2662 = torch.aten.add.Tensor %2661, %2660, %int1 : !torch.vtensor<[4,2304,640],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,2304,640],f16>
%2663 = torch.aten.round %2662 : !torch.vtensor<[4,2304,640],f16> -> !torch.vtensor<[4,2304,640],f16>
%2664 = torch.aten.clamp %2663, %int-128, %int127 : !torch.vtensor<[4,2304,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,2304,640],f16>
%2665 = torch.aten.sub.Tensor %2664, %2660, %int1 : !torch.vtensor<[4,2304,640],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,2304,640],f16>
%2666 = torch.aten.mul.Tensor %2665, %2658 : !torch.vtensor<[4,2304,640],f16>, !torch.vtensor<[4,2304,640],f16> -> !torch.vtensor<[4,2304,640],f16>
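    // editor note: the attention out-projection. The si8 weight (%803) is rescaled by
    // per-block f16 scales (%804, broadcast from [640,40,1] to [640,640]), transposed, and
    // applied as a linear with bias %802; the result is added back onto the residual
    // stream (%2514).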
%2667 = torch.aten.broadcast_to %804, %2502 : !torch.vtensor<[640,40,1],f16>, !torch.list<int> -> !torch.vtensor<[640,40,16],f16>
%2668 = torch.aten.clone %2667, %int0 : !torch.vtensor<[640,40,16],f16>, !torch.int -> !torch.vtensor<[640,40,16],f16>
%2669 = torch.aten.view %2668, %2505 : !torch.vtensor<[640,40,16],f16>, !torch.list<int> -> !torch.vtensor<[640,640],f16>
%2670 = torch.aten.mul.Tensor %803, %2669 : !torch.vtensor<[640,640],si8>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
%2671 = torch.aten.transpose.int %2670, %int0, %int1 : !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,640],f16>
%2672 = torch.aten.view %2666, %2509 : !torch.vtensor<[4,2304,640],f16>, !torch.list<int> -> !torch.vtensor<[9216,640],f16>
%2673 = torch.aten.mm %2672, %2671 : !torch.vtensor<[9216,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[9216,640],f16>
%2674 = torch.aten.mul.Scalar %802, %int1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640],f16>
%2675 = torch.aten.add.Tensor %2674, %2673, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[9216,640],f16>, !torch.int -> !torch.vtensor<[9216,640],f16>
%2676 = torch.aten.view %2675, %2482 : !torch.vtensor<[9216,640],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,640],f16>
%2677 = torch.aten.add.Tensor %2676, %2514, %int1 : !torch.vtensor<[4,2304,640],f16>, !torch.vtensor<[4,2304,640],f16>, !torch.int -> !torch.vtensor<[4,2304,640],f16>
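    // editor note: %2678-%2693 is LayerNorm over the 640-channel dim, written out as mean,
    // centered second moment, rsqrt(var + 1e-5), then affine weight %801 and bias %800 --
    // roughly F.layer_norm(x, (640,), %801, %800, eps=1e-5) in PyTorch terms (assumption).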
%2678 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%2679 = torch.aten.sum.dim_IntList %2677, %2678, %true, %none : !torch.vtensor<[4,2304,640],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,2304,1],f16>
%2680 = torch.aten.div.Scalar %2679, %int640 : !torch.vtensor<[4,2304,1],f16>, !torch.int -> !torch.vtensor<[4,2304,1],f16>
%2681 = torch.prim.ListConstruct %int4, %int2304, %int640 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2682 = torch.aten.broadcast_to %2680, %2681 : !torch.vtensor<[4,2304,1],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,640],f16>
%2683 = torch.aten.sub.Tensor %2677, %2682, %int1 : !torch.vtensor<[4,2304,640],f16>, !torch.vtensor<[4,2304,640],f16>, !torch.int -> !torch.vtensor<[4,2304,640],f16>
%2684 = torch.aten.mul.Tensor %2683, %2683 : !torch.vtensor<[4,2304,640],f16>, !torch.vtensor<[4,2304,640],f16> -> !torch.vtensor<[4,2304,640],f16>
%2685 = torch.aten.sum.dim_IntList %2684, %2678, %true, %none : !torch.vtensor<[4,2304,640],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,2304,1],f16>
%2686 = torch.aten.div.Scalar %2685, %int640 : !torch.vtensor<[4,2304,1],f16>, !torch.int -> !torch.vtensor<[4,2304,1],f16>
%2687 = torch.aten.add.Scalar %2686, %float1.000000e-05, %int1 : !torch.vtensor<[4,2304,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[4,2304,1],f16>
%2688 = torch.aten.rsqrt %2687 : !torch.vtensor<[4,2304,1],f16> -> !torch.vtensor<[4,2304,1],f16>
%2689 = torch.prim.ListConstruct %int4, %int2304, %int640 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2690 = torch.aten.broadcast_to %2688, %2689 : !torch.vtensor<[4,2304,1],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,640],f16>
%2691 = torch.aten.mul.Tensor %2683, %2690 : !torch.vtensor<[4,2304,640],f16>, !torch.vtensor<[4,2304,640],f16> -> !torch.vtensor<[4,2304,640],f16>
%2692 = torch.aten.mul.Tensor %2691, %801 : !torch.vtensor<[4,2304,640],f16>, !torch.vtensor<[640],f16> -> !torch.vtensor<[4,2304,640],f16>
%2693 = torch.aten.add.Tensor %2692, %800, %int1 : !torch.vtensor<[4,2304,640],f16>, !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[4,2304,640],f16>
%2694 = torch.aten.view %2693, %2485 : !torch.vtensor<[4,2304,640],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,40,16],f16>
%2695 = torch.aten.abs %2694 : !torch.vtensor<[4,2304,40,16],f16> -> !torch.vtensor<[4,2304,40,16],f16>
%values_82, %indices_83 = torch.aten.max.dim %2695, %int3, %true : !torch.vtensor<[4,2304,40,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[4,2304,40,1],f16>, !torch.vtensor<[4,2304,40,1],si64>
%2696 = torch.aten.view %values_82, %2488 : !torch.vtensor<[4,2304,40,1],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,40,1],f16>
%2697 = torch.aten.broadcast_to %2696, %2485 : !torch.vtensor<[4,2304,40,1],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,40,16],f16>
%2698 = torch.aten.clone %2697, %int0 : !torch.vtensor<[4,2304,40,16],f16>, !torch.int -> !torch.vtensor<[4,2304,40,16],f16>
%2699 = torch.aten.view %2698, %2482 : !torch.vtensor<[4,2304,40,16],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,640],f16>
%2700 = torch.aten.div.Scalar %2699, %int128 : !torch.vtensor<[4,2304,640],f16>, !torch.int -> !torch.vtensor<[4,2304,640],f16>
%2701 = torch.aten.detach %936 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%2702 = torch.aten.detach %2701 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%2703 = torch.aten.div.Tensor %2693, %2700 : !torch.vtensor<[4,2304,640],f16>, !torch.vtensor<[4,2304,640],f16> -> !torch.vtensor<[4,2304,640],f16>
%2704 = torch.aten.add.Tensor %2703, %2702, %int1 : !torch.vtensor<[4,2304,640],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,2304,640],f16>
%2705 = torch.aten.round %2704 : !torch.vtensor<[4,2304,640],f16> -> !torch.vtensor<[4,2304,640],f16>
%2706 = torch.aten.clamp %2705, %int-128, %int127 : !torch.vtensor<[4,2304,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,2304,640],f16>
%2707 = torch.aten.sub.Tensor %2706, %2702, %int1 : !torch.vtensor<[4,2304,640],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,2304,640],f16>
%2708 = torch.aten.mul.Tensor %2707, %2700 : !torch.vtensor<[4,2304,640],f16>, !torch.vtensor<[4,2304,640],f16> -> !torch.vtensor<[4,2304,640],f16>
%2709 = torch.aten.broadcast_to %799, %2502 : !torch.vtensor<[640,40,1],f16>, !torch.list<int> -> !torch.vtensor<[640,40,16],f16>
%2710 = torch.aten.clone %2709, %int0 : !torch.vtensor<[640,40,16],f16>, !torch.int -> !torch.vtensor<[640,40,16],f16>
%2711 = torch.aten.view %2710, %2505 : !torch.vtensor<[640,40,16],f16>, !torch.list<int> -> !torch.vtensor<[640,640],f16>
%2712 = torch.aten.mul.Tensor %798, %2711 : !torch.vtensor<[640,640],si8>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
%2713 = torch.aten.transpose.int %2712, %int0, %int1 : !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,640],f16>
%2714 = torch.aten.view %2708, %2509 : !torch.vtensor<[4,2304,640],f16>, !torch.list<int> -> !torch.vtensor<[9216,640],f16>
%2715 = torch.aten.mm %2714, %2713 : !torch.vtensor<[9216,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[9216,640],f16>
%2716 = torch.aten.view %2715, %2482 : !torch.vtensor<[9216,640],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,640],f16>
%2717 = torch.aten.view %2716, %2554 : !torch.vtensor<[4,2304,640],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,10,64],f16>
%2718 = torch.aten.permute %2717, %1239 : !torch.vtensor<[4,2304,10,64],f16>, !torch.list<int> -> !torch.vtensor<[4,10,2304,64],f16>
%2719 = torch.aten.clone %2718, %int0 : !torch.vtensor<[4,10,2304,64],f16>, !torch.int -> !torch.vtensor<[4,10,2304,64],f16>
%2720 = torch.aten.view %2719, %2558 : !torch.vtensor<[4,10,2304,64],f16>, !torch.list<int> -> !torch.vtensor<[40,2304,64],f16>
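    // editor note: cross-attention begins here. The normed hidden states were just projected
    // to Q (%2720); the encoder context %arg2 ([4,77,1024], presumably 77 text-encoder
    // tokens) is fake-quantized twice below and pushed through two bias-free 1024->640
    // linears to form K (%2746) and V (%2769).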
%2721 = torch.aten.view %arg2, %1405 : !torch.vtensor<[4,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,77,64,16],f16>
%2722 = torch.aten.abs %2721 : !torch.vtensor<[4,77,64,16],f16> -> !torch.vtensor<[4,77,64,16],f16>
%values_84, %indices_85 = torch.aten.max.dim %2722, %int3, %true : !torch.vtensor<[4,77,64,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[4,77,64,1],f16>, !torch.vtensor<[4,77,64,1],si64>
%2723 = torch.aten.view %values_84, %1408 : !torch.vtensor<[4,77,64,1],f16>, !torch.list<int> -> !torch.vtensor<[4,77,64,1],f16>
%2724 = torch.aten.broadcast_to %2723, %1405 : !torch.vtensor<[4,77,64,1],f16>, !torch.list<int> -> !torch.vtensor<[4,77,64,16],f16>
%2725 = torch.aten.clone %2724, %int0 : !torch.vtensor<[4,77,64,16],f16>, !torch.int -> !torch.vtensor<[4,77,64,16],f16>
%2726 = torch.aten.view %2725, %1412 : !torch.vtensor<[4,77,64,16],f16>, !torch.list<int> -> !torch.vtensor<[4,77,1024],f16>
%2727 = torch.aten.div.Scalar %2726, %int128 : !torch.vtensor<[4,77,1024],f16>, !torch.int -> !torch.vtensor<[4,77,1024],f16>
%2728 = torch.aten.detach %936 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%2729 = torch.aten.detach %2728 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%2730 = torch.aten.div.Tensor %arg2, %2727 : !torch.vtensor<[4,77,1024],f16>, !torch.vtensor<[4,77,1024],f16> -> !torch.vtensor<[4,77,1024],f16>
%2731 = torch.aten.add.Tensor %2730, %2729, %int1 : !torch.vtensor<[4,77,1024],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,77,1024],f16>
%2732 = torch.aten.round %2731 : !torch.vtensor<[4,77,1024],f16> -> !torch.vtensor<[4,77,1024],f16>
%2733 = torch.aten.clamp %2732, %int-128, %int127 : !torch.vtensor<[4,77,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,77,1024],f16>
%2734 = torch.aten.sub.Tensor %2733, %2729, %int1 : !torch.vtensor<[4,77,1024],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,77,1024],f16>
%2735 = torch.aten.mul.Tensor %2734, %2727 : !torch.vtensor<[4,77,1024],f16>, !torch.vtensor<[4,77,1024],f16> -> !torch.vtensor<[4,77,1024],f16>
%2736 = torch.prim.ListConstruct %int640, %int64, %int16 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2737 = torch.aten.broadcast_to %797, %2736 : !torch.vtensor<[640,64,1],f16>, !torch.list<int> -> !torch.vtensor<[640,64,16],f16>
%2738 = torch.aten.clone %2737, %int0 : !torch.vtensor<[640,64,16],f16>, !torch.int -> !torch.vtensor<[640,64,16],f16>
%2739 = torch.prim.ListConstruct %int640, %int1024 : (!torch.int, !torch.int) -> !torch.list<int>
%2740 = torch.aten.view %2738, %2739 : !torch.vtensor<[640,64,16],f16>, !torch.list<int> -> !torch.vtensor<[640,1024],f16>
%2741 = torch.aten.mul.Tensor %796, %2740 : !torch.vtensor<[640,1024],si8>, !torch.vtensor<[640,1024],f16> -> !torch.vtensor<[640,1024],f16>
%2742 = torch.aten.transpose.int %2741, %int0, %int1 : !torch.vtensor<[640,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[1024,640],f16>
%2743 = torch.aten.view %2735, %1430 : !torch.vtensor<[4,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[308,1024],f16>
%2744 = torch.aten.mm %2743, %2742 : !torch.vtensor<[308,1024],f16>, !torch.vtensor<[1024,640],f16> -> !torch.vtensor<[308,640],f16>
%2745 = torch.prim.ListConstruct %int4, %int77, %int640 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2746 = torch.aten.view %2744, %2745 : !torch.vtensor<[308,640],f16>, !torch.list<int> -> !torch.vtensor<[4,77,640],f16>
%2747 = torch.aten.view %arg2, %1405 : !torch.vtensor<[4,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,77,64,16],f16>
%2748 = torch.aten.abs %2747 : !torch.vtensor<[4,77,64,16],f16> -> !torch.vtensor<[4,77,64,16],f16>
%values_86, %indices_87 = torch.aten.max.dim %2748, %int3, %true : !torch.vtensor<[4,77,64,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[4,77,64,1],f16>, !torch.vtensor<[4,77,64,1],si64>
%2749 = torch.aten.view %values_86, %1408 : !torch.vtensor<[4,77,64,1],f16>, !torch.list<int> -> !torch.vtensor<[4,77,64,1],f16>
%2750 = torch.aten.broadcast_to %2749, %1405 : !torch.vtensor<[4,77,64,1],f16>, !torch.list<int> -> !torch.vtensor<[4,77,64,16],f16>
%2751 = torch.aten.clone %2750, %int0 : !torch.vtensor<[4,77,64,16],f16>, !torch.int -> !torch.vtensor<[4,77,64,16],f16>
%2752 = torch.aten.view %2751, %1412 : !torch.vtensor<[4,77,64,16],f16>, !torch.list<int> -> !torch.vtensor<[4,77,1024],f16>
%2753 = torch.aten.div.Scalar %2752, %int128 : !torch.vtensor<[4,77,1024],f16>, !torch.int -> !torch.vtensor<[4,77,1024],f16>
%2754 = torch.aten.detach %936 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%2755 = torch.aten.detach %2754 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%2756 = torch.aten.div.Tensor %arg2, %2753 : !torch.vtensor<[4,77,1024],f16>, !torch.vtensor<[4,77,1024],f16> -> !torch.vtensor<[4,77,1024],f16>
%2757 = torch.aten.add.Tensor %2756, %2755, %int1 : !torch.vtensor<[4,77,1024],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,77,1024],f16>
%2758 = torch.aten.round %2757 : !torch.vtensor<[4,77,1024],f16> -> !torch.vtensor<[4,77,1024],f16>
%2759 = torch.aten.clamp %2758, %int-128, %int127 : !torch.vtensor<[4,77,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,77,1024],f16>
%2760 = torch.aten.sub.Tensor %2759, %2755, %int1 : !torch.vtensor<[4,77,1024],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,77,1024],f16>
%2761 = torch.aten.mul.Tensor %2760, %2753 : !torch.vtensor<[4,77,1024],f16>, !torch.vtensor<[4,77,1024],f16> -> !torch.vtensor<[4,77,1024],f16>
%2762 = torch.aten.broadcast_to %795, %2736 : !torch.vtensor<[640,64,1],f16>, !torch.list<int> -> !torch.vtensor<[640,64,16],f16>
%2763 = torch.aten.clone %2762, %int0 : !torch.vtensor<[640,64,16],f16>, !torch.int -> !torch.vtensor<[640,64,16],f16>
%2764 = torch.aten.view %2763, %2739 : !torch.vtensor<[640,64,16],f16>, !torch.list<int> -> !torch.vtensor<[640,1024],f16>
%2765 = torch.aten.mul.Tensor %794, %2764 : !torch.vtensor<[640,1024],si8>, !torch.vtensor<[640,1024],f16> -> !torch.vtensor<[640,1024],f16>
%2766 = torch.aten.transpose.int %2765, %int0, %int1 : !torch.vtensor<[640,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[1024,640],f16>
%2767 = torch.aten.view %2761, %1430 : !torch.vtensor<[4,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[308,1024],f16>
%2768 = torch.aten.mm %2767, %2766 : !torch.vtensor<[308,1024],f16>, !torch.vtensor<[1024,640],f16> -> !torch.vtensor<[308,640],f16>
%2769 = torch.aten.view %2768, %2745 : !torch.vtensor<[308,640],f16>, !torch.list<int> -> !torch.vtensor<[4,77,640],f16>
%2770 = torch.prim.ListConstruct %int4, %int77, %int10, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2771 = torch.aten.view %2746, %2770 : !torch.vtensor<[4,77,640],f16>, !torch.list<int> -> !torch.vtensor<[4,77,10,64],f16>
%2772 = torch.aten.permute %2771, %1239 : !torch.vtensor<[4,77,10,64],f16>, !torch.list<int> -> !torch.vtensor<[4,10,77,64],f16>
%2773 = torch.aten.clone %2772, %int0 : !torch.vtensor<[4,10,77,64],f16>, !torch.int -> !torch.vtensor<[4,10,77,64],f16>
%2774 = torch.prim.ListConstruct %int40, %int77, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2775 = torch.aten.view %2773, %2774 : !torch.vtensor<[4,10,77,64],f16>, !torch.list<int> -> !torch.vtensor<[40,77,64],f16>
%2776 = torch.aten.view %2769, %2770 : !torch.vtensor<[4,77,640],f16>, !torch.list<int> -> !torch.vtensor<[4,77,10,64],f16>
%2777 = torch.aten.permute %2776, %1239 : !torch.vtensor<[4,77,10,64],f16>, !torch.list<int> -> !torch.vtensor<[4,10,77,64],f16>
%2778 = torch.aten.clone %2777, %int0 : !torch.vtensor<[4,10,77,64],f16>, !torch.int -> !torch.vtensor<[4,10,77,64],f16>
%2779 = torch.aten.view %2778, %2774 : !torch.vtensor<[4,10,77,64],f16>, !torch.list<int> -> !torch.vtensor<[40,77,64],f16>
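    // editor note: same baddbmm-style score computation and stable softmax as the
    // self-attention block above, now over a [40,2304,77] score matrix (image queries
    // attending to the 77 context tokens), still scaled by 1/sqrt(64) = 0.125.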
%2780 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%2781 = torch.aten.to.dtype %2780, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%2782 = torch.prim.ListConstruct %int40, %int2304, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2783 = torch.aten.broadcast_to %2781, %2782 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[40,2304,64],f32>
%2784 = torch.aten.to.dtype %2720, %int6, %false, %false, %none : !torch.vtensor<[40,2304,64],f16>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[40,2304,64],f32>
%2785 = torch.prim.ListConstruct %int40, %int2304, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2786 = torch.aten.broadcast_to %2784, %2785 : !torch.vtensor<[40,2304,64],f32>, !torch.list<int> -> !torch.vtensor<[40,2304,64],f32>
%2787 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%2788 = torch.aten.to.dtype %2787, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%2789 = torch.prim.ListConstruct %int40, %int77, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2790 = torch.aten.broadcast_to %2788, %2789 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[40,77,64],f32>
%2791 = torch.aten.to.dtype %2775, %int6, %false, %false, %none : !torch.vtensor<[40,77,64],f16>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[40,77,64],f32>
%2792 = torch.prim.ListConstruct %int40, %int77, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2793 = torch.aten.broadcast_to %2791, %2792 : !torch.vtensor<[40,77,64],f32>, !torch.list<int> -> !torch.vtensor<[40,77,64],f32>
%2794 = torch.prim.ListConstruct %int40, %int2304, %int77 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2795 = torch.aten.empty.memory_format %2794, %int6, %none, %cuda3A0, %false, %none : !torch.list<int>, !torch.int, !torch.none, !torch.Device, !torch.bool, !torch.none -> !torch.vtensor<[40,2304,77],f32>
%2796 = torch.aten.transpose.int %2793, %int-1, %int-2 : !torch.vtensor<[40,77,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[40,64,77],f32>
%2797 = torch.aten.bmm %2786, %2796 : !torch.vtensor<[40,2304,64],f32>, !torch.vtensor<[40,64,77],f32> -> !torch.vtensor<[40,2304,77],f32>
%2798 = torch.aten.mul.Scalar %2797, %float1.250000e-01 : !torch.vtensor<[40,2304,77],f32>, !torch.float -> !torch.vtensor<[40,2304,77],f32>
%2799 = torch.aten.add.Tensor %2798, %2795, %int0 : !torch.vtensor<[40,2304,77],f32>, !torch.vtensor<[40,2304,77],f32>, !torch.int -> !torch.vtensor<[40,2304,77],f32>
%values_88, %indices_89 = torch.aten.max.dim %2799, %int-1, %true : !torch.vtensor<[40,2304,77],f32>, !torch.int, !torch.bool -> !torch.vtensor<[40,2304,1],f32>, !torch.vtensor<[40,2304,1],si64>
%2800 = torch.aten.sub.Tensor %2799, %values_88, %float1.000000e00 : !torch.vtensor<[40,2304,77],f32>, !torch.vtensor<[40,2304,1],f32>, !torch.float -> !torch.vtensor<[40,2304,77],f32>
%2801 = torch.aten.exp %2800 : !torch.vtensor<[40,2304,77],f32> -> !torch.vtensor<[40,2304,77],f32>
%2802 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%2803 = torch.aten.sum.dim_IntList %2801, %2802, %true, %none : !torch.vtensor<[40,2304,77],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[40,2304,1],f32>
%2804 = torch.aten.div.Tensor %2801, %2803 : !torch.vtensor<[40,2304,77],f32>, !torch.vtensor<[40,2304,1],f32> -> !torch.vtensor<[40,2304,77],f32>
%2805 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%2806 = torch.aten.to.dtype %2805, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%2807 = torch.prim.ListConstruct %int40, %int2304, %int77 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2808 = torch.aten.broadcast_to %2806, %2807 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[40,2304,77],f16>
%2809 = torch.aten.to.dtype %2804, %int5, %false, %false, %none : !torch.vtensor<[40,2304,77],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[40,2304,77],f16>
%2810 = torch.prim.ListConstruct %int40, %int2304, %int77 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2811 = torch.aten.broadcast_to %2809, %2810 : !torch.vtensor<[40,2304,77],f16>, !torch.list<int> -> !torch.vtensor<[40,2304,77],f16>
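    // editor note: cross-attention epilogue: probabilities back to f16, attn @ V, heads
    // merged to [4,2304,640], fake-quant, out-projection (si8 %792 rescaled by %793, bias
    // %791), and residual add onto %2677.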
%2812 = torch.aten.bmm %2811, %2779 : !torch.vtensor<[40,2304,77],f16>, !torch.vtensor<[40,77,64],f16> -> !torch.vtensor<[40,2304,64],f16>
%2813 = torch.aten.view %2812, %2647 : !torch.vtensor<[40,2304,64],f16>, !torch.list<int> -> !torch.vtensor<[4,10,2304,64],f16>
%2814 = torch.aten.permute %2813, %1239 : !torch.vtensor<[4,10,2304,64],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,10,64],f16>
%2815 = torch.aten.clone %2814, %int0 : !torch.vtensor<[4,2304,10,64],f16>, !torch.int -> !torch.vtensor<[4,2304,10,64],f16>
%2816 = torch.aten.view %2815, %2482 : !torch.vtensor<[4,2304,10,64],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,640],f16>
%2817 = torch.aten.view %2816, %2485 : !torch.vtensor<[4,2304,640],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,40,16],f16>
%2818 = torch.aten.abs %2817 : !torch.vtensor<[4,2304,40,16],f16> -> !torch.vtensor<[4,2304,40,16],f16>
%values_90, %indices_91 = torch.aten.max.dim %2818, %int3, %true : !torch.vtensor<[4,2304,40,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[4,2304,40,1],f16>, !torch.vtensor<[4,2304,40,1],si64>
%2819 = torch.aten.view %values_90, %2488 : !torch.vtensor<[4,2304,40,1],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,40,1],f16>
%2820 = torch.aten.broadcast_to %2819, %2485 : !torch.vtensor<[4,2304,40,1],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,40,16],f16>
%2821 = torch.aten.clone %2820, %int0 : !torch.vtensor<[4,2304,40,16],f16>, !torch.int -> !torch.vtensor<[4,2304,40,16],f16>
%2822 = torch.aten.view %2821, %2482 : !torch.vtensor<[4,2304,40,16],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,640],f16>
%2823 = torch.aten.div.Scalar %2822, %int128 : !torch.vtensor<[4,2304,640],f16>, !torch.int -> !torch.vtensor<[4,2304,640],f16>
%2824 = torch.aten.detach %936 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%2825 = torch.aten.detach %2824 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%2826 = torch.aten.div.Tensor %2816, %2823 : !torch.vtensor<[4,2304,640],f16>, !torch.vtensor<[4,2304,640],f16> -> !torch.vtensor<[4,2304,640],f16>
%2827 = torch.aten.add.Tensor %2826, %2825, %int1 : !torch.vtensor<[4,2304,640],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,2304,640],f16>
%2828 = torch.aten.round %2827 : !torch.vtensor<[4,2304,640],f16> -> !torch.vtensor<[4,2304,640],f16>
%2829 = torch.aten.clamp %2828, %int-128, %int127 : !torch.vtensor<[4,2304,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,2304,640],f16>
%2830 = torch.aten.sub.Tensor %2829, %2825, %int1 : !torch.vtensor<[4,2304,640],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,2304,640],f16>
%2831 = torch.aten.mul.Tensor %2830, %2823 : !torch.vtensor<[4,2304,640],f16>, !torch.vtensor<[4,2304,640],f16> -> !torch.vtensor<[4,2304,640],f16>
%2832 = torch.aten.broadcast_to %793, %2502 : !torch.vtensor<[640,40,1],f16>, !torch.list<int> -> !torch.vtensor<[640,40,16],f16>
%2833 = torch.aten.clone %2832, %int0 : !torch.vtensor<[640,40,16],f16>, !torch.int -> !torch.vtensor<[640,40,16],f16>
%2834 = torch.aten.view %2833, %2505 : !torch.vtensor<[640,40,16],f16>, !torch.list<int> -> !torch.vtensor<[640,640],f16>
%2835 = torch.aten.mul.Tensor %792, %2834 : !torch.vtensor<[640,640],si8>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
%2836 = torch.aten.transpose.int %2835, %int0, %int1 : !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,640],f16>
%2837 = torch.aten.view %2831, %2509 : !torch.vtensor<[4,2304,640],f16>, !torch.list<int> -> !torch.vtensor<[9216,640],f16>
%2838 = torch.aten.mm %2837, %2836 : !torch.vtensor<[9216,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[9216,640],f16>
%2839 = torch.aten.mul.Scalar %791, %int1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640],f16>
%2840 = torch.aten.add.Tensor %2839, %2838, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[9216,640],f16>, !torch.int -> !torch.vtensor<[9216,640],f16>
%2841 = torch.aten.view %2840, %2482 : !torch.vtensor<[9216,640],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,640],f16>
%2842 = torch.aten.add.Tensor %2841, %2677, %int1 : !torch.vtensor<[4,2304,640],f16>, !torch.vtensor<[4,2304,640],f16>, !torch.int -> !torch.vtensor<[4,2304,640],f16>
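    // editor note: second LayerNorm of the block (same mean/var/rsqrt expansion as above),
    // with affine params %790 (weight) and %789 (bias), feeding the feed-forward sublayer.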
%2843 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%2844 = torch.aten.sum.dim_IntList %2842, %2843, %true, %none : !torch.vtensor<[4,2304,640],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,2304,1],f16>
%2845 = torch.aten.div.Scalar %2844, %int640 : !torch.vtensor<[4,2304,1],f16>, !torch.int -> !torch.vtensor<[4,2304,1],f16>
%2846 = torch.prim.ListConstruct %int4, %int2304, %int640 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2847 = torch.aten.broadcast_to %2845, %2846 : !torch.vtensor<[4,2304,1],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,640],f16>
%2848 = torch.aten.sub.Tensor %2842, %2847, %int1 : !torch.vtensor<[4,2304,640],f16>, !torch.vtensor<[4,2304,640],f16>, !torch.int -> !torch.vtensor<[4,2304,640],f16>
%2849 = torch.aten.mul.Tensor %2848, %2848 : !torch.vtensor<[4,2304,640],f16>, !torch.vtensor<[4,2304,640],f16> -> !torch.vtensor<[4,2304,640],f16>
%2850 = torch.aten.sum.dim_IntList %2849, %2843, %true, %none : !torch.vtensor<[4,2304,640],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,2304,1],f16>
%2851 = torch.aten.div.Scalar %2850, %int640 : !torch.vtensor<[4,2304,1],f16>, !torch.int -> !torch.vtensor<[4,2304,1],f16>
%2852 = torch.aten.add.Scalar %2851, %float1.000000e-05, %int1 : !torch.vtensor<[4,2304,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[4,2304,1],f16>
%2853 = torch.aten.rsqrt %2852 : !torch.vtensor<[4,2304,1],f16> -> !torch.vtensor<[4,2304,1],f16>
%2854 = torch.prim.ListConstruct %int4, %int2304, %int640 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2855 = torch.aten.broadcast_to %2853, %2854 : !torch.vtensor<[4,2304,1],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,640],f16>
%2856 = torch.aten.mul.Tensor %2848, %2855 : !torch.vtensor<[4,2304,640],f16>, !torch.vtensor<[4,2304,640],f16> -> !torch.vtensor<[4,2304,640],f16>
%2857 = torch.aten.mul.Tensor %2856, %790 : !torch.vtensor<[4,2304,640],f16>, !torch.vtensor<[640],f16> -> !torch.vtensor<[4,2304,640],f16>
%2858 = torch.aten.add.Tensor %2857, %789, %int1 : !torch.vtensor<[4,2304,640],f16>, !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[4,2304,640],f16>
%2859 = torch.aten.view %2858, %2485 : !torch.vtensor<[4,2304,640],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,40,16],f16>
%2860 = torch.aten.abs %2859 : !torch.vtensor<[4,2304,40,16],f16> -> !torch.vtensor<[4,2304,40,16],f16>
%values_92, %indices_93 = torch.aten.max.dim %2860, %int3, %true : !torch.vtensor<[4,2304,40,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[4,2304,40,1],f16>, !torch.vtensor<[4,2304,40,1],si64>
%2861 = torch.aten.view %values_92, %2488 : !torch.vtensor<[4,2304,40,1],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,40,1],f16>
%2862 = torch.aten.broadcast_to %2861, %2485 : !torch.vtensor<[4,2304,40,1],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,40,16],f16>
%2863 = torch.aten.clone %2862, %int0 : !torch.vtensor<[4,2304,40,16],f16>, !torch.int -> !torch.vtensor<[4,2304,40,16],f16>
%2864 = torch.aten.view %2863, %2482 : !torch.vtensor<[4,2304,40,16],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,640],f16>
%2865 = torch.aten.div.Scalar %2864, %int128 : !torch.vtensor<[4,2304,640],f16>, !torch.int -> !torch.vtensor<[4,2304,640],f16>
%2866 = torch.aten.detach %936 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%2867 = torch.aten.detach %2866 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%2868 = torch.aten.div.Tensor %2858, %2865 : !torch.vtensor<[4,2304,640],f16>, !torch.vtensor<[4,2304,640],f16> -> !torch.vtensor<[4,2304,640],f16>
%2869 = torch.aten.add.Tensor %2868, %2867, %int1 : !torch.vtensor<[4,2304,640],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,2304,640],f16>
%2870 = torch.aten.round %2869 : !torch.vtensor<[4,2304,640],f16> -> !torch.vtensor<[4,2304,640],f16>
%2871 = torch.aten.clamp %2870, %int-128, %int127 : !torch.vtensor<[4,2304,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,2304,640],f16>
%2872 = torch.aten.sub.Tensor %2871, %2867, %int1 : !torch.vtensor<[4,2304,640],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,2304,640],f16>
%2873 = torch.aten.mul.Tensor %2872, %2865 : !torch.vtensor<[4,2304,640],f16>, !torch.vtensor<[4,2304,640],f16> -> !torch.vtensor<[4,2304,640],f16>
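    // editor note: feed-forward up-projection to 5120 channels using the same
    // si8-weight-times-scale reconstruction (%787/%788, bias %786). The 5120-wide output is
    // split in half below and gated GEGLU-style; PyTorch-style sketch (assumption):
    //   out = x[..., :2560] * F.gelu(x[..., 2560:])
    // where the gelu approximation is whatever %str was bound to earlier in the module.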
%2874 = torch.prim.ListConstruct %int5120, %int40, %int16 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2875 = torch.aten.broadcast_to %788, %2874 : !torch.vtensor<[5120,40,1],f16>, !torch.list<int> -> !torch.vtensor<[5120,40,16],f16>
%2876 = torch.aten.clone %2875, %int0 : !torch.vtensor<[5120,40,16],f16>, !torch.int -> !torch.vtensor<[5120,40,16],f16>
%2877 = torch.prim.ListConstruct %int5120, %int640 : (!torch.int, !torch.int) -> !torch.list<int>
%2878 = torch.aten.view %2876, %2877 : !torch.vtensor<[5120,40,16],f16>, !torch.list<int> -> !torch.vtensor<[5120,640],f16>
%2879 = torch.aten.mul.Tensor %787, %2878 : !torch.vtensor<[5120,640],si8>, !torch.vtensor<[5120,640],f16> -> !torch.vtensor<[5120,640],f16>
%2880 = torch.aten.transpose.int %2879, %int0, %int1 : !torch.vtensor<[5120,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,5120],f16>
%2881 = torch.aten.view %2873, %2509 : !torch.vtensor<[4,2304,640],f16>, !torch.list<int> -> !torch.vtensor<[9216,640],f16>
%2882 = torch.aten.mm %2881, %2880 : !torch.vtensor<[9216,640],f16>, !torch.vtensor<[640,5120],f16> -> !torch.vtensor<[9216,5120],f16>
%2883 = torch.aten.mul.Scalar %786, %int1 : !torch.vtensor<[5120],f16>, !torch.int -> !torch.vtensor<[5120],f16>
%2884 = torch.aten.add.Tensor %2883, %2882, %int1 : !torch.vtensor<[5120],f16>, !torch.vtensor<[9216,5120],f16>, !torch.int -> !torch.vtensor<[9216,5120],f16>
%2885 = torch.prim.ListConstruct %int4, %int2304, %int5120 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2886 = torch.aten.view %2884, %2885 : !torch.vtensor<[9216,5120],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,5120],f16>
%2887 = torch.aten.slice.Tensor %2886, %int-1, %int0, %int2560, %int1 : !torch.vtensor<[4,2304,5120],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[4,2304,2560],f16>
%2888 = torch.aten.slice.Tensor %2886, %int-1, %int2560, %int5120, %int1 : !torch.vtensor<[4,2304,5120],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[4,2304,2560],f16>
%2889 = torch.aten.gelu %2888, %str : !torch.vtensor<[4,2304,2560],f16>, !torch.str -> !torch.vtensor<[4,2304,2560],f16>
%2890 = torch.aten.mul.Tensor %2887, %2889 : !torch.vtensor<[4,2304,2560],f16>, !torch.vtensor<[4,2304,2560],f16> -> !torch.vtensor<[4,2304,2560],f16>
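    // editor note: the gated activation is fake-quantized (absmax over 160 groups of 16
    // channels) and projected back 2560 -> 640 (si8 %784 rescaled by %785, bias %783), then
    // added to the residual stream (%2842) to close the transformer block's FF sublayer.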
%2891 = torch.prim.ListConstruct %int4, %int2304, %int160, %int16 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2892 = torch.aten.view %2890, %2891 : !torch.vtensor<[4,2304,2560],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,160,16],f16>
%2893 = torch.aten.abs %2892 : !torch.vtensor<[4,2304,160,16],f16> -> !torch.vtensor<[4,2304,160,16],f16>
%values_94, %indices_95 = torch.aten.max.dim %2893, %int3, %true : !torch.vtensor<[4,2304,160,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[4,2304,160,1],f16>, !torch.vtensor<[4,2304,160,1],si64>
%2894 = torch.prim.ListConstruct %int4, %int2304, %int160, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2895 = torch.aten.view %values_94, %2894 : !torch.vtensor<[4,2304,160,1],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,160,1],f16>
%2896 = torch.aten.broadcast_to %2895, %2891 : !torch.vtensor<[4,2304,160,1],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,160,16],f16>
%2897 = torch.aten.clone %2896, %int0 : !torch.vtensor<[4,2304,160,16],f16>, !torch.int -> !torch.vtensor<[4,2304,160,16],f16>
%2898 = torch.prim.ListConstruct %int4, %int2304, %int2560 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2899 = torch.aten.view %2897, %2898 : !torch.vtensor<[4,2304,160,16],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,2560],f16>
%2900 = torch.aten.div.Scalar %2899, %int128 : !torch.vtensor<[4,2304,2560],f16>, !torch.int -> !torch.vtensor<[4,2304,2560],f16>
%2901 = torch.aten.detach %936 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%2902 = torch.aten.detach %2901 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%2903 = torch.aten.div.Tensor %2890, %2900 : !torch.vtensor<[4,2304,2560],f16>, !torch.vtensor<[4,2304,2560],f16> -> !torch.vtensor<[4,2304,2560],f16>
%2904 = torch.aten.add.Tensor %2903, %2902, %int1 : !torch.vtensor<[4,2304,2560],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,2304,2560],f16>
%2905 = torch.aten.round %2904 : !torch.vtensor<[4,2304,2560],f16> -> !torch.vtensor<[4,2304,2560],f16>
%2906 = torch.aten.clamp %2905, %int-128, %int127 : !torch.vtensor<[4,2304,2560],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,2304,2560],f16>
%2907 = torch.aten.sub.Tensor %2906, %2902, %int1 : !torch.vtensor<[4,2304,2560],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,2304,2560],f16>
%2908 = torch.aten.mul.Tensor %2907, %2900 : !torch.vtensor<[4,2304,2560],f16>, !torch.vtensor<[4,2304,2560],f16> -> !torch.vtensor<[4,2304,2560],f16>
%2909 = torch.prim.ListConstruct %int640, %int160, %int16 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2910 = torch.aten.broadcast_to %785, %2909 : !torch.vtensor<[640,160,1],f16>, !torch.list<int> -> !torch.vtensor<[640,160,16],f16>
%2911 = torch.aten.clone %2910, %int0 : !torch.vtensor<[640,160,16],f16>, !torch.int -> !torch.vtensor<[640,160,16],f16>
%2912 = torch.prim.ListConstruct %int640, %int2560 : (!torch.int, !torch.int) -> !torch.list<int>
%2913 = torch.aten.view %2911, %2912 : !torch.vtensor<[640,160,16],f16>, !torch.list<int> -> !torch.vtensor<[640,2560],f16>
%2914 = torch.aten.mul.Tensor %784, %2913 : !torch.vtensor<[640,2560],si8>, !torch.vtensor<[640,2560],f16> -> !torch.vtensor<[640,2560],f16>
%2915 = torch.aten.transpose.int %2914, %int0, %int1 : !torch.vtensor<[640,2560],f16>, !torch.int, !torch.int -> !torch.vtensor<[2560,640],f16>
%2916 = torch.prim.ListConstruct %int9216, %int2560 : (!torch.int, !torch.int) -> !torch.list<int>
%2917 = torch.aten.view %2908, %2916 : !torch.vtensor<[4,2304,2560],f16>, !torch.list<int> -> !torch.vtensor<[9216,2560],f16>
%2918 = torch.aten.mm %2917, %2915 : !torch.vtensor<[9216,2560],f16>, !torch.vtensor<[2560,640],f16> -> !torch.vtensor<[9216,640],f16>
%2919 = torch.aten.mul.Scalar %783, %int1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640],f16>
%2920 = torch.aten.add.Tensor %2919, %2918, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[9216,640],f16>, !torch.int -> !torch.vtensor<[9216,640],f16>
%2921 = torch.aten.view %2920, %2482 : !torch.vtensor<[9216,640],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,640],f16>
%2922 = torch.aten.add.Tensor %2921, %2842, %int1 : !torch.vtensor<[4,2304,640],f16>, !torch.vtensor<[4,2304,640],f16>, !torch.int -> !torch.vtensor<[4,2304,640],f16>
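    // editor note: what looks like the transformer-block epilogue: one more fake-quant and
    // a 640->640 projection (%781/%782, bias %780), then the tokens are reshaped from
    // [4,2304,640] back to spatial [4,48,48,640], permuted to NCHW [4,640,48,48], and added
    // to the pre-transformer feature map (%2442).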
%2923 = torch.aten.view %2922, %2485 : !torch.vtensor<[4,2304,640],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,40,16],f16>
%2924 = torch.aten.abs %2923 : !torch.vtensor<[4,2304,40,16],f16> -> !torch.vtensor<[4,2304,40,16],f16>
%values_96, %indices_97 = torch.aten.max.dim %2924, %int3, %true : !torch.vtensor<[4,2304,40,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[4,2304,40,1],f16>, !torch.vtensor<[4,2304,40,1],si64>
%2925 = torch.aten.view %values_96, %2488 : !torch.vtensor<[4,2304,40,1],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,40,1],f16>
%2926 = torch.aten.broadcast_to %2925, %2485 : !torch.vtensor<[4,2304,40,1],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,40,16],f16>
%2927 = torch.aten.clone %2926, %int0 : !torch.vtensor<[4,2304,40,16],f16>, !torch.int -> !torch.vtensor<[4,2304,40,16],f16>
%2928 = torch.aten.view %2927, %2482 : !torch.vtensor<[4,2304,40,16],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,640],f16>
%2929 = torch.aten.div.Scalar %2928, %int128 : !torch.vtensor<[4,2304,640],f16>, !torch.int -> !torch.vtensor<[4,2304,640],f16>
%2930 = torch.aten.detach %936 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%2931 = torch.aten.detach %2930 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%2932 = torch.aten.div.Tensor %2922, %2929 : !torch.vtensor<[4,2304,640],f16>, !torch.vtensor<[4,2304,640],f16> -> !torch.vtensor<[4,2304,640],f16>
%2933 = torch.aten.add.Tensor %2932, %2931, %int1 : !torch.vtensor<[4,2304,640],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,2304,640],f16>
%2934 = torch.aten.round %2933 : !torch.vtensor<[4,2304,640],f16> -> !torch.vtensor<[4,2304,640],f16>
%2935 = torch.aten.clamp %2934, %int-128, %int127 : !torch.vtensor<[4,2304,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,2304,640],f16>
%2936 = torch.aten.sub.Tensor %2935, %2931, %int1 : !torch.vtensor<[4,2304,640],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,2304,640],f16>
%2937 = torch.aten.mul.Tensor %2936, %2929 : !torch.vtensor<[4,2304,640],f16>, !torch.vtensor<[4,2304,640],f16> -> !torch.vtensor<[4,2304,640],f16>
%2938 = torch.aten.broadcast_to %782, %2502 : !torch.vtensor<[640,40,1],f16>, !torch.list<int> -> !torch.vtensor<[640,40,16],f16>
%2939 = torch.aten.clone %2938, %int0 : !torch.vtensor<[640,40,16],f16>, !torch.int -> !torch.vtensor<[640,40,16],f16>
%2940 = torch.aten.view %2939, %2505 : !torch.vtensor<[640,40,16],f16>, !torch.list<int> -> !torch.vtensor<[640,640],f16>
%2941 = torch.aten.mul.Tensor %781, %2940 : !torch.vtensor<[640,640],si8>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
%2942 = torch.aten.transpose.int %2941, %int0, %int1 : !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,640],f16>
%2943 = torch.aten.view %2937, %2509 : !torch.vtensor<[4,2304,640],f16>, !torch.list<int> -> !torch.vtensor<[9216,640],f16>
%2944 = torch.aten.mm %2943, %2942 : !torch.vtensor<[9216,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[9216,640],f16>
%2945 = torch.aten.mul.Scalar %780, %int1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640],f16>
%2946 = torch.aten.add.Tensor %2945, %2944, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[9216,640],f16>, !torch.int -> !torch.vtensor<[9216,640],f16>
%2947 = torch.aten.view %2946, %2482 : !torch.vtensor<[9216,640],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,640],f16>
%2948 = torch.prim.ListConstruct %int4, %int48, %int48, %int640 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2949 = torch.aten.view %2947, %2948 : !torch.vtensor<[4,2304,640],f16>, !torch.list<int> -> !torch.vtensor<[4,48,48,640],f16>
%2950 = torch.aten.permute %2949, %1638 : !torch.vtensor<[4,48,48,640],f16>, !torch.list<int> -> !torch.vtensor<[4,640,48,48],f16>
%2951 = torch.aten.clone %2950, %int0 : !torch.vtensor<[4,640,48,48],f16>, !torch.int -> !torch.vtensor<[4,640,48,48],f16>
%2952 = torch.aten.add.Tensor %2951, %2442, %int1 : !torch.vtensor<[4,640,48,48],f16>, !torch.vtensor<[4,640,48,48],f16>, !torch.int -> !torch.vtensor<[4,640,48,48],f16>
%2953 = torch.aten.view %2952, %2353 : !torch.vtensor<[4,640,48,48],f16>, !torch.list<int> -> !torch.vtensor<[4,32,20,2304],f16>
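    // editor note: GroupNorm with 32 groups. [4,640,48,48] is viewed as [4,32,20,2304]
    // (20 channels per group; 20*48*48 = 46080 elements per group, matching the divisor);
    // mean and variance are accumulated in f64, eps comes from %939, and the affine
    // weight/bias are %778/%779 -- roughly F.group_norm(x, 32, %778, %779) in PyTorch
    // terms (assumption).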
%2954 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%2955 = torch.aten.to.dtype %2954, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%2956 = torch.prim.ListConstruct %int4, %int32, %int20, %int2304 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2957 = torch.aten.broadcast_to %2955, %2956 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[4,32,20,2304],f32>
%2958 = torch.aten.to.dtype %2953, %int6, %false, %false, %none : !torch.vtensor<[4,32,20,2304],f16>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,32,20,2304],f32>
%2959 = torch.prim.ListConstruct %int4, %int32, %int20, %int2304 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2960 = torch.aten.broadcast_to %2958, %2959 : !torch.vtensor<[4,32,20,2304],f32>, !torch.list<int> -> !torch.vtensor<[4,32,20,2304],f32>
%2961 = torch.aten.to.dtype %2960, %int7, %false, %false, %none : !torch.vtensor<[4,32,20,2304],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,32,20,2304],f64>
%2962 = torch.aten.sum.dim_IntList %2961, %996, %true, %none : !torch.vtensor<[4,32,20,2304],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f64>
%2963 = torch.aten.div.Scalar %2962, %int46080 : !torch.vtensor<[4,32,1,1],f64>, !torch.int -> !torch.vtensor<[4,32,1,1],f64>
%2964 = torch.aten.sub.Tensor %2961, %2963, %float1.000000e00 : !torch.vtensor<[4,32,20,2304],f64>, !torch.vtensor<[4,32,1,1],f64>, !torch.float -> !torch.vtensor<[4,32,20,2304],f64>
%2965 = torch.aten.mul.Tensor %2964, %2964 : !torch.vtensor<[4,32,20,2304],f64>, !torch.vtensor<[4,32,20,2304],f64> -> !torch.vtensor<[4,32,20,2304],f64>
%2966 = torch.aten.sum.dim_IntList %2965, %996, %true, %none : !torch.vtensor<[4,32,20,2304],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f64>
%2967 = torch.aten.div.Scalar %2966, %int46080 : !torch.vtensor<[4,32,1,1],f64>, !torch.int -> !torch.vtensor<[4,32,1,1],f64>
%2968 = torch.aten.to.dtype %2967, %int6, %false, %false, %none : !torch.vtensor<[4,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f32>
%2969 = torch.aten.sum.dim_IntList %2960, %996, %true, %none : !torch.vtensor<[4,32,20,2304],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f32>
%2970 = torch.aten.div.Scalar %2969, %int46080 : !torch.vtensor<[4,32,1,1],f32>, !torch.int -> !torch.vtensor<[4,32,1,1],f32>
%2971 = torch.aten.add.Tensor %2968, %939, %int1 : !torch.vtensor<[4,32,1,1],f32>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,32,1,1],f32>
%2972 = torch.aten.rsqrt %2971 : !torch.vtensor<[4,32,1,1],f32> -> !torch.vtensor<[4,32,1,1],f32>
%2973 = torch.aten.sub.Tensor %2953, %2970, %int1 : !torch.vtensor<[4,32,20,2304],f16>, !torch.vtensor<[4,32,1,1],f32>, !torch.int -> !torch.vtensor<[4,32,20,2304],f32>
%2974 = torch.aten.mul.Tensor %2973, %2972 : !torch.vtensor<[4,32,20,2304],f32>, !torch.vtensor<[4,32,1,1],f32> -> !torch.vtensor<[4,32,20,2304],f32>
%2975 = torch.aten.view %2974, %2376 : !torch.vtensor<[4,32,20,2304],f32>, !torch.list<int> -> !torch.vtensor<[4,640,48,48],f32>
%2976 = torch.aten.unsqueeze %779, %int0 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[1,640],f16>
%2977 = torch.aten.unsqueeze %2976, %int2 : !torch.vtensor<[1,640],f16>, !torch.int -> !torch.vtensor<[1,640,1],f16>
%2978 = torch.aten.unsqueeze %2977, %int3 : !torch.vtensor<[1,640,1],f16>, !torch.int -> !torch.vtensor<[1,640,1,1],f16>
%2979 = torch.aten.unsqueeze %778, %int0 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[1,640],f16>
%2980 = torch.aten.unsqueeze %2979, %int2 : !torch.vtensor<[1,640],f16>, !torch.int -> !torch.vtensor<[1,640,1],f16>
%2981 = torch.aten.unsqueeze %2980, %int3 : !torch.vtensor<[1,640,1],f16>, !torch.int -> !torch.vtensor<[1,640,1,1],f16>
%2982 = torch.aten.mul.Tensor %2975, %2981 : !torch.vtensor<[4,640,48,48],f32>, !torch.vtensor<[1,640,1,1],f16> -> !torch.vtensor<[4,640,48,48],f32>
%2983 = torch.aten.add.Tensor %2982, %2978, %int1 : !torch.vtensor<[4,640,48,48],f32>, !torch.vtensor<[1,640,1,1],f16>, !torch.int -> !torch.vtensor<[4,640,48,48],f32>
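    // editor note: cast back to f16, SiLU (sigmoid(x) * x), then a spatial variant of the
    // fake-quant pattern (absmax over dim 2 of the [4,40,16,48,48] view), followed by a
    // 3x3 convolution whose si8 kernel (%776) is rescaled by %777 and biased by %775.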
%2984 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%2985 = torch.aten.to.dtype %2984, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%2986 = torch.prim.ListConstruct %int4, %int640, %int48, %int48 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2987 = torch.aten.broadcast_to %2985, %2986 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[4,640,48,48],f16>
%2988 = torch.aten.to.dtype %2983, %int5, %false, %false, %none : !torch.vtensor<[4,640,48,48],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,640,48,48],f16>
%2989 = torch.prim.ListConstruct %int4, %int640, %int48, %int48 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2990 = torch.aten.broadcast_to %2988, %2989 : !torch.vtensor<[4,640,48,48],f16>, !torch.list<int> -> !torch.vtensor<[4,640,48,48],f16>
%2991 = torch.aten.sigmoid %2990 : !torch.vtensor<[4,640,48,48],f16> -> !torch.vtensor<[4,640,48,48],f16>
%2992 = torch.aten.mul.Tensor %2991, %2990 : !torch.vtensor<[4,640,48,48],f16>, !torch.vtensor<[4,640,48,48],f16> -> !torch.vtensor<[4,640,48,48],f16>
%2993 = torch.aten.view %2992, %2395 : !torch.vtensor<[4,640,48,48],f16>, !torch.list<int> -> !torch.vtensor<[4,40,16,48,48],f16>
%2994 = torch.aten.abs %2993 : !torch.vtensor<[4,40,16,48,48],f16> -> !torch.vtensor<[4,40,16,48,48],f16>
%values_98, %indices_99 = torch.aten.max.dim %2994, %int2, %true : !torch.vtensor<[4,40,16,48,48],f16>, !torch.int, !torch.bool -> !torch.vtensor<[4,40,1,48,48],f16>, !torch.vtensor<[4,40,1,48,48],si64>
%2995 = torch.aten.view %values_98, %2398 : !torch.vtensor<[4,40,1,48,48],f16>, !torch.list<int> -> !torch.vtensor<[4,40,1,48,48],f16>
%2996 = torch.aten.broadcast_to %2995, %2395 : !torch.vtensor<[4,40,1,48,48],f16>, !torch.list<int> -> !torch.vtensor<[4,40,16,48,48],f16>
%2997 = torch.aten.clone %2996, %int0 : !torch.vtensor<[4,40,16,48,48],f16>, !torch.int -> !torch.vtensor<[4,40,16,48,48],f16>
%2998 = torch.aten.view %2997, %2376 : !torch.vtensor<[4,40,16,48,48],f16>, !torch.list<int> -> !torch.vtensor<[4,640,48,48],f16>
%2999 = torch.aten.div.Scalar %2998, %int128 : !torch.vtensor<[4,640,48,48],f16>, !torch.int -> !torch.vtensor<[4,640,48,48],f16>
%3000 = torch.aten.detach %936 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%3001 = torch.aten.detach %3000 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%3002 = torch.aten.div.Tensor %2992, %2999 : !torch.vtensor<[4,640,48,48],f16>, !torch.vtensor<[4,640,48,48],f16> -> !torch.vtensor<[4,640,48,48],f16>
%3003 = torch.aten.add.Tensor %3002, %3001, %int1 : !torch.vtensor<[4,640,48,48],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,640,48,48],f16>
%3004 = torch.aten.round %3003 : !torch.vtensor<[4,640,48,48],f16> -> !torch.vtensor<[4,640,48,48],f16>
%3005 = torch.aten.clamp %3004, %int-128, %int127 : !torch.vtensor<[4,640,48,48],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,640,48,48],f16>
%3006 = torch.aten.sub.Tensor %3005, %3001, %int1 : !torch.vtensor<[4,640,48,48],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,640,48,48],f16>
%3007 = torch.aten.mul.Tensor %3006, %2999 : !torch.vtensor<[4,640,48,48],f16>, !torch.vtensor<[4,640,48,48],f16> -> !torch.vtensor<[4,640,48,48],f16>
%3008 = torch.aten.broadcast_to %777, %2412 : !torch.vtensor<[640,40,1,3,3],f16>, !torch.list<int> -> !torch.vtensor<[640,40,16,3,3],f16>
%3009 = torch.aten.clone %3008, %int0 : !torch.vtensor<[640,40,16,3,3],f16>, !torch.int -> !torch.vtensor<[640,40,16,3,3],f16>
%3010 = torch.aten.view %3009, %2415 : !torch.vtensor<[640,40,16,3,3],f16>, !torch.list<int> -> !torch.vtensor<[640,640,3,3],f16>
%3011 = torch.aten.mul.Tensor %776, %3010 : !torch.vtensor<[640,640,3,3],si8>, !torch.vtensor<[640,640,3,3],f16> -> !torch.vtensor<[640,640,3,3],f16>
%3012 = torch.aten.convolution %3007, %3011, %775, %984, %984, %984, %false, %985, %int1 : !torch.vtensor<[4,640,48,48],f16>, !torch.vtensor<[640,640,3,3],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[4,640,48,48],f16>
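    // editor note: this looks like the usual ResNet-block time-embedding injection: SiLU on
    // %983 ([4,1280]), a 1280->640 linear (%774, bias %773), unsqueeze to [4,640,1,1], and
    // add as a per-channel bias to the conv output.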
%3013 = torch.aten.sigmoid %983 : !torch.vtensor<[4,1280],f16> -> !torch.vtensor<[4,1280],f16>
%3014 = torch.aten.mul.Tensor %3013, %983 : !torch.vtensor<[4,1280],f16>, !torch.vtensor<[4,1280],f16> -> !torch.vtensor<[4,1280],f16>
%3015 = torch.aten.transpose.int %774, %int0, %int1 : !torch.vtensor<[640,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,640],f16>
%3016 = torch.aten.mm %3014, %3015 : !torch.vtensor<[4,1280],f16>, !torch.vtensor<[1280,640],f16> -> !torch.vtensor<[4,640],f16>
%3017 = torch.aten.mul.Scalar %773, %int1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640],f16>
%3018 = torch.aten.add.Tensor %3017, %3016, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[4,640],f16>, !torch.int -> !torch.vtensor<[4,640],f16>
%3019 = torch.aten.unsqueeze %3018, %int2 : !torch.vtensor<[4,640],f16>, !torch.int -> !torch.vtensor<[4,640,1],f16>
%3020 = torch.aten.unsqueeze %3019, %int3 : !torch.vtensor<[4,640,1],f16>, !torch.int -> !torch.vtensor<[4,640,1,1],f16>
%3021 = torch.aten.add.Tensor %3012, %3020, %int1 : !torch.vtensor<[4,640,48,48],f16>, !torch.vtensor<[4,640,1,1],f16>, !torch.int -> !torch.vtensor<[4,640,48,48],f16>
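    // editor note: the block's second GroupNorm (32 groups, f64 statistics, affine
    // %771/%772 as weight/bias), followed by SiLU and another round of the fake-quant
    // pattern, mirroring the sequence above.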
%3022 = torch.aten.view %3021, %2353 : !torch.vtensor<[4,640,48,48],f16>, !torch.list<int> -> !torch.vtensor<[4,32,20,2304],f16>
%3023 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%3024 = torch.aten.to.dtype %3023, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%3025 = torch.prim.ListConstruct %int4, %int32, %int20, %int2304 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3026 = torch.aten.broadcast_to %3024, %3025 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[4,32,20,2304],f32>
%3027 = torch.aten.to.dtype %3022, %int6, %false, %false, %none : !torch.vtensor<[4,32,20,2304],f16>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,32,20,2304],f32>
%3028 = torch.prim.ListConstruct %int4, %int32, %int20, %int2304 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3029 = torch.aten.broadcast_to %3027, %3028 : !torch.vtensor<[4,32,20,2304],f32>, !torch.list<int> -> !torch.vtensor<[4,32,20,2304],f32>
%3030 = torch.aten.to.dtype %3029, %int7, %false, %false, %none : !torch.vtensor<[4,32,20,2304],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,32,20,2304],f64>
%3031 = torch.aten.sum.dim_IntList %3030, %996, %true, %none : !torch.vtensor<[4,32,20,2304],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f64>
%3032 = torch.aten.div.Scalar %3031, %int46080 : !torch.vtensor<[4,32,1,1],f64>, !torch.int -> !torch.vtensor<[4,32,1,1],f64>
%3033 = torch.aten.sub.Tensor %3030, %3032, %float1.000000e00 : !torch.vtensor<[4,32,20,2304],f64>, !torch.vtensor<[4,32,1,1],f64>, !torch.float -> !torch.vtensor<[4,32,20,2304],f64>
%3034 = torch.aten.mul.Tensor %3033, %3033 : !torch.vtensor<[4,32,20,2304],f64>, !torch.vtensor<[4,32,20,2304],f64> -> !torch.vtensor<[4,32,20,2304],f64>
%3035 = torch.aten.sum.dim_IntList %3034, %996, %true, %none : !torch.vtensor<[4,32,20,2304],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f64>
%3036 = torch.aten.div.Scalar %3035, %int46080 : !torch.vtensor<[4,32,1,1],f64>, !torch.int -> !torch.vtensor<[4,32,1,1],f64>
%3037 = torch.aten.to.dtype %3036, %int6, %false, %false, %none : !torch.vtensor<[4,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f32>
%3038 = torch.aten.sum.dim_IntList %3029, %996, %true, %none : !torch.vtensor<[4,32,20,2304],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f32>
%3039 = torch.aten.div.Scalar %3038, %int46080 : !torch.vtensor<[4,32,1,1],f32>, !torch.int -> !torch.vtensor<[4,32,1,1],f32>
%3040 = torch.aten.add.Tensor %3037, %939, %int1 : !torch.vtensor<[4,32,1,1],f32>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,32,1,1],f32>
%3041 = torch.aten.rsqrt %3040 : !torch.vtensor<[4,32,1,1],f32> -> !torch.vtensor<[4,32,1,1],f32>
%3042 = torch.aten.sub.Tensor %3022, %3039, %int1 : !torch.vtensor<[4,32,20,2304],f16>, !torch.vtensor<[4,32,1,1],f32>, !torch.int -> !torch.vtensor<[4,32,20,2304],f32>
%3043 = torch.aten.mul.Tensor %3042, %3041 : !torch.vtensor<[4,32,20,2304],f32>, !torch.vtensor<[4,32,1,1],f32> -> !torch.vtensor<[4,32,20,2304],f32>
%3044 = torch.aten.view %3043, %2376 : !torch.vtensor<[4,32,20,2304],f32>, !torch.list<int> -> !torch.vtensor<[4,640,48,48],f32>
%3045 = torch.aten.unsqueeze %772, %int0 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[1,640],f16>
%3046 = torch.aten.unsqueeze %3045, %int2 : !torch.vtensor<[1,640],f16>, !torch.int -> !torch.vtensor<[1,640,1],f16>
%3047 = torch.aten.unsqueeze %3046, %int3 : !torch.vtensor<[1,640,1],f16>, !torch.int -> !torch.vtensor<[1,640,1,1],f16>
%3048 = torch.aten.unsqueeze %771, %int0 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[1,640],f16>
%3049 = torch.aten.unsqueeze %3048, %int2 : !torch.vtensor<[1,640],f16>, !torch.int -> !torch.vtensor<[1,640,1],f16>
%3050 = torch.aten.unsqueeze %3049, %int3 : !torch.vtensor<[1,640,1],f16>, !torch.int -> !torch.vtensor<[1,640,1,1],f16>
%3051 = torch.aten.mul.Tensor %3044, %3050 : !torch.vtensor<[4,640,48,48],f32>, !torch.vtensor<[1,640,1,1],f16> -> !torch.vtensor<[4,640,48,48],f32>
%3052 = torch.aten.add.Tensor %3051, %3047, %int1 : !torch.vtensor<[4,640,48,48],f32>, !torch.vtensor<[1,640,1,1],f16>, !torch.int -> !torch.vtensor<[4,640,48,48],f32>
%3053 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%3054 = torch.aten.to.dtype %3053, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%3055 = torch.prim.ListConstruct %int4, %int640, %int48, %int48 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3056 = torch.aten.broadcast_to %3054, %3055 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[4,640,48,48],f16>
%3057 = torch.aten.to.dtype %3052, %int5, %false, %false, %none : !torch.vtensor<[4,640,48,48],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,640,48,48],f16>
%3058 = torch.prim.ListConstruct %int4, %int640, %int48, %int48 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3059 = torch.aten.broadcast_to %3057, %3058 : !torch.vtensor<[4,640,48,48],f16>, !torch.list<int> -> !torch.vtensor<[4,640,48,48],f16>
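    // SiLU activation next: x * sigmoid(x), after the normalized result is cast back to f16.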
%3060 = torch.aten.sigmoid %3059 : !torch.vtensor<[4,640,48,48],f16> -> !torch.vtensor<[4,640,48,48],f16>
%3061 = torch.aten.mul.Tensor %3060, %3059 : !torch.vtensor<[4,640,48,48],f16>, !torch.vtensor<[4,640,48,48],f16> -> !torch.vtensor<[4,640,48,48],f16>
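    // What follows appears to be symmetric int8 fake-quantization of the activations:
    // view to [4,40,16,48,48] (40 groups of 16 channels), take the per-group abs-max,
    // scale = amax/128, then q = clamp(round(x/scale + %936), -128, 127) and dequantize
    // as (q - %936) * scale; the detached scalar %936 seems to act as a zero-point offset.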
%3062 = torch.aten.view %3061, %2395 : !torch.vtensor<[4,640,48,48],f16>, !torch.list<int> -> !torch.vtensor<[4,40,16,48,48],f16>
%3063 = torch.aten.abs %3062 : !torch.vtensor<[4,40,16,48,48],f16> -> !torch.vtensor<[4,40,16,48,48],f16>
%values_100, %indices_101 = torch.aten.max.dim %3063, %int2, %true : !torch.vtensor<[4,40,16,48,48],f16>, !torch.int, !torch.bool -> !torch.vtensor<[4,40,1,48,48],f16>, !torch.vtensor<[4,40,1,48,48],si64>
%3064 = torch.aten.view %values_100, %2398 : !torch.vtensor<[4,40,1,48,48],f16>, !torch.list<int> -> !torch.vtensor<[4,40,1,48,48],f16>
%3065 = torch.aten.broadcast_to %3064, %2395 : !torch.vtensor<[4,40,1,48,48],f16>, !torch.list<int> -> !torch.vtensor<[4,40,16,48,48],f16>
%3066 = torch.aten.clone %3065, %int0 : !torch.vtensor<[4,40,16,48,48],f16>, !torch.int -> !torch.vtensor<[4,40,16,48,48],f16>
%3067 = torch.aten.view %3066, %2376 : !torch.vtensor<[4,40,16,48,48],f16>, !torch.list<int> -> !torch.vtensor<[4,640,48,48],f16>
%3068 = torch.aten.div.Scalar %3067, %int128 : !torch.vtensor<[4,640,48,48],f16>, !torch.int -> !torch.vtensor<[4,640,48,48],f16>
%3069 = torch.aten.detach %936 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%3070 = torch.aten.detach %3069 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%3071 = torch.aten.div.Tensor %3061, %3068 : !torch.vtensor<[4,640,48,48],f16>, !torch.vtensor<[4,640,48,48],f16> -> !torch.vtensor<[4,640,48,48],f16>
%3072 = torch.aten.add.Tensor %3071, %3070, %int1 : !torch.vtensor<[4,640,48,48],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,640,48,48],f16>
%3073 = torch.aten.round %3072 : !torch.vtensor<[4,640,48,48],f16> -> !torch.vtensor<[4,640,48,48],f16>
%3074 = torch.aten.clamp %3073, %int-128, %int127 : !torch.vtensor<[4,640,48,48],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,640,48,48],f16>
%3075 = torch.aten.sub.Tensor %3074, %3070, %int1 : !torch.vtensor<[4,640,48,48],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,640,48,48],f16>
%3076 = torch.aten.mul.Tensor %3075, %3068 : !torch.vtensor<[4,640,48,48],f16>, !torch.vtensor<[4,640,48,48],f16> -> !torch.vtensor<[4,640,48,48],f16>
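    // Weight dequantization + conv: the per-group f16 scales %770 ([640,40,1,3,3]) are
    // broadcast to [640,40,16,3,3], flattened to [640,640,3,3], and multiplied with the
    // si8 weights %769; a 3x3 convolution with bias %768 follows. The add with %2952
    // looks like the ResNet skip connection, and the divide by %925 is likely an
    // output scale factor applied to the block result.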
%3077 = torch.aten.broadcast_to %770, %2412 : !torch.vtensor<[640,40,1,3,3],f16>, !torch.list<int> -> !torch.vtensor<[640,40,16,3,3],f16>
%3078 = torch.aten.clone %3077, %int0 : !torch.vtensor<[640,40,16,3,3],f16>, !torch.int -> !torch.vtensor<[640,40,16,3,3],f16>
%3079 = torch.aten.view %3078, %2415 : !torch.vtensor<[640,40,16,3,3],f16>, !torch.list<int> -> !torch.vtensor<[640,640,3,3],f16>
%3080 = torch.aten.mul.Tensor %769, %3079 : !torch.vtensor<[640,640,3,3],si8>, !torch.vtensor<[640,640,3,3],f16> -> !torch.vtensor<[640,640,3,3],f16>
%3081 = torch.aten.convolution %3076, %3080, %768, %984, %984, %984, %false, %985, %int1 : !torch.vtensor<[4,640,48,48],f16>, !torch.vtensor<[640,640,3,3],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[4,640,48,48],f16>
%3082 = torch.aten.add.Tensor %2952, %3081, %int1 : !torch.vtensor<[4,640,48,48],f16>, !torch.vtensor<[4,640,48,48],f16>, !torch.int -> !torch.vtensor<[4,640,48,48],f16>
%3083 = torch.aten.div.Tensor %3082, %925 : !torch.vtensor<[4,640,48,48],f16>, !torch.vtensor<[],f16> -> !torch.vtensor<[4,640,48,48],f16>
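    // Second GroupNorm of the block, same pattern as above (eps %924, weight %766, bias %767).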
%3084 = torch.aten.view %3083, %2353 : !torch.vtensor<[4,640,48,48],f16>, !torch.list<int> -> !torch.vtensor<[4,32,20,2304],f16>
%3085 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%3086 = torch.aten.to.dtype %3085, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%3087 = torch.prim.ListConstruct %int4, %int32, %int20, %int2304 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3088 = torch.aten.broadcast_to %3086, %3087 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[4,32,20,2304],f32>
%3089 = torch.aten.to.dtype %3084, %int6, %false, %false, %none : !torch.vtensor<[4,32,20,2304],f16>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,32,20,2304],f32>
%3090 = torch.prim.ListConstruct %int4, %int32, %int20, %int2304 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3091 = torch.aten.broadcast_to %3089, %3090 : !torch.vtensor<[4,32,20,2304],f32>, !torch.list<int> -> !torch.vtensor<[4,32,20,2304],f32>
%3092 = torch.aten.to.dtype %3091, %int7, %false, %false, %none : !torch.vtensor<[4,32,20,2304],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,32,20,2304],f64>
%3093 = torch.aten.sum.dim_IntList %3092, %996, %true, %none : !torch.vtensor<[4,32,20,2304],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f64>
%3094 = torch.aten.div.Scalar %3093, %int46080 : !torch.vtensor<[4,32,1,1],f64>, !torch.int -> !torch.vtensor<[4,32,1,1],f64>
%3095 = torch.aten.sub.Tensor %3092, %3094, %float1.000000e00 : !torch.vtensor<[4,32,20,2304],f64>, !torch.vtensor<[4,32,1,1],f64>, !torch.float -> !torch.vtensor<[4,32,20,2304],f64>
%3096 = torch.aten.mul.Tensor %3095, %3095 : !torch.vtensor<[4,32,20,2304],f64>, !torch.vtensor<[4,32,20,2304],f64> -> !torch.vtensor<[4,32,20,2304],f64>
%3097 = torch.aten.sum.dim_IntList %3096, %996, %true, %none : !torch.vtensor<[4,32,20,2304],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f64>
%3098 = torch.aten.div.Scalar %3097, %int46080 : !torch.vtensor<[4,32,1,1],f64>, !torch.int -> !torch.vtensor<[4,32,1,1],f64>
%3099 = torch.aten.to.dtype %3098, %int6, %false, %false, %none : !torch.vtensor<[4,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f32>
%3100 = torch.aten.sum.dim_IntList %3091, %996, %true, %none : !torch.vtensor<[4,32,20,2304],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f32>
%3101 = torch.aten.div.Scalar %3100, %int46080 : !torch.vtensor<[4,32,1,1],f32>, !torch.int -> !torch.vtensor<[4,32,1,1],f32>
%3102 = torch.aten.add.Tensor %3099, %924, %int1 : !torch.vtensor<[4,32,1,1],f32>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,32,1,1],f32>
%3103 = torch.aten.rsqrt %3102 : !torch.vtensor<[4,32,1,1],f32> -> !torch.vtensor<[4,32,1,1],f32>
%3104 = torch.aten.sub.Tensor %3084, %3101, %int1 : !torch.vtensor<[4,32,20,2304],f16>, !torch.vtensor<[4,32,1,1],f32>, !torch.int -> !torch.vtensor<[4,32,20,2304],f32>
%3105 = torch.aten.mul.Tensor %3104, %3103 : !torch.vtensor<[4,32,20,2304],f32>, !torch.vtensor<[4,32,1,1],f32> -> !torch.vtensor<[4,32,20,2304],f32>
%3106 = torch.aten.view %3105, %2376 : !torch.vtensor<[4,32,20,2304],f32>, !torch.list<int> -> !torch.vtensor<[4,640,48,48],f32>
%3107 = torch.aten.unsqueeze %767, %int0 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[1,640],f16>
%3108 = torch.aten.unsqueeze %3107, %int2 : !torch.vtensor<[1,640],f16>, !torch.int -> !torch.vtensor<[1,640,1],f16>
%3109 = torch.aten.unsqueeze %3108, %int3 : !torch.vtensor<[1,640,1],f16>, !torch.int -> !torch.vtensor<[1,640,1,1],f16>
%3110 = torch.aten.unsqueeze %766, %int0 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[1,640],f16>
%3111 = torch.aten.unsqueeze %3110, %int2 : !torch.vtensor<[1,640],f16>, !torch.int -> !torch.vtensor<[1,640,1],f16>
%3112 = torch.aten.unsqueeze %3111, %int3 : !torch.vtensor<[1,640,1],f16>, !torch.int -> !torch.vtensor<[1,640,1,1],f16>
%3113 = torch.aten.mul.Tensor %3106, %3112 : !torch.vtensor<[4,640,48,48],f32>, !torch.vtensor<[1,640,1,1],f16> -> !torch.vtensor<[4,640,48,48],f32>
%3114 = torch.aten.add.Tensor %3113, %3109, %int1 : !torch.vtensor<[4,640,48,48],f32>, !torch.vtensor<[1,640,1,1],f16>, !torch.int -> !torch.vtensor<[4,640,48,48],f32>
%3115 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%3116 = torch.aten.to.dtype %3115, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%3117 = torch.prim.ListConstruct %int4, %int640, %int48, %int48 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3118 = torch.aten.broadcast_to %3116, %3117 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[4,640,48,48],f16>
%3119 = torch.aten.to.dtype %3114, %int5, %false, %false, %none : !torch.vtensor<[4,640,48,48],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,640,48,48],f16>
%3120 = torch.prim.ListConstruct %int4, %int640, %int48, %int48 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3121 = torch.aten.broadcast_to %3119, %3120 : !torch.vtensor<[4,640,48,48],f16>, !torch.list<int> -> !torch.vtensor<[4,640,48,48],f16>
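    // Entering the transformer sub-block: permute NCHW -> NHWC and flatten to token form
    // [4,2304,640] (48*48 = 2304 tokens), then fake-quantize per 16-feature group
    // ([4,2304,40,16]) and apply the input projection linear (si8 weights %764
    // dequantized by scales %765, bias %763).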
%3122 = torch.aten.permute %3121, %1163 : !torch.vtensor<[4,640,48,48],f16>, !torch.list<int> -> !torch.vtensor<[4,48,48,640],f16>
%3123 = torch.aten.view %3122, %2482 : !torch.vtensor<[4,48,48,640],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,640],f16>
%3124 = torch.aten.clone %3123, %int0 : !torch.vtensor<[4,2304,640],f16>, !torch.int -> !torch.vtensor<[4,2304,640],f16>
%3125 = torch.aten.view %3124, %2485 : !torch.vtensor<[4,2304,640],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,40,16],f16>
%3126 = torch.aten.abs %3125 : !torch.vtensor<[4,2304,40,16],f16> -> !torch.vtensor<[4,2304,40,16],f16>
%values_102, %indices_103 = torch.aten.max.dim %3126, %int3, %true : !torch.vtensor<[4,2304,40,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[4,2304,40,1],f16>, !torch.vtensor<[4,2304,40,1],si64>
%3127 = torch.aten.view %values_102, %2488 : !torch.vtensor<[4,2304,40,1],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,40,1],f16>
%3128 = torch.aten.broadcast_to %3127, %2485 : !torch.vtensor<[4,2304,40,1],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,40,16],f16>
%3129 = torch.aten.clone %3128, %int0 : !torch.vtensor<[4,2304,40,16],f16>, !torch.int -> !torch.vtensor<[4,2304,40,16],f16>
%3130 = torch.aten.view %3129, %2482 : !torch.vtensor<[4,2304,40,16],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,640],f16>
%3131 = torch.aten.div.Scalar %3130, %int128 : !torch.vtensor<[4,2304,640],f16>, !torch.int -> !torch.vtensor<[4,2304,640],f16>
%3132 = torch.aten.detach %936 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%3133 = torch.aten.detach %3132 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%3134 = torch.aten.div.Tensor %3124, %3131 : !torch.vtensor<[4,2304,640],f16>, !torch.vtensor<[4,2304,640],f16> -> !torch.vtensor<[4,2304,640],f16>
%3135 = torch.aten.add.Tensor %3134, %3133, %int1 : !torch.vtensor<[4,2304,640],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,2304,640],f16>
%3136 = torch.aten.round %3135 : !torch.vtensor<[4,2304,640],f16> -> !torch.vtensor<[4,2304,640],f16>
%3137 = torch.aten.clamp %3136, %int-128, %int127 : !torch.vtensor<[4,2304,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,2304,640],f16>
%3138 = torch.aten.sub.Tensor %3137, %3133, %int1 : !torch.vtensor<[4,2304,640],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,2304,640],f16>
%3139 = torch.aten.mul.Tensor %3138, %3131 : !torch.vtensor<[4,2304,640],f16>, !torch.vtensor<[4,2304,640],f16> -> !torch.vtensor<[4,2304,640],f16>
%3140 = torch.aten.broadcast_to %765, %2502 : !torch.vtensor<[640,40,1],f16>, !torch.list<int> -> !torch.vtensor<[640,40,16],f16>
%3141 = torch.aten.clone %3140, %int0 : !torch.vtensor<[640,40,16],f16>, !torch.int -> !torch.vtensor<[640,40,16],f16>
%3142 = torch.aten.view %3141, %2505 : !torch.vtensor<[640,40,16],f16>, !torch.list<int> -> !torch.vtensor<[640,640],f16>
%3143 = torch.aten.mul.Tensor %764, %3142 : !torch.vtensor<[640,640],si8>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
%3144 = torch.aten.transpose.int %3143, %int0, %int1 : !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,640],f16>
%3145 = torch.aten.view %3139, %2509 : !torch.vtensor<[4,2304,640],f16>, !torch.list<int> -> !torch.vtensor<[9216,640],f16>
%3146 = torch.aten.mm %3145, %3144 : !torch.vtensor<[9216,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[9216,640],f16>
%3147 = torch.aten.mul.Scalar %763, %int1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640],f16>
%3148 = torch.aten.add.Tensor %3147, %3146, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[9216,640],f16>, !torch.int -> !torch.vtensor<[9216,640],f16>
%3149 = torch.aten.view %3148, %2482 : !torch.vtensor<[9216,640],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,640],f16>
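    // Inlined LayerNorm over the 640-dim features: mean and variance via sum/640,
    // eps 1e-05, rsqrt, then weight %762 and bias %761.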
%3150 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%3151 = torch.aten.sum.dim_IntList %3149, %3150, %true, %none : !torch.vtensor<[4,2304,640],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,2304,1],f16>
%3152 = torch.aten.div.Scalar %3151, %int640 : !torch.vtensor<[4,2304,1],f16>, !torch.int -> !torch.vtensor<[4,2304,1],f16>
%3153 = torch.prim.ListConstruct %int4, %int2304, %int640 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3154 = torch.aten.broadcast_to %3152, %3153 : !torch.vtensor<[4,2304,1],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,640],f16>
%3155 = torch.aten.sub.Tensor %3149, %3154, %int1 : !torch.vtensor<[4,2304,640],f16>, !torch.vtensor<[4,2304,640],f16>, !torch.int -> !torch.vtensor<[4,2304,640],f16>
%3156 = torch.aten.mul.Tensor %3155, %3155 : !torch.vtensor<[4,2304,640],f16>, !torch.vtensor<[4,2304,640],f16> -> !torch.vtensor<[4,2304,640],f16>
%3157 = torch.aten.sum.dim_IntList %3156, %3150, %true, %none : !torch.vtensor<[4,2304,640],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,2304,1],f16>
%3158 = torch.aten.div.Scalar %3157, %int640 : !torch.vtensor<[4,2304,1],f16>, !torch.int -> !torch.vtensor<[4,2304,1],f16>
%3159 = torch.aten.add.Scalar %3158, %float1.000000e-05, %int1 : !torch.vtensor<[4,2304,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[4,2304,1],f16>
%3160 = torch.aten.rsqrt %3159 : !torch.vtensor<[4,2304,1],f16> -> !torch.vtensor<[4,2304,1],f16>
%3161 = torch.prim.ListConstruct %int4, %int2304, %int640 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3162 = torch.aten.broadcast_to %3160, %3161 : !torch.vtensor<[4,2304,1],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,640],f16>
%3163 = torch.aten.mul.Tensor %3155, %3162 : !torch.vtensor<[4,2304,640],f16>, !torch.vtensor<[4,2304,640],f16> -> !torch.vtensor<[4,2304,640],f16>
%3164 = torch.aten.mul.Tensor %3163, %762 : !torch.vtensor<[4,2304,640],f16>, !torch.vtensor<[640],f16> -> !torch.vtensor<[4,2304,640],f16>
%3165 = torch.aten.add.Tensor %3164, %761, %int1 : !torch.vtensor<[4,2304,640],f16>, !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[4,2304,640],f16>
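    // Self-attention projections: the same activation fake-quant precedes each of the
    // three bias-free linears below, whose si8 weights %759/%757/%755 are dequantized
    // by per-group scales %760/%758/%756 to form Q, K and V respectively, each reshaped
    // to [40,2304,64] (4 batches * 10 heads, head dim 64).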
%3166 = torch.aten.view %3165, %2485 : !torch.vtensor<[4,2304,640],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,40,16],f16>
%3167 = torch.aten.abs %3166 : !torch.vtensor<[4,2304,40,16],f16> -> !torch.vtensor<[4,2304,40,16],f16>
%values_104, %indices_105 = torch.aten.max.dim %3167, %int3, %true : !torch.vtensor<[4,2304,40,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[4,2304,40,1],f16>, !torch.vtensor<[4,2304,40,1],si64>
%3168 = torch.aten.view %values_104, %2488 : !torch.vtensor<[4,2304,40,1],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,40,1],f16>
%3169 = torch.aten.broadcast_to %3168, %2485 : !torch.vtensor<[4,2304,40,1],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,40,16],f16>
%3170 = torch.aten.clone %3169, %int0 : !torch.vtensor<[4,2304,40,16],f16>, !torch.int -> !torch.vtensor<[4,2304,40,16],f16>
%3171 = torch.aten.view %3170, %2482 : !torch.vtensor<[4,2304,40,16],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,640],f16>
%3172 = torch.aten.div.Scalar %3171, %int128 : !torch.vtensor<[4,2304,640],f16>, !torch.int -> !torch.vtensor<[4,2304,640],f16>
%3173 = torch.aten.detach %936 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%3174 = torch.aten.detach %3173 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%3175 = torch.aten.div.Tensor %3165, %3172 : !torch.vtensor<[4,2304,640],f16>, !torch.vtensor<[4,2304,640],f16> -> !torch.vtensor<[4,2304,640],f16>
%3176 = torch.aten.add.Tensor %3175, %3174, %int1 : !torch.vtensor<[4,2304,640],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,2304,640],f16>
%3177 = torch.aten.round %3176 : !torch.vtensor<[4,2304,640],f16> -> !torch.vtensor<[4,2304,640],f16>
%3178 = torch.aten.clamp %3177, %int-128, %int127 : !torch.vtensor<[4,2304,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,2304,640],f16>
%3179 = torch.aten.sub.Tensor %3178, %3174, %int1 : !torch.vtensor<[4,2304,640],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,2304,640],f16>
%3180 = torch.aten.mul.Tensor %3179, %3172 : !torch.vtensor<[4,2304,640],f16>, !torch.vtensor<[4,2304,640],f16> -> !torch.vtensor<[4,2304,640],f16>
%3181 = torch.aten.broadcast_to %760, %2502 : !torch.vtensor<[640,40,1],f16>, !torch.list<int> -> !torch.vtensor<[640,40,16],f16>
%3182 = torch.aten.clone %3181, %int0 : !torch.vtensor<[640,40,16],f16>, !torch.int -> !torch.vtensor<[640,40,16],f16>
%3183 = torch.aten.view %3182, %2505 : !torch.vtensor<[640,40,16],f16>, !torch.list<int> -> !torch.vtensor<[640,640],f16>
%3184 = torch.aten.mul.Tensor %759, %3183 : !torch.vtensor<[640,640],si8>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
%3185 = torch.aten.transpose.int %3184, %int0, %int1 : !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,640],f16>
%3186 = torch.aten.view %3180, %2509 : !torch.vtensor<[4,2304,640],f16>, !torch.list<int> -> !torch.vtensor<[9216,640],f16>
%3187 = torch.aten.mm %3186, %3185 : !torch.vtensor<[9216,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[9216,640],f16>
%3188 = torch.aten.view %3187, %2482 : !torch.vtensor<[9216,640],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,640],f16>
%3189 = torch.aten.view %3188, %2554 : !torch.vtensor<[4,2304,640],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,10,64],f16>
%3190 = torch.aten.permute %3189, %1239 : !torch.vtensor<[4,2304,10,64],f16>, !torch.list<int> -> !torch.vtensor<[4,10,2304,64],f16>
%3191 = torch.aten.clone %3190, %int0 : !torch.vtensor<[4,10,2304,64],f16>, !torch.int -> !torch.vtensor<[4,10,2304,64],f16>
%3192 = torch.aten.view %3191, %2558 : !torch.vtensor<[4,10,2304,64],f16>, !torch.list<int> -> !torch.vtensor<[40,2304,64],f16>
%3193 = torch.aten.view %3165, %2485 : !torch.vtensor<[4,2304,640],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,40,16],f16>
%3194 = torch.aten.abs %3193 : !torch.vtensor<[4,2304,40,16],f16> -> !torch.vtensor<[4,2304,40,16],f16>
%values_106, %indices_107 = torch.aten.max.dim %3194, %int3, %true : !torch.vtensor<[4,2304,40,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[4,2304,40,1],f16>, !torch.vtensor<[4,2304,40,1],si64>
%3195 = torch.aten.view %values_106, %2488 : !torch.vtensor<[4,2304,40,1],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,40,1],f16>
%3196 = torch.aten.broadcast_to %3195, %2485 : !torch.vtensor<[4,2304,40,1],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,40,16],f16>
%3197 = torch.aten.clone %3196, %int0 : !torch.vtensor<[4,2304,40,16],f16>, !torch.int -> !torch.vtensor<[4,2304,40,16],f16>
%3198 = torch.aten.view %3197, %2482 : !torch.vtensor<[4,2304,40,16],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,640],f16>
%3199 = torch.aten.div.Scalar %3198, %int128 : !torch.vtensor<[4,2304,640],f16>, !torch.int -> !torch.vtensor<[4,2304,640],f16>
%3200 = torch.aten.detach %936 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%3201 = torch.aten.detach %3200 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%3202 = torch.aten.div.Tensor %3165, %3199 : !torch.vtensor<[4,2304,640],f16>, !torch.vtensor<[4,2304,640],f16> -> !torch.vtensor<[4,2304,640],f16>
%3203 = torch.aten.add.Tensor %3202, %3201, %int1 : !torch.vtensor<[4,2304,640],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,2304,640],f16>
%3204 = torch.aten.round %3203 : !torch.vtensor<[4,2304,640],f16> -> !torch.vtensor<[4,2304,640],f16>
%3205 = torch.aten.clamp %3204, %int-128, %int127 : !torch.vtensor<[4,2304,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,2304,640],f16>
%3206 = torch.aten.sub.Tensor %3205, %3201, %int1 : !torch.vtensor<[4,2304,640],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,2304,640],f16>
%3207 = torch.aten.mul.Tensor %3206, %3199 : !torch.vtensor<[4,2304,640],f16>, !torch.vtensor<[4,2304,640],f16> -> !torch.vtensor<[4,2304,640],f16>
%3208 = torch.aten.broadcast_to %758, %2502 : !torch.vtensor<[640,40,1],f16>, !torch.list<int> -> !torch.vtensor<[640,40,16],f16>
%3209 = torch.aten.clone %3208, %int0 : !torch.vtensor<[640,40,16],f16>, !torch.int -> !torch.vtensor<[640,40,16],f16>
%3210 = torch.aten.view %3209, %2505 : !torch.vtensor<[640,40,16],f16>, !torch.list<int> -> !torch.vtensor<[640,640],f16>
%3211 = torch.aten.mul.Tensor %757, %3210 : !torch.vtensor<[640,640],si8>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
%3212 = torch.aten.transpose.int %3211, %int0, %int1 : !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,640],f16>
%3213 = torch.aten.view %3207, %2509 : !torch.vtensor<[4,2304,640],f16>, !torch.list<int> -> !torch.vtensor<[9216,640],f16>
%3214 = torch.aten.mm %3213, %3212 : !torch.vtensor<[9216,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[9216,640],f16>
%3215 = torch.aten.view %3214, %2482 : !torch.vtensor<[9216,640],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,640],f16>
%3216 = torch.aten.view %3165, %2485 : !torch.vtensor<[4,2304,640],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,40,16],f16>
%3217 = torch.aten.abs %3216 : !torch.vtensor<[4,2304,40,16],f16> -> !torch.vtensor<[4,2304,40,16],f16>
%values_108, %indices_109 = torch.aten.max.dim %3217, %int3, %true : !torch.vtensor<[4,2304,40,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[4,2304,40,1],f16>, !torch.vtensor<[4,2304,40,1],si64>
%3218 = torch.aten.view %values_108, %2488 : !torch.vtensor<[4,2304,40,1],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,40,1],f16>
%3219 = torch.aten.broadcast_to %3218, %2485 : !torch.vtensor<[4,2304,40,1],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,40,16],f16>
%3220 = torch.aten.clone %3219, %int0 : !torch.vtensor<[4,2304,40,16],f16>, !torch.int -> !torch.vtensor<[4,2304,40,16],f16>
%3221 = torch.aten.view %3220, %2482 : !torch.vtensor<[4,2304,40,16],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,640],f16>
%3222 = torch.aten.div.Scalar %3221, %int128 : !torch.vtensor<[4,2304,640],f16>, !torch.int -> !torch.vtensor<[4,2304,640],f16>
%3223 = torch.aten.detach %936 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%3224 = torch.aten.detach %3223 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%3225 = torch.aten.div.Tensor %3165, %3222 : !torch.vtensor<[4,2304,640],f16>, !torch.vtensor<[4,2304,640],f16> -> !torch.vtensor<[4,2304,640],f16>
%3226 = torch.aten.add.Tensor %3225, %3224, %int1 : !torch.vtensor<[4,2304,640],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,2304,640],f16>
%3227 = torch.aten.round %3226 : !torch.vtensor<[4,2304,640],f16> -> !torch.vtensor<[4,2304,640],f16>
%3228 = torch.aten.clamp %3227, %int-128, %int127 : !torch.vtensor<[4,2304,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,2304,640],f16>
%3229 = torch.aten.sub.Tensor %3228, %3224, %int1 : !torch.vtensor<[4,2304,640],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,2304,640],f16>
%3230 = torch.aten.mul.Tensor %3229, %3222 : !torch.vtensor<[4,2304,640],f16>, !torch.vtensor<[4,2304,640],f16> -> !torch.vtensor<[4,2304,640],f16>
%3231 = torch.aten.broadcast_to %756, %2502 : !torch.vtensor<[640,40,1],f16>, !torch.list<int> -> !torch.vtensor<[640,40,16],f16>
%3232 = torch.aten.clone %3231, %int0 : !torch.vtensor<[640,40,16],f16>, !torch.int -> !torch.vtensor<[640,40,16],f16>
%3233 = torch.aten.view %3232, %2505 : !torch.vtensor<[640,40,16],f16>, !torch.list<int> -> !torch.vtensor<[640,640],f16>
%3234 = torch.aten.mul.Tensor %755, %3233 : !torch.vtensor<[640,640],si8>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
%3235 = torch.aten.transpose.int %3234, %int0, %int1 : !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,640],f16>
%3236 = torch.aten.view %3230, %2509 : !torch.vtensor<[4,2304,640],f16>, !torch.list<int> -> !torch.vtensor<[9216,640],f16>
%3237 = torch.aten.mm %3236, %3235 : !torch.vtensor<[9216,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[9216,640],f16>
%3238 = torch.aten.view %3237, %2482 : !torch.vtensor<[9216,640],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,640],f16>
%3239 = torch.aten.view %3215, %2554 : !torch.vtensor<[4,2304,640],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,10,64],f16>
%3240 = torch.aten.permute %3239, %1239 : !torch.vtensor<[4,2304,10,64],f16>, !torch.list<int> -> !torch.vtensor<[4,10,2304,64],f16>
%3241 = torch.aten.clone %3240, %int0 : !torch.vtensor<[4,10,2304,64],f16>, !torch.int -> !torch.vtensor<[4,10,2304,64],f16>
%3242 = torch.aten.view %3241, %2558 : !torch.vtensor<[4,10,2304,64],f16>, !torch.list<int> -> !torch.vtensor<[40,2304,64],f16>
%3243 = torch.aten.view %3238, %2554 : !torch.vtensor<[4,2304,640],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,10,64],f16>
%3244 = torch.aten.permute %3243, %1239 : !torch.vtensor<[4,2304,10,64],f16>, !torch.list<int> -> !torch.vtensor<[4,10,2304,64],f16>
%3245 = torch.aten.clone %3244, %int0 : !torch.vtensor<[4,10,2304,64],f16>, !torch.int -> !torch.vtensor<[4,10,2304,64],f16>
%3246 = torch.aten.view %3245, %2558 : !torch.vtensor<[4,10,2304,64],f16>, !torch.list<int> -> !torch.vtensor<[40,2304,64],f16>
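    // Scaled dot-product attention in f32: Q and K are upcast, scores = bmm(Q, K^T) * 0.125
    // (= 1/sqrt(64) for head dim 64). The empty tensor added with alpha 0 looks like a
    // baddbmm decomposition with beta = 0. Softmax is the numerically stable
    // max-subtract/exp/sum/div form, then the probabilities are cast back to f16 for bmm with V.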
%3247 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%3248 = torch.aten.to.dtype %3247, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%3249 = torch.prim.ListConstruct %int40, %int2304, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3250 = torch.aten.broadcast_to %3248, %3249 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[40,2304,64],f32>
%3251 = torch.aten.to.dtype %3192, %int6, %false, %false, %none : !torch.vtensor<[40,2304,64],f16>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[40,2304,64],f32>
%3252 = torch.prim.ListConstruct %int40, %int2304, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3253 = torch.aten.broadcast_to %3251, %3252 : !torch.vtensor<[40,2304,64],f32>, !torch.list<int> -> !torch.vtensor<[40,2304,64],f32>
%3254 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%3255 = torch.aten.to.dtype %3254, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%3256 = torch.prim.ListConstruct %int40, %int2304, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3257 = torch.aten.broadcast_to %3255, %3256 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[40,2304,64],f32>
%3258 = torch.aten.to.dtype %3242, %int6, %false, %false, %none : !torch.vtensor<[40,2304,64],f16>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[40,2304,64],f32>
%3259 = torch.prim.ListConstruct %int40, %int2304, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3260 = torch.aten.broadcast_to %3258, %3259 : !torch.vtensor<[40,2304,64],f32>, !torch.list<int> -> !torch.vtensor<[40,2304,64],f32>
%3261 = torch.aten.empty.memory_format %2628, %int6, %none, %cuda3A0, %false, %none : !torch.list<int>, !torch.int, !torch.none, !torch.Device, !torch.bool, !torch.none -> !torch.vtensor<[40,2304,2304],f32>
%3262 = torch.aten.transpose.int %3260, %int-1, %int-2 : !torch.vtensor<[40,2304,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[40,64,2304],f32>
%3263 = torch.aten.bmm %3253, %3262 : !torch.vtensor<[40,2304,64],f32>, !torch.vtensor<[40,64,2304],f32> -> !torch.vtensor<[40,2304,2304],f32>
%3264 = torch.aten.mul.Scalar %3263, %float1.250000e-01 : !torch.vtensor<[40,2304,2304],f32>, !torch.float -> !torch.vtensor<[40,2304,2304],f32>
%3265 = torch.aten.add.Tensor %3264, %3261, %int0 : !torch.vtensor<[40,2304,2304],f32>, !torch.vtensor<[40,2304,2304],f32>, !torch.int -> !torch.vtensor<[40,2304,2304],f32>
%values_110, %indices_111 = torch.aten.max.dim %3265, %int-1, %true : !torch.vtensor<[40,2304,2304],f32>, !torch.int, !torch.bool -> !torch.vtensor<[40,2304,1],f32>, !torch.vtensor<[40,2304,1],si64>
%3266 = torch.aten.sub.Tensor %3265, %values_110, %float1.000000e00 : !torch.vtensor<[40,2304,2304],f32>, !torch.vtensor<[40,2304,1],f32>, !torch.float -> !torch.vtensor<[40,2304,2304],f32>
%3267 = torch.aten.exp %3266 : !torch.vtensor<[40,2304,2304],f32> -> !torch.vtensor<[40,2304,2304],f32>
%3268 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%3269 = torch.aten.sum.dim_IntList %3267, %3268, %true, %none : !torch.vtensor<[40,2304,2304],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[40,2304,1],f32>
%3270 = torch.aten.div.Tensor %3267, %3269 : !torch.vtensor<[40,2304,2304],f32>, !torch.vtensor<[40,2304,1],f32> -> !torch.vtensor<[40,2304,2304],f32>
%3271 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%3272 = torch.aten.to.dtype %3271, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%3273 = torch.prim.ListConstruct %int40, %int2304, %int2304 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3274 = torch.aten.broadcast_to %3272, %3273 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[40,2304,2304],f16>
%3275 = torch.aten.to.dtype %3270, %int5, %false, %false, %none : !torch.vtensor<[40,2304,2304],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[40,2304,2304],f16>
%3276 = torch.prim.ListConstruct %int40, %int2304, %int2304 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3277 = torch.aten.broadcast_to %3275, %3276 : !torch.vtensor<[40,2304,2304],f16>, !torch.list<int> -> !torch.vtensor<[40,2304,2304],f16>
%3278 = torch.aten.bmm %3277, %3246 : !torch.vtensor<[40,2304,2304],f16>, !torch.vtensor<[40,2304,64],f16> -> !torch.vtensor<[40,2304,64],f16>
%3279 = torch.aten.view %3278, %2647 : !torch.vtensor<[40,2304,64],f16>, !torch.list<int> -> !torch.vtensor<[4,10,2304,64],f16>
%3280 = torch.aten.permute %3279, %1239 : !torch.vtensor<[4,10,2304,64],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,10,64],f16>
%3281 = torch.aten.clone %3280, %int0 : !torch.vtensor<[4,2304,10,64],f16>, !torch.int -> !torch.vtensor<[4,2304,10,64],f16>
%3282 = torch.aten.view %3281, %2482 : !torch.vtensor<[4,2304,10,64],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,640],f16>
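    // Heads merged back to [4,2304,640]; fake-quant and the attention output projection
    // (scales %754, si8 weights %753, bias %752) follow, then the residual add with %3149.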
%3283 = torch.aten.view %3282, %2485 : !torch.vtensor<[4,2304,640],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,40,16],f16>
%3284 = torch.aten.abs %3283 : !torch.vtensor<[4,2304,40,16],f16> -> !torch.vtensor<[4,2304,40,16],f16>
%values_112, %indices_113 = torch.aten.max.dim %3284, %int3, %true : !torch.vtensor<[4,2304,40,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[4,2304,40,1],f16>, !torch.vtensor<[4,2304,40,1],si64>
%3285 = torch.aten.view %values_112, %2488 : !torch.vtensor<[4,2304,40,1],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,40,1],f16>
%3286 = torch.aten.broadcast_to %3285, %2485 : !torch.vtensor<[4,2304,40,1],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,40,16],f16>
%3287 = torch.aten.clone %3286, %int0 : !torch.vtensor<[4,2304,40,16],f16>, !torch.int -> !torch.vtensor<[4,2304,40,16],f16>
%3288 = torch.aten.view %3287, %2482 : !torch.vtensor<[4,2304,40,16],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,640],f16>
%3289 = torch.aten.div.Scalar %3288, %int128 : !torch.vtensor<[4,2304,640],f16>, !torch.int -> !torch.vtensor<[4,2304,640],f16>
%3290 = torch.aten.detach %936 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%3291 = torch.aten.detach %3290 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%3292 = torch.aten.div.Tensor %3282, %3289 : !torch.vtensor<[4,2304,640],f16>, !torch.vtensor<[4,2304,640],f16> -> !torch.vtensor<[4,2304,640],f16>
%3293 = torch.aten.add.Tensor %3292, %3291, %int1 : !torch.vtensor<[4,2304,640],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,2304,640],f16>
%3294 = torch.aten.round %3293 : !torch.vtensor<[4,2304,640],f16> -> !torch.vtensor<[4,2304,640],f16>
%3295 = torch.aten.clamp %3294, %int-128, %int127 : !torch.vtensor<[4,2304,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,2304,640],f16>
%3296 = torch.aten.sub.Tensor %3295, %3291, %int1 : !torch.vtensor<[4,2304,640],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,2304,640],f16>
%3297 = torch.aten.mul.Tensor %3296, %3289 : !torch.vtensor<[4,2304,640],f16>, !torch.vtensor<[4,2304,640],f16> -> !torch.vtensor<[4,2304,640],f16>
%3298 = torch.aten.broadcast_to %754, %2502 : !torch.vtensor<[640,40,1],f16>, !torch.list<int> -> !torch.vtensor<[640,40,16],f16>
%3299 = torch.aten.clone %3298, %int0 : !torch.vtensor<[640,40,16],f16>, !torch.int -> !torch.vtensor<[640,40,16],f16>
%3300 = torch.aten.view %3299, %2505 : !torch.vtensor<[640,40,16],f16>, !torch.list<int> -> !torch.vtensor<[640,640],f16>
%3301 = torch.aten.mul.Tensor %753, %3300 : !torch.vtensor<[640,640],si8>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
%3302 = torch.aten.transpose.int %3301, %int0, %int1 : !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,640],f16>
%3303 = torch.aten.view %3297, %2509 : !torch.vtensor<[4,2304,640],f16>, !torch.list<int> -> !torch.vtensor<[9216,640],f16>
%3304 = torch.aten.mm %3303, %3302 : !torch.vtensor<[9216,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[9216,640],f16>
%3305 = torch.aten.mul.Scalar %752, %int1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640],f16>
%3306 = torch.aten.add.Tensor %3305, %3304, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[9216,640],f16>, !torch.int -> !torch.vtensor<[9216,640],f16>
%3307 = torch.aten.view %3306, %2482 : !torch.vtensor<[9216,640],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,640],f16>
%3308 = torch.aten.add.Tensor %3307, %3149, %int1 : !torch.vtensor<[4,2304,640],f16>, !torch.vtensor<[4,2304,640],f16>, !torch.int -> !torch.vtensor<[4,2304,640],f16>
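    // Second LayerNorm (weight %751, bias %750) ahead of cross-attention.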
%3309 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%3310 = torch.aten.sum.dim_IntList %3308, %3309, %true, %none : !torch.vtensor<[4,2304,640],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,2304,1],f16>
%3311 = torch.aten.div.Scalar %3310, %int640 : !torch.vtensor<[4,2304,1],f16>, !torch.int -> !torch.vtensor<[4,2304,1],f16>
%3312 = torch.prim.ListConstruct %int4, %int2304, %int640 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3313 = torch.aten.broadcast_to %3311, %3312 : !torch.vtensor<[4,2304,1],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,640],f16>
%3314 = torch.aten.sub.Tensor %3308, %3313, %int1 : !torch.vtensor<[4,2304,640],f16>, !torch.vtensor<[4,2304,640],f16>, !torch.int -> !torch.vtensor<[4,2304,640],f16>
%3315 = torch.aten.mul.Tensor %3314, %3314 : !torch.vtensor<[4,2304,640],f16>, !torch.vtensor<[4,2304,640],f16> -> !torch.vtensor<[4,2304,640],f16>
%3316 = torch.aten.sum.dim_IntList %3315, %3309, %true, %none : !torch.vtensor<[4,2304,640],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,2304,1],f16>
%3317 = torch.aten.div.Scalar %3316, %int640 : !torch.vtensor<[4,2304,1],f16>, !torch.int -> !torch.vtensor<[4,2304,1],f16>
%3318 = torch.aten.add.Scalar %3317, %float1.000000e-05, %int1 : !torch.vtensor<[4,2304,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[4,2304,1],f16>
%3319 = torch.aten.rsqrt %3318 : !torch.vtensor<[4,2304,1],f16> -> !torch.vtensor<[4,2304,1],f16>
%3320 = torch.prim.ListConstruct %int4, %int2304, %int640 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3321 = torch.aten.broadcast_to %3319, %3320 : !torch.vtensor<[4,2304,1],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,640],f16>
%3322 = torch.aten.mul.Tensor %3314, %3321 : !torch.vtensor<[4,2304,640],f16>, !torch.vtensor<[4,2304,640],f16> -> !torch.vtensor<[4,2304,640],f16>
%3323 = torch.aten.mul.Tensor %3322, %751 : !torch.vtensor<[4,2304,640],f16>, !torch.vtensor<[640],f16> -> !torch.vtensor<[4,2304,640],f16>
%3324 = torch.aten.add.Tensor %3323, %750, %int1 : !torch.vtensor<[4,2304,640],f16>, !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[4,2304,640],f16>
%3325 = torch.aten.view %3324, %2485 : !torch.vtensor<[4,2304,640],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,40,16],f16>
%3326 = torch.aten.abs %3325 : !torch.vtensor<[4,2304,40,16],f16> -> !torch.vtensor<[4,2304,40,16],f16>
%values_114, %indices_115 = torch.aten.max.dim %3326, %int3, %true : !torch.vtensor<[4,2304,40,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[4,2304,40,1],f16>, !torch.vtensor<[4,2304,40,1],si64>
%3327 = torch.aten.view %values_114, %2488 : !torch.vtensor<[4,2304,40,1],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,40,1],f16>
%3328 = torch.aten.broadcast_to %3327, %2485 : !torch.vtensor<[4,2304,40,1],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,40,16],f16>
%3329 = torch.aten.clone %3328, %int0 : !torch.vtensor<[4,2304,40,16],f16>, !torch.int -> !torch.vtensor<[4,2304,40,16],f16>
%3330 = torch.aten.view %3329, %2482 : !torch.vtensor<[4,2304,40,16],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,640],f16>
%3331 = torch.aten.div.Scalar %3330, %int128 : !torch.vtensor<[4,2304,640],f16>, !torch.int -> !torch.vtensor<[4,2304,640],f16>
%3332 = torch.aten.detach %936 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%3333 = torch.aten.detach %3332 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%3334 = torch.aten.div.Tensor %3324, %3331 : !torch.vtensor<[4,2304,640],f16>, !torch.vtensor<[4,2304,640],f16> -> !torch.vtensor<[4,2304,640],f16>
%3335 = torch.aten.add.Tensor %3334, %3333, %int1 : !torch.vtensor<[4,2304,640],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,2304,640],f16>
%3336 = torch.aten.round %3335 : !torch.vtensor<[4,2304,640],f16> -> !torch.vtensor<[4,2304,640],f16>
%3337 = torch.aten.clamp %3336, %int-128, %int127 : !torch.vtensor<[4,2304,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,2304,640],f16>
%3338 = torch.aten.sub.Tensor %3337, %3333, %int1 : !torch.vtensor<[4,2304,640],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,2304,640],f16>
%3339 = torch.aten.mul.Tensor %3338, %3331 : !torch.vtensor<[4,2304,640],f16>, !torch.vtensor<[4,2304,640],f16> -> !torch.vtensor<[4,2304,640],f16>
%3340 = torch.aten.broadcast_to %749, %2502 : !torch.vtensor<[640,40,1],f16>, !torch.list<int> -> !torch.vtensor<[640,40,16],f16>
%3341 = torch.aten.clone %3340, %int0 : !torch.vtensor<[640,40,16],f16>, !torch.int -> !torch.vtensor<[640,40,16],f16>
%3342 = torch.aten.view %3341, %2505 : !torch.vtensor<[640,40,16],f16>, !torch.list<int> -> !torch.vtensor<[640,640],f16>
%3343 = torch.aten.mul.Tensor %748, %3342 : !torch.vtensor<[640,640],si8>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
%3344 = torch.aten.transpose.int %3343, %int0, %int1 : !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,640],f16>
%3345 = torch.aten.view %3339, %2509 : !torch.vtensor<[4,2304,640],f16>, !torch.list<int> -> !torch.vtensor<[9216,640],f16>
%3346 = torch.aten.mm %3345, %3344 : !torch.vtensor<[9216,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[9216,640],f16>
%3347 = torch.aten.view %3346, %2482 : !torch.vtensor<[9216,640],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,640],f16>
%3348 = torch.aten.view %3347, %2554 : !torch.vtensor<[4,2304,640],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,10,64],f16>
%3349 = torch.aten.permute %3348, %1239 : !torch.vtensor<[4,2304,10,64],f16>, !torch.list<int> -> !torch.vtensor<[4,10,2304,64],f16>
%3350 = torch.aten.clone %3349, %int0 : !torch.vtensor<[4,10,2304,64],f16>, !torch.int -> !torch.vtensor<[4,10,2304,64],f16>
%3351 = torch.aten.view %3350, %2558 : !torch.vtensor<[4,10,2304,64],f16>, !torch.list<int> -> !torch.vtensor<[40,2304,64],f16>
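    // Cross-attention: Q comes from the hidden states above, while K and V are projected
    // from the text-encoder states %arg2 ([4,77,1024]), themselves fake-quantized per
    // 16-feature group ([4,77,64,16]); the 1024 -> 640 si8 weights %746/%744 are
    // dequantized by scales %747/%745, giving attention scores of shape [40,2304,77].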
%3352 = torch.aten.view %arg2, %1405 : !torch.vtensor<[4,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,77,64,16],f16>
%3353 = torch.aten.abs %3352 : !torch.vtensor<[4,77,64,16],f16> -> !torch.vtensor<[4,77,64,16],f16>
%values_116, %indices_117 = torch.aten.max.dim %3353, %int3, %true : !torch.vtensor<[4,77,64,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[4,77,64,1],f16>, !torch.vtensor<[4,77,64,1],si64>
%3354 = torch.aten.view %values_116, %1408 : !torch.vtensor<[4,77,64,1],f16>, !torch.list<int> -> !torch.vtensor<[4,77,64,1],f16>
%3355 = torch.aten.broadcast_to %3354, %1405 : !torch.vtensor<[4,77,64,1],f16>, !torch.list<int> -> !torch.vtensor<[4,77,64,16],f16>
%3356 = torch.aten.clone %3355, %int0 : !torch.vtensor<[4,77,64,16],f16>, !torch.int -> !torch.vtensor<[4,77,64,16],f16>
%3357 = torch.aten.view %3356, %1412 : !torch.vtensor<[4,77,64,16],f16>, !torch.list<int> -> !torch.vtensor<[4,77,1024],f16>
%3358 = torch.aten.div.Scalar %3357, %int128 : !torch.vtensor<[4,77,1024],f16>, !torch.int -> !torch.vtensor<[4,77,1024],f16>
%3359 = torch.aten.detach %936 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%3360 = torch.aten.detach %3359 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%3361 = torch.aten.div.Tensor %arg2, %3358 : !torch.vtensor<[4,77,1024],f16>, !torch.vtensor<[4,77,1024],f16> -> !torch.vtensor<[4,77,1024],f16>
%3362 = torch.aten.add.Tensor %3361, %3360, %int1 : !torch.vtensor<[4,77,1024],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,77,1024],f16>
%3363 = torch.aten.round %3362 : !torch.vtensor<[4,77,1024],f16> -> !torch.vtensor<[4,77,1024],f16>
%3364 = torch.aten.clamp %3363, %int-128, %int127 : !torch.vtensor<[4,77,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,77,1024],f16>
%3365 = torch.aten.sub.Tensor %3364, %3360, %int1 : !torch.vtensor<[4,77,1024],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,77,1024],f16>
%3366 = torch.aten.mul.Tensor %3365, %3358 : !torch.vtensor<[4,77,1024],f16>, !torch.vtensor<[4,77,1024],f16> -> !torch.vtensor<[4,77,1024],f16>
%3367 = torch.aten.broadcast_to %747, %2736 : !torch.vtensor<[640,64,1],f16>, !torch.list<int> -> !torch.vtensor<[640,64,16],f16>
%3368 = torch.aten.clone %3367, %int0 : !torch.vtensor<[640,64,16],f16>, !torch.int -> !torch.vtensor<[640,64,16],f16>
%3369 = torch.aten.view %3368, %2739 : !torch.vtensor<[640,64,16],f16>, !torch.list<int> -> !torch.vtensor<[640,1024],f16>
%3370 = torch.aten.mul.Tensor %746, %3369 : !torch.vtensor<[640,1024],si8>, !torch.vtensor<[640,1024],f16> -> !torch.vtensor<[640,1024],f16>
%3371 = torch.aten.transpose.int %3370, %int0, %int1 : !torch.vtensor<[640,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[1024,640],f16>
%3372 = torch.aten.view %3366, %1430 : !torch.vtensor<[4,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[308,1024],f16>
%3373 = torch.aten.mm %3372, %3371 : !torch.vtensor<[308,1024],f16>, !torch.vtensor<[1024,640],f16> -> !torch.vtensor<[308,640],f16>
%3374 = torch.aten.view %3373, %2745 : !torch.vtensor<[308,640],f16>, !torch.list<int> -> !torch.vtensor<[4,77,640],f16>
%3375 = torch.aten.view %arg2, %1405 : !torch.vtensor<[4,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,77,64,16],f16>
%3376 = torch.aten.abs %3375 : !torch.vtensor<[4,77,64,16],f16> -> !torch.vtensor<[4,77,64,16],f16>
%values_118, %indices_119 = torch.aten.max.dim %3376, %int3, %true : !torch.vtensor<[4,77,64,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[4,77,64,1],f16>, !torch.vtensor<[4,77,64,1],si64>
%3377 = torch.aten.view %values_118, %1408 : !torch.vtensor<[4,77,64,1],f16>, !torch.list<int> -> !torch.vtensor<[4,77,64,1],f16>
%3378 = torch.aten.broadcast_to %3377, %1405 : !torch.vtensor<[4,77,64,1],f16>, !torch.list<int> -> !torch.vtensor<[4,77,64,16],f16>
%3379 = torch.aten.clone %3378, %int0 : !torch.vtensor<[4,77,64,16],f16>, !torch.int -> !torch.vtensor<[4,77,64,16],f16>
%3380 = torch.aten.view %3379, %1412 : !torch.vtensor<[4,77,64,16],f16>, !torch.list<int> -> !torch.vtensor<[4,77,1024],f16>
%3381 = torch.aten.div.Scalar %3380, %int128 : !torch.vtensor<[4,77,1024],f16>, !torch.int -> !torch.vtensor<[4,77,1024],f16>
%3382 = torch.aten.detach %936 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%3383 = torch.aten.detach %3382 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%3384 = torch.aten.div.Tensor %arg2, %3381 : !torch.vtensor<[4,77,1024],f16>, !torch.vtensor<[4,77,1024],f16> -> !torch.vtensor<[4,77,1024],f16>
%3385 = torch.aten.add.Tensor %3384, %3383, %int1 : !torch.vtensor<[4,77,1024],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,77,1024],f16>
%3386 = torch.aten.round %3385 : !torch.vtensor<[4,77,1024],f16> -> !torch.vtensor<[4,77,1024],f16>
%3387 = torch.aten.clamp %3386, %int-128, %int127 : !torch.vtensor<[4,77,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,77,1024],f16>
%3388 = torch.aten.sub.Tensor %3387, %3383, %int1 : !torch.vtensor<[4,77,1024],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,77,1024],f16>
%3389 = torch.aten.mul.Tensor %3388, %3381 : !torch.vtensor<[4,77,1024],f16>, !torch.vtensor<[4,77,1024],f16> -> !torch.vtensor<[4,77,1024],f16>
%3390 = torch.aten.broadcast_to %745, %2736 : !torch.vtensor<[640,64,1],f16>, !torch.list<int> -> !torch.vtensor<[640,64,16],f16>
%3391 = torch.aten.clone %3390, %int0 : !torch.vtensor<[640,64,16],f16>, !torch.int -> !torch.vtensor<[640,64,16],f16>
%3392 = torch.aten.view %3391, %2739 : !torch.vtensor<[640,64,16],f16>, !torch.list<int> -> !torch.vtensor<[640,1024],f16>
%3393 = torch.aten.mul.Tensor %744, %3392 : !torch.vtensor<[640,1024],si8>, !torch.vtensor<[640,1024],f16> -> !torch.vtensor<[640,1024],f16>
%3394 = torch.aten.transpose.int %3393, %int0, %int1 : !torch.vtensor<[640,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[1024,640],f16>
%3395 = torch.aten.view %3389, %1430 : !torch.vtensor<[4,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[308,1024],f16>
%3396 = torch.aten.mm %3395, %3394 : !torch.vtensor<[308,1024],f16>, !torch.vtensor<[1024,640],f16> -> !torch.vtensor<[308,640],f16>
%3397 = torch.aten.view %3396, %2745 : !torch.vtensor<[308,640],f16>, !torch.list<int> -> !torch.vtensor<[4,77,640],f16>
%3398 = torch.aten.view %3374, %2770 : !torch.vtensor<[4,77,640],f16>, !torch.list<int> -> !torch.vtensor<[4,77,10,64],f16>
%3399 = torch.aten.permute %3398, %1239 : !torch.vtensor<[4,77,10,64],f16>, !torch.list<int> -> !torch.vtensor<[4,10,77,64],f16>
%3400 = torch.aten.clone %3399, %int0 : !torch.vtensor<[4,10,77,64],f16>, !torch.int -> !torch.vtensor<[4,10,77,64],f16>
%3401 = torch.aten.view %3400, %2774 : !torch.vtensor<[4,10,77,64],f16>, !torch.list<int> -> !torch.vtensor<[40,77,64],f16>
%3402 = torch.aten.view %3397, %2770 : !torch.vtensor<[4,77,640],f16>, !torch.list<int> -> !torch.vtensor<[4,77,10,64],f16>
%3403 = torch.aten.permute %3402, %1239 : !torch.vtensor<[4,77,10,64],f16>, !torch.list<int> -> !torch.vtensor<[4,10,77,64],f16>
%3404 = torch.aten.clone %3403, %int0 : !torch.vtensor<[4,10,77,64],f16>, !torch.int -> !torch.vtensor<[4,10,77,64],f16>
%3405 = torch.aten.view %3404, %2774 : !torch.vtensor<[4,10,77,64],f16>, !torch.list<int> -> !torch.vtensor<[40,77,64],f16>
%3406 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%3407 = torch.aten.to.dtype %3406, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%3408 = torch.prim.ListConstruct %int40, %int2304, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3409 = torch.aten.broadcast_to %3407, %3408 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[40,2304,64],f32>
%3410 = torch.aten.to.dtype %3351, %int6, %false, %false, %none : !torch.vtensor<[40,2304,64],f16>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[40,2304,64],f32>
%3411 = torch.prim.ListConstruct %int40, %int2304, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3412 = torch.aten.broadcast_to %3410, %3411 : !torch.vtensor<[40,2304,64],f32>, !torch.list<int> -> !torch.vtensor<[40,2304,64],f32>
%3413 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%3414 = torch.aten.to.dtype %3413, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%3415 = torch.prim.ListConstruct %int40, %int77, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3416 = torch.aten.broadcast_to %3414, %3415 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[40,77,64],f32>
%3417 = torch.aten.to.dtype %3401, %int6, %false, %false, %none : !torch.vtensor<[40,77,64],f16>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[40,77,64],f32>
%3418 = torch.prim.ListConstruct %int40, %int77, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3419 = torch.aten.broadcast_to %3417, %3418 : !torch.vtensor<[40,77,64],f32>, !torch.list<int> -> !torch.vtensor<[40,77,64],f32>
%3420 = torch.aten.empty.memory_format %2794, %int6, %none, %cuda3A0, %false, %none : !torch.list<int>, !torch.int, !torch.none, !torch.Device, !torch.bool, !torch.none -> !torch.vtensor<[40,2304,77],f32>
%3421 = torch.aten.transpose.int %3419, %int-1, %int-2 : !torch.vtensor<[40,77,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[40,64,77],f32>
%3422 = torch.aten.bmm %3412, %3421 : !torch.vtensor<[40,2304,64],f32>, !torch.vtensor<[40,64,77],f32> -> !torch.vtensor<[40,2304,77],f32>
%3423 = torch.aten.mul.Scalar %3422, %float1.250000e-01 : !torch.vtensor<[40,2304,77],f32>, !torch.float -> !torch.vtensor<[40,2304,77],f32>
%3424 = torch.aten.add.Tensor %3423, %3420, %int0 : !torch.vtensor<[40,2304,77],f32>, !torch.vtensor<[40,2304,77],f32>, !torch.int -> !torch.vtensor<[40,2304,77],f32>
%values_120, %indices_121 = torch.aten.max.dim %3424, %int-1, %true : !torch.vtensor<[40,2304,77],f32>, !torch.int, !torch.bool -> !torch.vtensor<[40,2304,1],f32>, !torch.vtensor<[40,2304,1],si64>
%3425 = torch.aten.sub.Tensor %3424, %values_120, %float1.000000e00 : !torch.vtensor<[40,2304,77],f32>, !torch.vtensor<[40,2304,1],f32>, !torch.float -> !torch.vtensor<[40,2304,77],f32>
%3426 = torch.aten.exp %3425 : !torch.vtensor<[40,2304,77],f32> -> !torch.vtensor<[40,2304,77],f32>
%3427 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%3428 = torch.aten.sum.dim_IntList %3426, %3427, %true, %none : !torch.vtensor<[40,2304,77],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[40,2304,1],f32>
%3429 = torch.aten.div.Tensor %3426, %3428 : !torch.vtensor<[40,2304,77],f32>, !torch.vtensor<[40,2304,1],f32> -> !torch.vtensor<[40,2304,77],f32>
%3430 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%3431 = torch.aten.to.dtype %3430, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%3432 = torch.prim.ListConstruct %int40, %int2304, %int77 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3433 = torch.aten.broadcast_to %3431, %3432 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[40,2304,77],f16>
%3434 = torch.aten.to.dtype %3429, %int5, %false, %false, %none : !torch.vtensor<[40,2304,77],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[40,2304,77],f16>
%3435 = torch.prim.ListConstruct %int40, %int2304, %int77 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3436 = torch.aten.broadcast_to %3434, %3435 : !torch.vtensor<[40,2304,77],f16>, !torch.list<int> -> !torch.vtensor<[40,2304,77],f16>
%3437 = torch.aten.bmm %3436, %3405 : !torch.vtensor<[40,2304,77],f16>, !torch.vtensor<[40,77,64],f16> -> !torch.vtensor<[40,2304,64],f16>
%3438 = torch.aten.view %3437, %2647 : !torch.vtensor<[40,2304,64],f16>, !torch.list<int> -> !torch.vtensor<[4,10,2304,64],f16>
%3439 = torch.aten.permute %3438, %1239 : !torch.vtensor<[4,10,2304,64],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,10,64],f16>
%3440 = torch.aten.clone %3439, %int0 : !torch.vtensor<[4,2304,10,64],f16>, !torch.int -> !torch.vtensor<[4,2304,10,64],f16>
%3441 = torch.aten.view %3440, %2482 : !torch.vtensor<[4,2304,10,64],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,640],f16>
%3442 = torch.aten.view %3441, %2485 : !torch.vtensor<[4,2304,640],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,40,16],f16>
%3443 = torch.aten.abs %3442 : !torch.vtensor<[4,2304,40,16],f16> -> !torch.vtensor<[4,2304,40,16],f16>
%values_122, %indices_123 = torch.aten.max.dim %3443, %int3, %true : !torch.vtensor<[4,2304,40,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[4,2304,40,1],f16>, !torch.vtensor<[4,2304,40,1],si64>
%3444 = torch.aten.view %values_122, %2488 : !torch.vtensor<[4,2304,40,1],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,40,1],f16>
%3445 = torch.aten.broadcast_to %3444, %2485 : !torch.vtensor<[4,2304,40,1],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,40,16],f16>
%3446 = torch.aten.clone %3445, %int0 : !torch.vtensor<[4,2304,40,16],f16>, !torch.int -> !torch.vtensor<[4,2304,40,16],f16>
%3447 = torch.aten.view %3446, %2482 : !torch.vtensor<[4,2304,40,16],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,640],f16>
%3448 = torch.aten.div.Scalar %3447, %int128 : !torch.vtensor<[4,2304,640],f16>, !torch.int -> !torch.vtensor<[4,2304,640],f16>
%3449 = torch.aten.detach %936 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%3450 = torch.aten.detach %3449 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%3451 = torch.aten.div.Tensor %3441, %3448 : !torch.vtensor<[4,2304,640],f16>, !torch.vtensor<[4,2304,640],f16> -> !torch.vtensor<[4,2304,640],f16>
%3452 = torch.aten.add.Tensor %3451, %3450, %int1 : !torch.vtensor<[4,2304,640],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,2304,640],f16>
%3453 = torch.aten.round %3452 : !torch.vtensor<[4,2304,640],f16> -> !torch.vtensor<[4,2304,640],f16>
%3454 = torch.aten.clamp %3453, %int-128, %int127 : !torch.vtensor<[4,2304,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,2304,640],f16>
%3455 = torch.aten.sub.Tensor %3454, %3450, %int1 : !torch.vtensor<[4,2304,640],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,2304,640],f16>
%3456 = torch.aten.mul.Tensor %3455, %3448 : !torch.vtensor<[4,2304,640],f16>, !torch.vtensor<[4,2304,640],f16> -> !torch.vtensor<[4,2304,640],f16>
%3457 = torch.aten.broadcast_to %743, %2502 : !torch.vtensor<[640,40,1],f16>, !torch.list<int> -> !torch.vtensor<[640,40,16],f16>
%3458 = torch.aten.clone %3457, %int0 : !torch.vtensor<[640,40,16],f16>, !torch.int -> !torch.vtensor<[640,40,16],f16>
%3459 = torch.aten.view %3458, %2505 : !torch.vtensor<[640,40,16],f16>, !torch.list<int> -> !torch.vtensor<[640,640],f16>
%3460 = torch.aten.mul.Tensor %742, %3459 : !torch.vtensor<[640,640],si8>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
%3461 = torch.aten.transpose.int %3460, %int0, %int1 : !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,640],f16>
%3462 = torch.aten.view %3456, %2509 : !torch.vtensor<[4,2304,640],f16>, !torch.list<int> -> !torch.vtensor<[9216,640],f16>
%3463 = torch.aten.mm %3462, %3461 : !torch.vtensor<[9216,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[9216,640],f16>
%3464 = torch.aten.mul.Scalar %741, %int1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640],f16>
%3465 = torch.aten.add.Tensor %3464, %3463, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[9216,640],f16>, !torch.int -> !torch.vtensor<[9216,640],f16>
%3466 = torch.aten.view %3465, %2482 : !torch.vtensor<[9216,640],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,640],f16>
%3467 = torch.aten.add.Tensor %3466, %3308, %int1 : !torch.vtensor<[4,2304,640],f16>, !torch.vtensor<[4,2304,640],f16>, !torch.int -> !torch.vtensor<[4,2304,640],f16>
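    // LayerNorm over the 640-dim channel axis, expanded into primitive ops: mean, centered
    // variance, rsqrt(var + 1e-5), then affine weight %740 and bias %739.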
%3468 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%3469 = torch.aten.sum.dim_IntList %3467, %3468, %true, %none : !torch.vtensor<[4,2304,640],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,2304,1],f16>
%3470 = torch.aten.div.Scalar %3469, %int640 : !torch.vtensor<[4,2304,1],f16>, !torch.int -> !torch.vtensor<[4,2304,1],f16>
%3471 = torch.prim.ListConstruct %int4, %int2304, %int640 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3472 = torch.aten.broadcast_to %3470, %3471 : !torch.vtensor<[4,2304,1],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,640],f16>
%3473 = torch.aten.sub.Tensor %3467, %3472, %int1 : !torch.vtensor<[4,2304,640],f16>, !torch.vtensor<[4,2304,640],f16>, !torch.int -> !torch.vtensor<[4,2304,640],f16>
%3474 = torch.aten.mul.Tensor %3473, %3473 : !torch.vtensor<[4,2304,640],f16>, !torch.vtensor<[4,2304,640],f16> -> !torch.vtensor<[4,2304,640],f16>
%3475 = torch.aten.sum.dim_IntList %3474, %3468, %true, %none : !torch.vtensor<[4,2304,640],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,2304,1],f16>
%3476 = torch.aten.div.Scalar %3475, %int640 : !torch.vtensor<[4,2304,1],f16>, !torch.int -> !torch.vtensor<[4,2304,1],f16>
%3477 = torch.aten.add.Scalar %3476, %float1.000000e-05, %int1 : !torch.vtensor<[4,2304,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[4,2304,1],f16>
%3478 = torch.aten.rsqrt %3477 : !torch.vtensor<[4,2304,1],f16> -> !torch.vtensor<[4,2304,1],f16>
%3479 = torch.prim.ListConstruct %int4, %int2304, %int640 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3480 = torch.aten.broadcast_to %3478, %3479 : !torch.vtensor<[4,2304,1],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,640],f16>
%3481 = torch.aten.mul.Tensor %3473, %3480 : !torch.vtensor<[4,2304,640],f16>, !torch.vtensor<[4,2304,640],f16> -> !torch.vtensor<[4,2304,640],f16>
%3482 = torch.aten.mul.Tensor %3481, %740 : !torch.vtensor<[4,2304,640],f16>, !torch.vtensor<[640],f16> -> !torch.vtensor<[4,2304,640],f16>
%3483 = torch.aten.add.Tensor %3482, %739, %int1 : !torch.vtensor<[4,2304,640],f16>, !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[4,2304,640],f16>
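    // Same fake-quantize/dequantize pattern as above, applied to the normalized activations.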
%3484 = torch.aten.view %3483, %2485 : !torch.vtensor<[4,2304,640],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,40,16],f16>
%3485 = torch.aten.abs %3484 : !torch.vtensor<[4,2304,40,16],f16> -> !torch.vtensor<[4,2304,40,16],f16>
%values_124, %indices_125 = torch.aten.max.dim %3485, %int3, %true : !torch.vtensor<[4,2304,40,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[4,2304,40,1],f16>, !torch.vtensor<[4,2304,40,1],si64>
%3486 = torch.aten.view %values_124, %2488 : !torch.vtensor<[4,2304,40,1],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,40,1],f16>
%3487 = torch.aten.broadcast_to %3486, %2485 : !torch.vtensor<[4,2304,40,1],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,40,16],f16>
%3488 = torch.aten.clone %3487, %int0 : !torch.vtensor<[4,2304,40,16],f16>, !torch.int -> !torch.vtensor<[4,2304,40,16],f16>
%3489 = torch.aten.view %3488, %2482 : !torch.vtensor<[4,2304,40,16],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,640],f16>
%3490 = torch.aten.div.Scalar %3489, %int128 : !torch.vtensor<[4,2304,640],f16>, !torch.int -> !torch.vtensor<[4,2304,640],f16>
%3491 = torch.aten.detach %936 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%3492 = torch.aten.detach %3491 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%3493 = torch.aten.div.Tensor %3483, %3490 : !torch.vtensor<[4,2304,640],f16>, !torch.vtensor<[4,2304,640],f16> -> !torch.vtensor<[4,2304,640],f16>
%3494 = torch.aten.add.Tensor %3493, %3492, %int1 : !torch.vtensor<[4,2304,640],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,2304,640],f16>
%3495 = torch.aten.round %3494 : !torch.vtensor<[4,2304,640],f16> -> !torch.vtensor<[4,2304,640],f16>
%3496 = torch.aten.clamp %3495, %int-128, %int127 : !torch.vtensor<[4,2304,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,2304,640],f16>
%3497 = torch.aten.sub.Tensor %3496, %3492, %int1 : !torch.vtensor<[4,2304,640],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,2304,640],f16>
%3498 = torch.aten.mul.Tensor %3497, %3490 : !torch.vtensor<[4,2304,640],f16>, !torch.vtensor<[4,2304,640],f16> -> !torch.vtensor<[4,2304,640],f16>
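    // Feed-forward up-projection: quantized linear 640 -> 5120, after which the output is
    // split into two 2560-wide halves and the second half, passed through GELU, gates the
    // first -- consistent with a GEGLU feed-forward.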
%3499 = torch.aten.broadcast_to %738, %2874 : !torch.vtensor<[5120,40,1],f16>, !torch.list<int> -> !torch.vtensor<[5120,40,16],f16>
%3500 = torch.aten.clone %3499, %int0 : !torch.vtensor<[5120,40,16],f16>, !torch.int -> !torch.vtensor<[5120,40,16],f16>
%3501 = torch.aten.view %3500, %2877 : !torch.vtensor<[5120,40,16],f16>, !torch.list<int> -> !torch.vtensor<[5120,640],f16>
%3502 = torch.aten.mul.Tensor %737, %3501 : !torch.vtensor<[5120,640],si8>, !torch.vtensor<[5120,640],f16> -> !torch.vtensor<[5120,640],f16>
%3503 = torch.aten.transpose.int %3502, %int0, %int1 : !torch.vtensor<[5120,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,5120],f16>
%3504 = torch.aten.view %3498, %2509 : !torch.vtensor<[4,2304,640],f16>, !torch.list<int> -> !torch.vtensor<[9216,640],f16>
%3505 = torch.aten.mm %3504, %3503 : !torch.vtensor<[9216,640],f16>, !torch.vtensor<[640,5120],f16> -> !torch.vtensor<[9216,5120],f16>
%3506 = torch.aten.mul.Scalar %736, %int1 : !torch.vtensor<[5120],f16>, !torch.int -> !torch.vtensor<[5120],f16>
%3507 = torch.aten.add.Tensor %3506, %3505, %int1 : !torch.vtensor<[5120],f16>, !torch.vtensor<[9216,5120],f16>, !torch.int -> !torch.vtensor<[9216,5120],f16>
%3508 = torch.aten.view %3507, %2885 : !torch.vtensor<[9216,5120],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,5120],f16>
%3509 = torch.aten.slice.Tensor %3508, %int-1, %int0, %int2560, %int1 : !torch.vtensor<[4,2304,5120],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[4,2304,2560],f16>
%3510 = torch.aten.slice.Tensor %3508, %int-1, %int2560, %int5120, %int1 : !torch.vtensor<[4,2304,5120],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[4,2304,2560],f16>
%3511 = torch.aten.gelu %3510, %str : !torch.vtensor<[4,2304,2560],f16>, !torch.str -> !torch.vtensor<[4,2304,2560],f16>
%3512 = torch.aten.mul.Tensor %3509, %3511 : !torch.vtensor<[4,2304,2560],f16>, !torch.vtensor<[4,2304,2560],f16> -> !torch.vtensor<[4,2304,2560],f16>
%3513 = torch.aten.view %3512, %2891 : !torch.vtensor<[4,2304,2560],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,160,16],f16>
%3514 = torch.aten.abs %3513 : !torch.vtensor<[4,2304,160,16],f16> -> !torch.vtensor<[4,2304,160,16],f16>
%values_126, %indices_127 = torch.aten.max.dim %3514, %int3, %true : !torch.vtensor<[4,2304,160,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[4,2304,160,1],f16>, !torch.vtensor<[4,2304,160,1],si64>
%3515 = torch.aten.view %values_126, %2894 : !torch.vtensor<[4,2304,160,1],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,160,1],f16>
%3516 = torch.aten.broadcast_to %3515, %2891 : !torch.vtensor<[4,2304,160,1],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,160,16],f16>
%3517 = torch.aten.clone %3516, %int0 : !torch.vtensor<[4,2304,160,16],f16>, !torch.int -> !torch.vtensor<[4,2304,160,16],f16>
%3518 = torch.aten.view %3517, %2898 : !torch.vtensor<[4,2304,160,16],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,2560],f16>
%3519 = torch.aten.div.Scalar %3518, %int128 : !torch.vtensor<[4,2304,2560],f16>, !torch.int -> !torch.vtensor<[4,2304,2560],f16>
%3520 = torch.aten.detach %936 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%3521 = torch.aten.detach %3520 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%3522 = torch.aten.div.Tensor %3512, %3519 : !torch.vtensor<[4,2304,2560],f16>, !torch.vtensor<[4,2304,2560],f16> -> !torch.vtensor<[4,2304,2560],f16>
%3523 = torch.aten.add.Tensor %3522, %3521, %int1 : !torch.vtensor<[4,2304,2560],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,2304,2560],f16>
%3524 = torch.aten.round %3523 : !torch.vtensor<[4,2304,2560],f16> -> !torch.vtensor<[4,2304,2560],f16>
%3525 = torch.aten.clamp %3524, %int-128, %int127 : !torch.vtensor<[4,2304,2560],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,2304,2560],f16>
%3526 = torch.aten.sub.Tensor %3525, %3521, %int1 : !torch.vtensor<[4,2304,2560],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,2304,2560],f16>
%3527 = torch.aten.mul.Tensor %3526, %3519 : !torch.vtensor<[4,2304,2560],f16>, !torch.vtensor<[4,2304,2560],f16> -> !torch.vtensor<[4,2304,2560],f16>
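    // Quantized feed-forward down-projection 2560 -> 640, bias add, and residual add with %3467.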
%3528 = torch.aten.broadcast_to %735, %2909 : !torch.vtensor<[640,160,1],f16>, !torch.list<int> -> !torch.vtensor<[640,160,16],f16>
%3529 = torch.aten.clone %3528, %int0 : !torch.vtensor<[640,160,16],f16>, !torch.int -> !torch.vtensor<[640,160,16],f16>
%3530 = torch.aten.view %3529, %2912 : !torch.vtensor<[640,160,16],f16>, !torch.list<int> -> !torch.vtensor<[640,2560],f16>
%3531 = torch.aten.mul.Tensor %734, %3530 : !torch.vtensor<[640,2560],si8>, !torch.vtensor<[640,2560],f16> -> !torch.vtensor<[640,2560],f16>
%3532 = torch.aten.transpose.int %3531, %int0, %int1 : !torch.vtensor<[640,2560],f16>, !torch.int, !torch.int -> !torch.vtensor<[2560,640],f16>
%3533 = torch.aten.view %3527, %2916 : !torch.vtensor<[4,2304,2560],f16>, !torch.list<int> -> !torch.vtensor<[9216,2560],f16>
%3534 = torch.aten.mm %3533, %3532 : !torch.vtensor<[9216,2560],f16>, !torch.vtensor<[2560,640],f16> -> !torch.vtensor<[9216,640],f16>
%3535 = torch.aten.mul.Scalar %733, %int1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640],f16>
%3536 = torch.aten.add.Tensor %3535, %3534, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[9216,640],f16>, !torch.int -> !torch.vtensor<[9216,640],f16>
%3537 = torch.aten.view %3536, %2482 : !torch.vtensor<[9216,640],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,640],f16>
%3538 = torch.aten.add.Tensor %3537, %3467, %int1 : !torch.vtensor<[4,2304,640],f16>, !torch.vtensor<[4,2304,640],f16>, !torch.int -> !torch.vtensor<[4,2304,640],f16>
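    // Fake-quantize, apply the quantized output projection (likely the transformer block's
    // proj_out), reshape [4,2304,640] back to NCHW [4,640,48,48], and add the block's input
    // residual %3083.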
%3539 = torch.aten.view %3538, %2485 : !torch.vtensor<[4,2304,640],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,40,16],f16>
%3540 = torch.aten.abs %3539 : !torch.vtensor<[4,2304,40,16],f16> -> !torch.vtensor<[4,2304,40,16],f16>
%values_128, %indices_129 = torch.aten.max.dim %3540, %int3, %true : !torch.vtensor<[4,2304,40,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[4,2304,40,1],f16>, !torch.vtensor<[4,2304,40,1],si64>
%3541 = torch.aten.view %values_128, %2488 : !torch.vtensor<[4,2304,40,1],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,40,1],f16>
%3542 = torch.aten.broadcast_to %3541, %2485 : !torch.vtensor<[4,2304,40,1],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,40,16],f16>
%3543 = torch.aten.clone %3542, %int0 : !torch.vtensor<[4,2304,40,16],f16>, !torch.int -> !torch.vtensor<[4,2304,40,16],f16>
%3544 = torch.aten.view %3543, %2482 : !torch.vtensor<[4,2304,40,16],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,640],f16>
%3545 = torch.aten.div.Scalar %3544, %int128 : !torch.vtensor<[4,2304,640],f16>, !torch.int -> !torch.vtensor<[4,2304,640],f16>
%3546 = torch.aten.detach %936 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%3547 = torch.aten.detach %3546 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%3548 = torch.aten.div.Tensor %3538, %3545 : !torch.vtensor<[4,2304,640],f16>, !torch.vtensor<[4,2304,640],f16> -> !torch.vtensor<[4,2304,640],f16>
%3549 = torch.aten.add.Tensor %3548, %3547, %int1 : !torch.vtensor<[4,2304,640],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,2304,640],f16>
%3550 = torch.aten.round %3549 : !torch.vtensor<[4,2304,640],f16> -> !torch.vtensor<[4,2304,640],f16>
%3551 = torch.aten.clamp %3550, %int-128, %int127 : !torch.vtensor<[4,2304,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,2304,640],f16>
%3552 = torch.aten.sub.Tensor %3551, %3547, %int1 : !torch.vtensor<[4,2304,640],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,2304,640],f16>
%3553 = torch.aten.mul.Tensor %3552, %3545 : !torch.vtensor<[4,2304,640],f16>, !torch.vtensor<[4,2304,640],f16> -> !torch.vtensor<[4,2304,640],f16>
%3554 = torch.aten.broadcast_to %732, %2502 : !torch.vtensor<[640,40,1],f16>, !torch.list<int> -> !torch.vtensor<[640,40,16],f16>
%3555 = torch.aten.clone %3554, %int0 : !torch.vtensor<[640,40,16],f16>, !torch.int -> !torch.vtensor<[640,40,16],f16>
%3556 = torch.aten.view %3555, %2505 : !torch.vtensor<[640,40,16],f16>, !torch.list<int> -> !torch.vtensor<[640,640],f16>
%3557 = torch.aten.mul.Tensor %731, %3556 : !torch.vtensor<[640,640],si8>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
%3558 = torch.aten.transpose.int %3557, %int0, %int1 : !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,640],f16>
%3559 = torch.aten.view %3553, %2509 : !torch.vtensor<[4,2304,640],f16>, !torch.list<int> -> !torch.vtensor<[9216,640],f16>
%3560 = torch.aten.mm %3559, %3558 : !torch.vtensor<[9216,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[9216,640],f16>
%3561 = torch.aten.mul.Scalar %730, %int1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640],f16>
%3562 = torch.aten.add.Tensor %3561, %3560, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[9216,640],f16>, !torch.int -> !torch.vtensor<[9216,640],f16>
%3563 = torch.aten.view %3562, %2482 : !torch.vtensor<[9216,640],f16>, !torch.list<int> -> !torch.vtensor<[4,2304,640],f16>
%3564 = torch.aten.view %3563, %2948 : !torch.vtensor<[4,2304,640],f16>, !torch.list<int> -> !torch.vtensor<[4,48,48,640],f16>
%3565 = torch.aten.permute %3564, %1638 : !torch.vtensor<[4,48,48,640],f16>, !torch.list<int> -> !torch.vtensor<[4,640,48,48],f16>
%3566 = torch.aten.clone %3565, %int0 : !torch.vtensor<[4,640,48,48],f16>, !torch.int -> !torch.vtensor<[4,640,48,48],f16>
%3567 = torch.aten.add.Tensor %3566, %3083, %int1 : !torch.vtensor<[4,640,48,48],f16>, !torch.vtensor<[4,640,48,48],f16>, !torch.int -> !torch.vtensor<[4,640,48,48],f16>
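    // Per-channel-group fake-quantization of the 48x48 feature map: the 640 channels are
    // grouped 40 x 16 and the absmax is taken over each group of 16 channels per spatial
    // position.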
%3568 = torch.aten.view %3567, %2395 : !torch.vtensor<[4,640,48,48],f16>, !torch.list<int> -> !torch.vtensor<[4,40,16,48,48],f16>
%3569 = torch.aten.abs %3568 : !torch.vtensor<[4,40,16,48,48],f16> -> !torch.vtensor<[4,40,16,48,48],f16>
%values_130, %indices_131 = torch.aten.max.dim %3569, %int2, %true : !torch.vtensor<[4,40,16,48,48],f16>, !torch.int, !torch.bool -> !torch.vtensor<[4,40,1,48,48],f16>, !torch.vtensor<[4,40,1,48,48],si64>
%3570 = torch.aten.view %values_130, %2398 : !torch.vtensor<[4,40,1,48,48],f16>, !torch.list<int> -> !torch.vtensor<[4,40,1,48,48],f16>
%3571 = torch.aten.broadcast_to %3570, %2395 : !torch.vtensor<[4,40,1,48,48],f16>, !torch.list<int> -> !torch.vtensor<[4,40,16,48,48],f16>
%3572 = torch.aten.clone %3571, %int0 : !torch.vtensor<[4,40,16,48,48],f16>, !torch.int -> !torch.vtensor<[4,40,16,48,48],f16>
%3573 = torch.aten.view %3572, %2376 : !torch.vtensor<[4,40,16,48,48],f16>, !torch.list<int> -> !torch.vtensor<[4,640,48,48],f16>
%3574 = torch.aten.div.Scalar %3573, %int128 : !torch.vtensor<[4,640,48,48],f16>, !torch.int -> !torch.vtensor<[4,640,48,48],f16>
%3575 = torch.aten.detach %936 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%3576 = torch.aten.detach %3575 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%3577 = torch.aten.div.Tensor %3567, %3574 : !torch.vtensor<[4,640,48,48],f16>, !torch.vtensor<[4,640,48,48],f16> -> !torch.vtensor<[4,640,48,48],f16>
%3578 = torch.aten.add.Tensor %3577, %3576, %int1 : !torch.vtensor<[4,640,48,48],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,640,48,48],f16>
%3579 = torch.aten.round %3578 : !torch.vtensor<[4,640,48,48],f16> -> !torch.vtensor<[4,640,48,48],f16>
%3580 = torch.aten.clamp %3579, %int-128, %int127 : !torch.vtensor<[4,640,48,48],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,640,48,48],f16>
%3581 = torch.aten.sub.Tensor %3580, %3576, %int1 : !torch.vtensor<[4,640,48,48],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,640,48,48],f16>
%3582 = torch.aten.mul.Tensor %3581, %3574 : !torch.vtensor<[4,640,48,48],f16>, !torch.vtensor<[4,640,48,48],f16> -> !torch.vtensor<[4,640,48,48],f16>
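    // Quantized 3x3 convolution with stride %2276, halving the spatial size 48 -> 24 -- the
    // downsampling conv between UNet levels.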
%3583 = torch.aten.broadcast_to %729, %2412 : !torch.vtensor<[640,40,1,3,3],f16>, !torch.list<int> -> !torch.vtensor<[640,40,16,3,3],f16>
%3584 = torch.aten.clone %3583, %int0 : !torch.vtensor<[640,40,16,3,3],f16>, !torch.int -> !torch.vtensor<[640,40,16,3,3],f16>
%3585 = torch.aten.view %3584, %2415 : !torch.vtensor<[640,40,16,3,3],f16>, !torch.list<int> -> !torch.vtensor<[640,640,3,3],f16>
%3586 = torch.aten.mul.Tensor %728, %3585 : !torch.vtensor<[640,640,3,3],si8>, !torch.vtensor<[640,640,3,3],f16> -> !torch.vtensor<[640,640,3,3],f16>
%3587 = torch.aten.convolution %3582, %3586, %727, %2276, %984, %984, %false, %985, %int1 : !torch.vtensor<[4,640,48,48],f16>, !torch.vtensor<[640,640,3,3],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[4,640,24,24],f16>
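    // GroupNorm with 32 groups: statistics are accumulated in f64 over the 20x576 = 11520
    // elements per group, normalized in f32, then viewed back to [4,640,24,24] for the affine
    // scale %725 / shift %726.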
%3588 = torch.prim.ListConstruct %int4, %int32, %int20, %int576 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3589 = torch.aten.view %3587, %3588 : !torch.vtensor<[4,640,24,24],f16>, !torch.list<int> -> !torch.vtensor<[4,32,20,576],f16>
%3590 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%3591 = torch.aten.to.dtype %3590, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%3592 = torch.prim.ListConstruct %int4, %int32, %int20, %int576 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3593 = torch.aten.broadcast_to %3591, %3592 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[4,32,20,576],f32>
%3594 = torch.aten.to.dtype %3589, %int6, %false, %false, %none : !torch.vtensor<[4,32,20,576],f16>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,32,20,576],f32>
%3595 = torch.prim.ListConstruct %int4, %int32, %int20, %int576 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3596 = torch.aten.broadcast_to %3594, %3595 : !torch.vtensor<[4,32,20,576],f32>, !torch.list<int> -> !torch.vtensor<[4,32,20,576],f32>
%3597 = torch.aten.to.dtype %3596, %int7, %false, %false, %none : !torch.vtensor<[4,32,20,576],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,32,20,576],f64>
%3598 = torch.aten.sum.dim_IntList %3597, %996, %true, %none : !torch.vtensor<[4,32,20,576],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f64>
%3599 = torch.aten.div.Scalar %3598, %int11520 : !torch.vtensor<[4,32,1,1],f64>, !torch.int -> !torch.vtensor<[4,32,1,1],f64>
%3600 = torch.aten.sub.Tensor %3597, %3599, %float1.000000e00 : !torch.vtensor<[4,32,20,576],f64>, !torch.vtensor<[4,32,1,1],f64>, !torch.float -> !torch.vtensor<[4,32,20,576],f64>
%3601 = torch.aten.mul.Tensor %3600, %3600 : !torch.vtensor<[4,32,20,576],f64>, !torch.vtensor<[4,32,20,576],f64> -> !torch.vtensor<[4,32,20,576],f64>
%3602 = torch.aten.sum.dim_IntList %3601, %996, %true, %none : !torch.vtensor<[4,32,20,576],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f64>
%3603 = torch.aten.div.Scalar %3602, %int11520 : !torch.vtensor<[4,32,1,1],f64>, !torch.int -> !torch.vtensor<[4,32,1,1],f64>
%3604 = torch.aten.to.dtype %3603, %int6, %false, %false, %none : !torch.vtensor<[4,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f32>
%3605 = torch.aten.sum.dim_IntList %3596, %996, %true, %none : !torch.vtensor<[4,32,20,576],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f32>
%3606 = torch.aten.div.Scalar %3605, %int11520 : !torch.vtensor<[4,32,1,1],f32>, !torch.int -> !torch.vtensor<[4,32,1,1],f32>
%3607 = torch.aten.add.Tensor %3604, %939, %int1 : !torch.vtensor<[4,32,1,1],f32>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,32,1,1],f32>
%3608 = torch.aten.rsqrt %3607 : !torch.vtensor<[4,32,1,1],f32> -> !torch.vtensor<[4,32,1,1],f32>
%3609 = torch.aten.sub.Tensor %3589, %3606, %int1 : !torch.vtensor<[4,32,20,576],f16>, !torch.vtensor<[4,32,1,1],f32>, !torch.int -> !torch.vtensor<[4,32,20,576],f32>
%3610 = torch.aten.mul.Tensor %3609, %3608 : !torch.vtensor<[4,32,20,576],f32>, !torch.vtensor<[4,32,1,1],f32> -> !torch.vtensor<[4,32,20,576],f32>
%3611 = torch.prim.ListConstruct %int4, %int640, %int24, %int24 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3612 = torch.aten.view %3610, %3611 : !torch.vtensor<[4,32,20,576],f32>, !torch.list<int> -> !torch.vtensor<[4,640,24,24],f32>
%3613 = torch.aten.unsqueeze %726, %int0 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[1,640],f16>
%3614 = torch.aten.unsqueeze %3613, %int2 : !torch.vtensor<[1,640],f16>, !torch.int -> !torch.vtensor<[1,640,1],f16>
%3615 = torch.aten.unsqueeze %3614, %int3 : !torch.vtensor<[1,640,1],f16>, !torch.int -> !torch.vtensor<[1,640,1,1],f16>
%3616 = torch.aten.unsqueeze %725, %int0 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[1,640],f16>
%3617 = torch.aten.unsqueeze %3616, %int2 : !torch.vtensor<[1,640],f16>, !torch.int -> !torch.vtensor<[1,640,1],f16>
%3618 = torch.aten.unsqueeze %3617, %int3 : !torch.vtensor<[1,640,1],f16>, !torch.int -> !torch.vtensor<[1,640,1,1],f16>
%3619 = torch.aten.mul.Tensor %3612, %3618 : !torch.vtensor<[4,640,24,24],f32>, !torch.vtensor<[1,640,1,1],f16> -> !torch.vtensor<[4,640,24,24],f32>
%3620 = torch.aten.add.Tensor %3619, %3615, %int1 : !torch.vtensor<[4,640,24,24],f32>, !torch.vtensor<[1,640,1,1],f16>, !torch.int -> !torch.vtensor<[4,640,24,24],f32>
%3621 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%3622 = torch.aten.to.dtype %3621, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%3623 = torch.prim.ListConstruct %int4, %int640, %int24, %int24 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3624 = torch.aten.broadcast_to %3622, %3623 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[4,640,24,24],f16>
%3625 = torch.aten.to.dtype %3620, %int5, %false, %false, %none : !torch.vtensor<[4,640,24,24],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,640,24,24],f16>
%3626 = torch.prim.ListConstruct %int4, %int640, %int24, %int24 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3627 = torch.aten.broadcast_to %3625, %3626 : !torch.vtensor<[4,640,24,24],f16>, !torch.list<int> -> !torch.vtensor<[4,640,24,24],f16>
%3628 = torch.aten.sigmoid %3627 : !torch.vtensor<[4,640,24,24],f16> -> !torch.vtensor<[4,640,24,24],f16>
%3629 = torch.aten.mul.Tensor %3628, %3627 : !torch.vtensor<[4,640,24,24],f16>, !torch.vtensor<[4,640,24,24],f16> -> !torch.vtensor<[4,640,24,24],f16>
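    // Fake-quantize the SiLU output (x * sigmoid(x), computed just above) and run a quantized
    // 3x3 conv expanding 640 -> 1280 channels -- consistent with the first conv of a ResNet
    // block at the new resolution.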
%3630 = torch.prim.ListConstruct %int4, %int40, %int16, %int24, %int24 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3631 = torch.aten.view %3629, %3630 : !torch.vtensor<[4,640,24,24],f16>, !torch.list<int> -> !torch.vtensor<[4,40,16,24,24],f16>
%3632 = torch.aten.abs %3631 : !torch.vtensor<[4,40,16,24,24],f16> -> !torch.vtensor<[4,40,16,24,24],f16>
%values_132, %indices_133 = torch.aten.max.dim %3632, %int2, %true : !torch.vtensor<[4,40,16,24,24],f16>, !torch.int, !torch.bool -> !torch.vtensor<[4,40,1,24,24],f16>, !torch.vtensor<[4,40,1,24,24],si64>
%3633 = torch.prim.ListConstruct %int4, %int40, %int1, %int24, %int24 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3634 = torch.aten.view %values_132, %3633 : !torch.vtensor<[4,40,1,24,24],f16>, !torch.list<int> -> !torch.vtensor<[4,40,1,24,24],f16>
%3635 = torch.aten.broadcast_to %3634, %3630 : !torch.vtensor<[4,40,1,24,24],f16>, !torch.list<int> -> !torch.vtensor<[4,40,16,24,24],f16>
%3636 = torch.aten.clone %3635, %int0 : !torch.vtensor<[4,40,16,24,24],f16>, !torch.int -> !torch.vtensor<[4,40,16,24,24],f16>
%3637 = torch.aten.view %3636, %3611 : !torch.vtensor<[4,40,16,24,24],f16>, !torch.list<int> -> !torch.vtensor<[4,640,24,24],f16>
%3638 = torch.aten.div.Scalar %3637, %int128 : !torch.vtensor<[4,640,24,24],f16>, !torch.int -> !torch.vtensor<[4,640,24,24],f16>
%3639 = torch.aten.detach %936 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%3640 = torch.aten.detach %3639 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%3641 = torch.aten.div.Tensor %3629, %3638 : !torch.vtensor<[4,640,24,24],f16>, !torch.vtensor<[4,640,24,24],f16> -> !torch.vtensor<[4,640,24,24],f16>
%3642 = torch.aten.add.Tensor %3641, %3640, %int1 : !torch.vtensor<[4,640,24,24],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,640,24,24],f16>
%3643 = torch.aten.round %3642 : !torch.vtensor<[4,640,24,24],f16> -> !torch.vtensor<[4,640,24,24],f16>
%3644 = torch.aten.clamp %3643, %int-128, %int127 : !torch.vtensor<[4,640,24,24],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,640,24,24],f16>
%3645 = torch.aten.sub.Tensor %3644, %3640, %int1 : !torch.vtensor<[4,640,24,24],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,640,24,24],f16>
%3646 = torch.aten.mul.Tensor %3645, %3638 : !torch.vtensor<[4,640,24,24],f16>, !torch.vtensor<[4,640,24,24],f16> -> !torch.vtensor<[4,640,24,24],f16>
%3647 = torch.prim.ListConstruct %int1280, %int40, %int16, %int3, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3648 = torch.aten.broadcast_to %724, %3647 : !torch.vtensor<[1280,40,1,3,3],f16>, !torch.list<int> -> !torch.vtensor<[1280,40,16,3,3],f16>
%3649 = torch.aten.clone %3648, %int0 : !torch.vtensor<[1280,40,16,3,3],f16>, !torch.int -> !torch.vtensor<[1280,40,16,3,3],f16>
%3650 = torch.prim.ListConstruct %int1280, %int640, %int3, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3651 = torch.aten.view %3649, %3650 : !torch.vtensor<[1280,40,16,3,3],f16>, !torch.list<int> -> !torch.vtensor<[1280,640,3,3],f16>
%3652 = torch.aten.mul.Tensor %723, %3651 : !torch.vtensor<[1280,640,3,3],si8>, !torch.vtensor<[1280,640,3,3],f16> -> !torch.vtensor<[1280,640,3,3],f16>
%3653 = torch.aten.convolution %3646, %3652, %722, %984, %984, %984, %false, %985, %int1 : !torch.vtensor<[4,640,24,24],f16>, !torch.vtensor<[1280,640,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[4,1280,24,24],f16>
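    // Embedding injection: SiLU on the [4,1280] tensor %983 (likely the timestep embedding),
    // a linear projection, then a broadcast add as [4,1280,1,1] onto the conv output.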
%3654 = torch.aten.sigmoid %983 : !torch.vtensor<[4,1280],f16> -> !torch.vtensor<[4,1280],f16>
%3655 = torch.aten.mul.Tensor %3654, %983 : !torch.vtensor<[4,1280],f16>, !torch.vtensor<[4,1280],f16> -> !torch.vtensor<[4,1280],f16>
%3656 = torch.aten.transpose.int %721, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%3657 = torch.aten.mm %3655, %3656 : !torch.vtensor<[4,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[4,1280],f16>
%3658 = torch.aten.mul.Scalar %720, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16>
%3659 = torch.aten.add.Tensor %3658, %3657, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[4,1280],f16>, !torch.int -> !torch.vtensor<[4,1280],f16>
%3660 = torch.aten.unsqueeze %3659, %int2 : !torch.vtensor<[4,1280],f16>, !torch.int -> !torch.vtensor<[4,1280,1],f16>
%3661 = torch.aten.unsqueeze %3660, %int3 : !torch.vtensor<[4,1280,1],f16>, !torch.int -> !torch.vtensor<[4,1280,1,1],f16>
%3662 = torch.aten.add.Tensor %3653, %3661, %int1 : !torch.vtensor<[4,1280,24,24],f16>, !torch.vtensor<[4,1280,1,1],f16>, !torch.int -> !torch.vtensor<[4,1280,24,24],f16>
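    // Second GroupNorm(32) of the block, again with f64 statistics (40x576 = 23040 elements
    // per group), followed by the affine scale %718 / shift %719 and another SiLU.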
%3663 = torch.prim.ListConstruct %int4, %int32, %int40, %int576 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3664 = torch.aten.view %3662, %3663 : !torch.vtensor<[4,1280,24,24],f16>, !torch.list<int> -> !torch.vtensor<[4,32,40,576],f16>
%3665 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%3666 = torch.aten.to.dtype %3665, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%3667 = torch.prim.ListConstruct %int4, %int32, %int40, %int576 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3668 = torch.aten.broadcast_to %3666, %3667 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[4,32,40,576],f32>
%3669 = torch.aten.to.dtype %3664, %int6, %false, %false, %none : !torch.vtensor<[4,32,40,576],f16>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,32,40,576],f32>
%3670 = torch.prim.ListConstruct %int4, %int32, %int40, %int576 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3671 = torch.aten.broadcast_to %3669, %3670 : !torch.vtensor<[4,32,40,576],f32>, !torch.list<int> -> !torch.vtensor<[4,32,40,576],f32>
%3672 = torch.aten.to.dtype %3671, %int7, %false, %false, %none : !torch.vtensor<[4,32,40,576],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,32,40,576],f64>
%3673 = torch.aten.sum.dim_IntList %3672, %996, %true, %none : !torch.vtensor<[4,32,40,576],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f64>
%3674 = torch.aten.div.Scalar %3673, %int23040 : !torch.vtensor<[4,32,1,1],f64>, !torch.int -> !torch.vtensor<[4,32,1,1],f64>
%3675 = torch.aten.sub.Tensor %3672, %3674, %float1.000000e00 : !torch.vtensor<[4,32,40,576],f64>, !torch.vtensor<[4,32,1,1],f64>, !torch.float -> !torch.vtensor<[4,32,40,576],f64>
%3676 = torch.aten.mul.Tensor %3675, %3675 : !torch.vtensor<[4,32,40,576],f64>, !torch.vtensor<[4,32,40,576],f64> -> !torch.vtensor<[4,32,40,576],f64>
%3677 = torch.aten.sum.dim_IntList %3676, %996, %true, %none : !torch.vtensor<[4,32,40,576],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f64>
%3678 = torch.aten.div.Scalar %3677, %int23040 : !torch.vtensor<[4,32,1,1],f64>, !torch.int -> !torch.vtensor<[4,32,1,1],f64>
%3679 = torch.aten.to.dtype %3678, %int6, %false, %false, %none : !torch.vtensor<[4,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f32>
%3680 = torch.aten.sum.dim_IntList %3671, %996, %true, %none : !torch.vtensor<[4,32,40,576],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f32>
%3681 = torch.aten.div.Scalar %3680, %int23040 : !torch.vtensor<[4,32,1,1],f32>, !torch.int -> !torch.vtensor<[4,32,1,1],f32>
%3682 = torch.aten.add.Tensor %3679, %939, %int1 : !torch.vtensor<[4,32,1,1],f32>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,32,1,1],f32>
%3683 = torch.aten.rsqrt %3682 : !torch.vtensor<[4,32,1,1],f32> -> !torch.vtensor<[4,32,1,1],f32>
%3684 = torch.aten.sub.Tensor %3664, %3681, %int1 : !torch.vtensor<[4,32,40,576],f16>, !torch.vtensor<[4,32,1,1],f32>, !torch.int -> !torch.vtensor<[4,32,40,576],f32>
%3685 = torch.aten.mul.Tensor %3684, %3683 : !torch.vtensor<[4,32,40,576],f32>, !torch.vtensor<[4,32,1,1],f32> -> !torch.vtensor<[4,32,40,576],f32>
%3686 = torch.prim.ListConstruct %int4, %int1280, %int24, %int24 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3687 = torch.aten.view %3685, %3686 : !torch.vtensor<[4,32,40,576],f32>, !torch.list<int> -> !torch.vtensor<[4,1280,24,24],f32>
%3688 = torch.aten.unsqueeze %719, %int0 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1,1280],f16>
%3689 = torch.aten.unsqueeze %3688, %int2 : !torch.vtensor<[1,1280],f16>, !torch.int -> !torch.vtensor<[1,1280,1],f16>
%3690 = torch.aten.unsqueeze %3689, %int3 : !torch.vtensor<[1,1280,1],f16>, !torch.int -> !torch.vtensor<[1,1280,1,1],f16>
%3691 = torch.aten.unsqueeze %718, %int0 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1,1280],f16>
%3692 = torch.aten.unsqueeze %3691, %int2 : !torch.vtensor<[1,1280],f16>, !torch.int -> !torch.vtensor<[1,1280,1],f16>
%3693 = torch.aten.unsqueeze %3692, %int3 : !torch.vtensor<[1,1280,1],f16>, !torch.int -> !torch.vtensor<[1,1280,1,1],f16>
%3694 = torch.aten.mul.Tensor %3687, %3693 : !torch.vtensor<[4,1280,24,24],f32>, !torch.vtensor<[1,1280,1,1],f16> -> !torch.vtensor<[4,1280,24,24],f32>
%3695 = torch.aten.add.Tensor %3694, %3690, %int1 : !torch.vtensor<[4,1280,24,24],f32>, !torch.vtensor<[1,1280,1,1],f16>, !torch.int -> !torch.vtensor<[4,1280,24,24],f32>
%3696 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%3697 = torch.aten.to.dtype %3696, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%3698 = torch.prim.ListConstruct %int4, %int1280, %int24, %int24 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3699 = torch.aten.broadcast_to %3697, %3698 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[4,1280,24,24],f16>
%3700 = torch.aten.to.dtype %3695, %int5, %false, %false, %none : !torch.vtensor<[4,1280,24,24],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,1280,24,24],f16>
%3701 = torch.prim.ListConstruct %int4, %int1280, %int24, %int24 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3702 = torch.aten.broadcast_to %3700, %3701 : !torch.vtensor<[4,1280,24,24],f16>, !torch.list<int> -> !torch.vtensor<[4,1280,24,24],f16>
%3703 = torch.aten.sigmoid %3702 : !torch.vtensor<[4,1280,24,24],f16> -> !torch.vtensor<[4,1280,24,24],f16>
%3704 = torch.aten.mul.Tensor %3703, %3702 : !torch.vtensor<[4,1280,24,24],f16>, !torch.vtensor<[4,1280,24,24],f16> -> !torch.vtensor<[4,1280,24,24],f16>
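    // Fake-quantize and apply the block's second quantized 3x3 conv, 1280 -> 1280.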
%3705 = torch.prim.ListConstruct %int4, %int80, %int16, %int24, %int24 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3706 = torch.aten.view %3704, %3705 : !torch.vtensor<[4,1280,24,24],f16>, !torch.list<int> -> !torch.vtensor<[4,80,16,24,24],f16>
%3707 = torch.aten.abs %3706 : !torch.vtensor<[4,80,16,24,24],f16> -> !torch.vtensor<[4,80,16,24,24],f16>
%values_134, %indices_135 = torch.aten.max.dim %3707, %int2, %true : !torch.vtensor<[4,80,16,24,24],f16>, !torch.int, !torch.bool -> !torch.vtensor<[4,80,1,24,24],f16>, !torch.vtensor<[4,80,1,24,24],si64>
%3708 = torch.prim.ListConstruct %int4, %int80, %int1, %int24, %int24 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3709 = torch.aten.view %values_134, %3708 : !torch.vtensor<[4,80,1,24,24],f16>, !torch.list<int> -> !torch.vtensor<[4,80,1,24,24],f16>
%3710 = torch.aten.broadcast_to %3709, %3705 : !torch.vtensor<[4,80,1,24,24],f16>, !torch.list<int> -> !torch.vtensor<[4,80,16,24,24],f16>
%3711 = torch.aten.clone %3710, %int0 : !torch.vtensor<[4,80,16,24,24],f16>, !torch.int -> !torch.vtensor<[4,80,16,24,24],f16>
%3712 = torch.aten.view %3711, %3686 : !torch.vtensor<[4,80,16,24,24],f16>, !torch.list<int> -> !torch.vtensor<[4,1280,24,24],f16>
%3713 = torch.aten.div.Scalar %3712, %int128 : !torch.vtensor<[4,1280,24,24],f16>, !torch.int -> !torch.vtensor<[4,1280,24,24],f16>
%3714 = torch.aten.detach %936 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%3715 = torch.aten.detach %3714 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%3716 = torch.aten.div.Tensor %3704, %3713 : !torch.vtensor<[4,1280,24,24],f16>, !torch.vtensor<[4,1280,24,24],f16> -> !torch.vtensor<[4,1280,24,24],f16>
%3717 = torch.aten.add.Tensor %3716, %3715, %int1 : !torch.vtensor<[4,1280,24,24],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,1280,24,24],f16>
%3718 = torch.aten.round %3717 : !torch.vtensor<[4,1280,24,24],f16> -> !torch.vtensor<[4,1280,24,24],f16>
%3719 = torch.aten.clamp %3718, %int-128, %int127 : !torch.vtensor<[4,1280,24,24],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,1280,24,24],f16>
%3720 = torch.aten.sub.Tensor %3719, %3715, %int1 : !torch.vtensor<[4,1280,24,24],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,1280,24,24],f16>
%3721 = torch.aten.mul.Tensor %3720, %3713 : !torch.vtensor<[4,1280,24,24],f16>, !torch.vtensor<[4,1280,24,24],f16> -> !torch.vtensor<[4,1280,24,24],f16>
%3722 = torch.prim.ListConstruct %int1280, %int80, %int16, %int3, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3723 = torch.aten.broadcast_to %717, %3722 : !torch.vtensor<[1280,80,1,3,3],f16>, !torch.list<int> -> !torch.vtensor<[1280,80,16,3,3],f16>
%3724 = torch.aten.clone %3723, %int0 : !torch.vtensor<[1280,80,16,3,3],f16>, !torch.int -> !torch.vtensor<[1280,80,16,3,3],f16>
%3725 = torch.prim.ListConstruct %int1280, %int1280, %int3, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3726 = torch.aten.view %3724, %3725 : !torch.vtensor<[1280,80,16,3,3],f16>, !torch.list<int> -> !torch.vtensor<[1280,1280,3,3],f16>
%3727 = torch.aten.mul.Tensor %716, %3726 : !torch.vtensor<[1280,1280,3,3],si8>, !torch.vtensor<[1280,1280,3,3],f16> -> !torch.vtensor<[1280,1280,3,3],f16>
%3728 = torch.aten.convolution %3721, %3727, %715, %984, %984, %984, %false, %985, %int1 : !torch.vtensor<[4,1280,24,24],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[4,1280,24,24],f16>
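    // Skip connection: the block input %3587 is fake-quantized and projected 640 -> 1280
    // through a quantized 1x1 conv, added to the main path, and the sum is divided by %925
    // (presumably an output scale factor).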
%3729 = torch.aten.view %3587, %3630 : !torch.vtensor<[4,640,24,24],f16>, !torch.list<int> -> !torch.vtensor<[4,40,16,24,24],f16>
%3730 = torch.aten.abs %3729 : !torch.vtensor<[4,40,16,24,24],f16> -> !torch.vtensor<[4,40,16,24,24],f16>
%values_136, %indices_137 = torch.aten.max.dim %3730, %int2, %true : !torch.vtensor<[4,40,16,24,24],f16>, !torch.int, !torch.bool -> !torch.vtensor<[4,40,1,24,24],f16>, !torch.vtensor<[4,40,1,24,24],si64>
%3731 = torch.aten.view %values_136, %3633 : !torch.vtensor<[4,40,1,24,24],f16>, !torch.list<int> -> !torch.vtensor<[4,40,1,24,24],f16>
%3732 = torch.aten.broadcast_to %3731, %3630 : !torch.vtensor<[4,40,1,24,24],f16>, !torch.list<int> -> !torch.vtensor<[4,40,16,24,24],f16>
%3733 = torch.aten.clone %3732, %int0 : !torch.vtensor<[4,40,16,24,24],f16>, !torch.int -> !torch.vtensor<[4,40,16,24,24],f16>
%3734 = torch.aten.view %3733, %3611 : !torch.vtensor<[4,40,16,24,24],f16>, !torch.list<int> -> !torch.vtensor<[4,640,24,24],f16>
%3735 = torch.aten.div.Scalar %3734, %int128 : !torch.vtensor<[4,640,24,24],f16>, !torch.int -> !torch.vtensor<[4,640,24,24],f16>
%3736 = torch.aten.detach %936 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%3737 = torch.aten.detach %3736 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%3738 = torch.aten.div.Tensor %3587, %3735 : !torch.vtensor<[4,640,24,24],f16>, !torch.vtensor<[4,640,24,24],f16> -> !torch.vtensor<[4,640,24,24],f16>
%3739 = torch.aten.add.Tensor %3738, %3737, %int1 : !torch.vtensor<[4,640,24,24],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,640,24,24],f16>
%3740 = torch.aten.round %3739 : !torch.vtensor<[4,640,24,24],f16> -> !torch.vtensor<[4,640,24,24],f16>
%3741 = torch.aten.clamp %3740, %int-128, %int127 : !torch.vtensor<[4,640,24,24],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,640,24,24],f16>
%3742 = torch.aten.sub.Tensor %3741, %3737, %int1 : !torch.vtensor<[4,640,24,24],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,640,24,24],f16>
%3743 = torch.aten.mul.Tensor %3742, %3735 : !torch.vtensor<[4,640,24,24],f16>, !torch.vtensor<[4,640,24,24],f16> -> !torch.vtensor<[4,640,24,24],f16>
%3744 = torch.prim.ListConstruct %int1280, %int40, %int16, %int1, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3745 = torch.aten.broadcast_to %714, %3744 : !torch.vtensor<[1280,40,1,1,1],f16>, !torch.list<int> -> !torch.vtensor<[1280,40,16,1,1],f16>
%3746 = torch.aten.clone %3745, %int0 : !torch.vtensor<[1280,40,16,1,1],f16>, !torch.int -> !torch.vtensor<[1280,40,16,1,1],f16>
%3747 = torch.prim.ListConstruct %int1280, %int640, %int1, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3748 = torch.aten.view %3746, %3747 : !torch.vtensor<[1280,40,16,1,1],f16>, !torch.list<int> -> !torch.vtensor<[1280,640,1,1],f16>
%3749 = torch.aten.mul.Tensor %713, %3748 : !torch.vtensor<[1280,640,1,1],si8>, !torch.vtensor<[1280,640,1,1],f16> -> !torch.vtensor<[1280,640,1,1],f16>
%3750 = torch.aten.convolution %3743, %3749, %712, %984, %985, %984, %false, %985, %int1 : !torch.vtensor<[4,640,24,24],f16>, !torch.vtensor<[1280,640,1,1],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[4,1280,24,24],f16>
%3751 = torch.aten.add.Tensor %3750, %3728, %int1 : !torch.vtensor<[4,1280,24,24],f16>, !torch.vtensor<[4,1280,24,24],f16>, !torch.int -> !torch.vtensor<[4,1280,24,24],f16>
%3752 = torch.aten.div.Tensor %3751, %925 : !torch.vtensor<[4,1280,24,24],f16>, !torch.vtensor<[],f16> -> !torch.vtensor<[4,1280,24,24],f16>
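    // GroupNorm(32) + affine (scale %710, shift %711) on the ResNet output, then a cast to
    // f16, permute NCHW -> NHWC, and flatten to [4,576,1280] -- entering the next transformer
    // block at 24x24 resolution.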
%3753 = torch.aten.view %3752, %3663 : !torch.vtensor<[4,1280,24,24],f16>, !torch.list<int> -> !torch.vtensor<[4,32,40,576],f16>
%3754 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%3755 = torch.aten.to.dtype %3754, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%3756 = torch.prim.ListConstruct %int4, %int32, %int40, %int576 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3757 = torch.aten.broadcast_to %3755, %3756 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[4,32,40,576],f32>
%3758 = torch.aten.to.dtype %3753, %int6, %false, %false, %none : !torch.vtensor<[4,32,40,576],f16>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,32,40,576],f32>
%3759 = torch.prim.ListConstruct %int4, %int32, %int40, %int576 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3760 = torch.aten.broadcast_to %3758, %3759 : !torch.vtensor<[4,32,40,576],f32>, !torch.list<int> -> !torch.vtensor<[4,32,40,576],f32>
%3761 = torch.aten.to.dtype %3760, %int7, %false, %false, %none : !torch.vtensor<[4,32,40,576],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,32,40,576],f64>
%3762 = torch.aten.sum.dim_IntList %3761, %996, %true, %none : !torch.vtensor<[4,32,40,576],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f64>
%3763 = torch.aten.div.Scalar %3762, %int23040 : !torch.vtensor<[4,32,1,1],f64>, !torch.int -> !torch.vtensor<[4,32,1,1],f64>
%3764 = torch.aten.sub.Tensor %3761, %3763, %float1.000000e00 : !torch.vtensor<[4,32,40,576],f64>, !torch.vtensor<[4,32,1,1],f64>, !torch.float -> !torch.vtensor<[4,32,40,576],f64>
%3765 = torch.aten.mul.Tensor %3764, %3764 : !torch.vtensor<[4,32,40,576],f64>, !torch.vtensor<[4,32,40,576],f64> -> !torch.vtensor<[4,32,40,576],f64>
%3766 = torch.aten.sum.dim_IntList %3765, %996, %true, %none : !torch.vtensor<[4,32,40,576],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f64>
%3767 = torch.aten.div.Scalar %3766, %int23040 : !torch.vtensor<[4,32,1,1],f64>, !torch.int -> !torch.vtensor<[4,32,1,1],f64>
%3768 = torch.aten.to.dtype %3767, %int6, %false, %false, %none : !torch.vtensor<[4,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f32>
%3769 = torch.aten.sum.dim_IntList %3760, %996, %true, %none : !torch.vtensor<[4,32,40,576],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f32>
%3770 = torch.aten.div.Scalar %3769, %int23040 : !torch.vtensor<[4,32,1,1],f32>, !torch.int -> !torch.vtensor<[4,32,1,1],f32>
%3771 = torch.aten.add.Tensor %3768, %924, %int1 : !torch.vtensor<[4,32,1,1],f32>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,32,1,1],f32>
%3772 = torch.aten.rsqrt %3771 : !torch.vtensor<[4,32,1,1],f32> -> !torch.vtensor<[4,32,1,1],f32>
%3773 = torch.aten.sub.Tensor %3753, %3770, %int1 : !torch.vtensor<[4,32,40,576],f16>, !torch.vtensor<[4,32,1,1],f32>, !torch.int -> !torch.vtensor<[4,32,40,576],f32>
%3774 = torch.aten.mul.Tensor %3773, %3772 : !torch.vtensor<[4,32,40,576],f32>, !torch.vtensor<[4,32,1,1],f32> -> !torch.vtensor<[4,32,40,576],f32>
%3775 = torch.aten.view %3774, %3686 : !torch.vtensor<[4,32,40,576],f32>, !torch.list<int> -> !torch.vtensor<[4,1280,24,24],f32>
%3776 = torch.aten.unsqueeze %711, %int0 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1,1280],f16>
%3777 = torch.aten.unsqueeze %3776, %int2 : !torch.vtensor<[1,1280],f16>, !torch.int -> !torch.vtensor<[1,1280,1],f16>
%3778 = torch.aten.unsqueeze %3777, %int3 : !torch.vtensor<[1,1280,1],f16>, !torch.int -> !torch.vtensor<[1,1280,1,1],f16>
%3779 = torch.aten.unsqueeze %710, %int0 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1,1280],f16>
%3780 = torch.aten.unsqueeze %3779, %int2 : !torch.vtensor<[1,1280],f16>, !torch.int -> !torch.vtensor<[1,1280,1],f16>
%3781 = torch.aten.unsqueeze %3780, %int3 : !torch.vtensor<[1,1280,1],f16>, !torch.int -> !torch.vtensor<[1,1280,1,1],f16>
%3782 = torch.aten.mul.Tensor %3775, %3781 : !torch.vtensor<[4,1280,24,24],f32>, !torch.vtensor<[1,1280,1,1],f16> -> !torch.vtensor<[4,1280,24,24],f32>
%3783 = torch.aten.add.Tensor %3782, %3778, %int1 : !torch.vtensor<[4,1280,24,24],f32>, !torch.vtensor<[1,1280,1,1],f16>, !torch.int -> !torch.vtensor<[4,1280,24,24],f32>
%3784 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%3785 = torch.aten.to.dtype %3784, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%3786 = torch.prim.ListConstruct %int4, %int1280, %int24, %int24 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3787 = torch.aten.broadcast_to %3785, %3786 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[4,1280,24,24],f16>
%3788 = torch.aten.to.dtype %3783, %int5, %false, %false, %none : !torch.vtensor<[4,1280,24,24],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,1280,24,24],f16>
%3789 = torch.prim.ListConstruct %int4, %int1280, %int24, %int24 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3790 = torch.aten.broadcast_to %3788, %3789 : !torch.vtensor<[4,1280,24,24],f16>, !torch.list<int> -> !torch.vtensor<[4,1280,24,24],f16>
%3791 = torch.aten.permute %3790, %1163 : !torch.vtensor<[4,1280,24,24],f16>, !torch.list<int> -> !torch.vtensor<[4,24,24,1280],f16>
%3792 = torch.prim.ListConstruct %int4, %int576, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3793 = torch.aten.view %3791, %3792 : !torch.vtensor<[4,24,24,1280],f16>, !torch.list<int> -> !torch.vtensor<[4,576,1280],f16>
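    // Fake-quantize the flattened tokens and apply a quantized linear 1280 -> 1280, likely
    // the transformer's input projection.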
%3794 = torch.aten.clone %3793, %int0 : !torch.vtensor<[4,576,1280],f16>, !torch.int -> !torch.vtensor<[4,576,1280],f16>
%3795 = torch.prim.ListConstruct %int4, %int576, %int80, %int16 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3796 = torch.aten.view %3794, %3795 : !torch.vtensor<[4,576,1280],f16>, !torch.list<int> -> !torch.vtensor<[4,576,80,16],f16>
%3797 = torch.aten.abs %3796 : !torch.vtensor<[4,576,80,16],f16> -> !torch.vtensor<[4,576,80,16],f16>
%values_138, %indices_139 = torch.aten.max.dim %3797, %int3, %true : !torch.vtensor<[4,576,80,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[4,576,80,1],f16>, !torch.vtensor<[4,576,80,1],si64>
%3798 = torch.prim.ListConstruct %int4, %int576, %int80, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3799 = torch.aten.view %values_138, %3798 : !torch.vtensor<[4,576,80,1],f16>, !torch.list<int> -> !torch.vtensor<[4,576,80,1],f16>
%3800 = torch.aten.broadcast_to %3799, %3795 : !torch.vtensor<[4,576,80,1],f16>, !torch.list<int> -> !torch.vtensor<[4,576,80,16],f16>
%3801 = torch.aten.clone %3800, %int0 : !torch.vtensor<[4,576,80,16],f16>, !torch.int -> !torch.vtensor<[4,576,80,16],f16>
%3802 = torch.aten.view %3801, %3792 : !torch.vtensor<[4,576,80,16],f16>, !torch.list<int> -> !torch.vtensor<[4,576,1280],f16>
%3803 = torch.aten.div.Scalar %3802, %int128 : !torch.vtensor<[4,576,1280],f16>, !torch.int -> !torch.vtensor<[4,576,1280],f16>
%3804 = torch.aten.detach %936 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%3805 = torch.aten.detach %3804 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%3806 = torch.aten.div.Tensor %3794, %3803 : !torch.vtensor<[4,576,1280],f16>, !torch.vtensor<[4,576,1280],f16> -> !torch.vtensor<[4,576,1280],f16>
%3807 = torch.aten.add.Tensor %3806, %3805, %int1 : !torch.vtensor<[4,576,1280],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,576,1280],f16>
%3808 = torch.aten.round %3807 : !torch.vtensor<[4,576,1280],f16> -> !torch.vtensor<[4,576,1280],f16>
%3809 = torch.aten.clamp %3808, %int-128, %int127 : !torch.vtensor<[4,576,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,576,1280],f16>
%3810 = torch.aten.sub.Tensor %3809, %3805, %int1 : !torch.vtensor<[4,576,1280],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,576,1280],f16>
%3811 = torch.aten.mul.Tensor %3810, %3803 : !torch.vtensor<[4,576,1280],f16>, !torch.vtensor<[4,576,1280],f16> -> !torch.vtensor<[4,576,1280],f16>
%3812 = torch.prim.ListConstruct %int1280, %int80, %int16 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3813 = torch.aten.broadcast_to %709, %3812 : !torch.vtensor<[1280,80,1],f16>, !torch.list<int> -> !torch.vtensor<[1280,80,16],f16>
%3814 = torch.aten.clone %3813, %int0 : !torch.vtensor<[1280,80,16],f16>, !torch.int -> !torch.vtensor<[1280,80,16],f16>
%3815 = torch.prim.ListConstruct %int1280, %int1280 : (!torch.int, !torch.int) -> !torch.list<int>
%3816 = torch.aten.view %3814, %3815 : !torch.vtensor<[1280,80,16],f16>, !torch.list<int> -> !torch.vtensor<[1280,1280],f16>
%3817 = torch.aten.mul.Tensor %708, %3816 : !torch.vtensor<[1280,1280],si8>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%3818 = torch.aten.transpose.int %3817, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%3819 = torch.prim.ListConstruct %int2304, %int1280 : (!torch.int, !torch.int) -> !torch.list<int>
%3820 = torch.aten.view %3811, %3819 : !torch.vtensor<[4,576,1280],f16>, !torch.list<int> -> !torch.vtensor<[2304,1280],f16>
%3821 = torch.aten.mm %3820, %3818 : !torch.vtensor<[2304,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[2304,1280],f16>
%3822 = torch.aten.mul.Scalar %707, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16>
%3823 = torch.aten.add.Tensor %3822, %3821, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[2304,1280],f16>, !torch.int -> !torch.vtensor<[2304,1280],f16>
%3824 = torch.aten.view %3823, %3792 : !torch.vtensor<[2304,1280],f16>, !torch.list<int> -> !torch.vtensor<[4,576,1280],f16>
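    // LayerNorm over the 1280 channels (eps 1e-5, weight %706, bias %705) ahead of attention.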
%3825 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%3826 = torch.aten.sum.dim_IntList %3824, %3825, %true, %none : !torch.vtensor<[4,576,1280],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,576,1],f16>
%3827 = torch.aten.div.Scalar %3826, %int1280 : !torch.vtensor<[4,576,1],f16>, !torch.int -> !torch.vtensor<[4,576,1],f16>
%3828 = torch.prim.ListConstruct %int4, %int576, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3829 = torch.aten.broadcast_to %3827, %3828 : !torch.vtensor<[4,576,1],f16>, !torch.list<int> -> !torch.vtensor<[4,576,1280],f16>
%3830 = torch.aten.sub.Tensor %3824, %3829, %int1 : !torch.vtensor<[4,576,1280],f16>, !torch.vtensor<[4,576,1280],f16>, !torch.int -> !torch.vtensor<[4,576,1280],f16>
%3831 = torch.aten.mul.Tensor %3830, %3830 : !torch.vtensor<[4,576,1280],f16>, !torch.vtensor<[4,576,1280],f16> -> !torch.vtensor<[4,576,1280],f16>
%3832 = torch.aten.sum.dim_IntList %3831, %3825, %true, %none : !torch.vtensor<[4,576,1280],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,576,1],f16>
%3833 = torch.aten.div.Scalar %3832, %int1280 : !torch.vtensor<[4,576,1],f16>, !torch.int -> !torch.vtensor<[4,576,1],f16>
%3834 = torch.aten.add.Scalar %3833, %float1.000000e-05, %int1 : !torch.vtensor<[4,576,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[4,576,1],f16>
%3835 = torch.aten.rsqrt %3834 : !torch.vtensor<[4,576,1],f16> -> !torch.vtensor<[4,576,1],f16>
%3836 = torch.prim.ListConstruct %int4, %int576, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3837 = torch.aten.broadcast_to %3835, %3836 : !torch.vtensor<[4,576,1],f16>, !torch.list<int> -> !torch.vtensor<[4,576,1280],f16>
%3838 = torch.aten.mul.Tensor %3830, %3837 : !torch.vtensor<[4,576,1280],f16>, !torch.vtensor<[4,576,1280],f16> -> !torch.vtensor<[4,576,1280],f16>
%3839 = torch.aten.mul.Tensor %3838, %706 : !torch.vtensor<[4,576,1280],f16>, !torch.vtensor<[1280],f16> -> !torch.vtensor<[4,576,1280],f16>
%3840 = torch.aten.add.Tensor %3839, %705, %int1 : !torch.vtensor<[4,576,1280],f16>, !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[4,576,1280],f16>
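    // Fake-quantize, then a bias-free quantized linear 1280 -> 1280 -- consistent with the
    // attention query projection -- reshaped into 20 heads of 64 and folded to [80,576,64].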
%3841 = torch.aten.view %3840, %3795 : !torch.vtensor<[4,576,1280],f16>, !torch.list<int> -> !torch.vtensor<[4,576,80,16],f16>
%3842 = torch.aten.abs %3841 : !torch.vtensor<[4,576,80,16],f16> -> !torch.vtensor<[4,576,80,16],f16>
%values_140, %indices_141 = torch.aten.max.dim %3842, %int3, %true : !torch.vtensor<[4,576,80,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[4,576,80,1],f16>, !torch.vtensor<[4,576,80,1],si64>
%3843 = torch.aten.view %values_140, %3798 : !torch.vtensor<[4,576,80,1],f16>, !torch.list<int> -> !torch.vtensor<[4,576,80,1],f16>
%3844 = torch.aten.broadcast_to %3843, %3795 : !torch.vtensor<[4,576,80,1],f16>, !torch.list<int> -> !torch.vtensor<[4,576,80,16],f16>
%3845 = torch.aten.clone %3844, %int0 : !torch.vtensor<[4,576,80,16],f16>, !torch.int -> !torch.vtensor<[4,576,80,16],f16>
%3846 = torch.aten.view %3845, %3792 : !torch.vtensor<[4,576,80,16],f16>, !torch.list<int> -> !torch.vtensor<[4,576,1280],f16>
%3847 = torch.aten.div.Scalar %3846, %int128 : !torch.vtensor<[4,576,1280],f16>, !torch.int -> !torch.vtensor<[4,576,1280],f16>
%3848 = torch.aten.detach %936 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%3849 = torch.aten.detach %3848 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%3850 = torch.aten.div.Tensor %3840, %3847 : !torch.vtensor<[4,576,1280],f16>, !torch.vtensor<[4,576,1280],f16> -> !torch.vtensor<[4,576,1280],f16>
%3851 = torch.aten.add.Tensor %3850, %3849, %int1 : !torch.vtensor<[4,576,1280],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,576,1280],f16>
%3852 = torch.aten.round %3851 : !torch.vtensor<[4,576,1280],f16> -> !torch.vtensor<[4,576,1280],f16>
%3853 = torch.aten.clamp %3852, %int-128, %int127 : !torch.vtensor<[4,576,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,576,1280],f16>
%3854 = torch.aten.sub.Tensor %3853, %3849, %int1 : !torch.vtensor<[4,576,1280],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,576,1280],f16>
%3855 = torch.aten.mul.Tensor %3854, %3847 : !torch.vtensor<[4,576,1280],f16>, !torch.vtensor<[4,576,1280],f16> -> !torch.vtensor<[4,576,1280],f16>
%3856 = torch.aten.broadcast_to %704, %3812 : !torch.vtensor<[1280,80,1],f16>, !torch.list<int> -> !torch.vtensor<[1280,80,16],f16>
%3857 = torch.aten.clone %3856, %int0 : !torch.vtensor<[1280,80,16],f16>, !torch.int -> !torch.vtensor<[1280,80,16],f16>
%3858 = torch.aten.view %3857, %3815 : !torch.vtensor<[1280,80,16],f16>, !torch.list<int> -> !torch.vtensor<[1280,1280],f16>
%3859 = torch.aten.mul.Tensor %703, %3858 : !torch.vtensor<[1280,1280],si8>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%3860 = torch.aten.transpose.int %3859, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%3861 = torch.aten.view %3855, %3819 : !torch.vtensor<[4,576,1280],f16>, !torch.list<int> -> !torch.vtensor<[2304,1280],f16>
%3862 = torch.aten.mm %3861, %3860 : !torch.vtensor<[2304,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[2304,1280],f16>
%3863 = torch.aten.view %3862, %3792 : !torch.vtensor<[2304,1280],f16>, !torch.list<int> -> !torch.vtensor<[4,576,1280],f16>
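    // Split into 20 attention heads of dim 64: [4,576,1280] -> [4,576,20,64] ->
    // [4,20,576,64] -> [80,576,64].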
%3864 = torch.prim.ListConstruct %int4, %int576, %int20, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3865 = torch.aten.view %3863, %3864 : !torch.vtensor<[4,576,1280],f16>, !torch.list<int> -> !torch.vtensor<[4,576,20,64],f16>
%3866 = torch.aten.permute %3865, %1239 : !torch.vtensor<[4,576,20,64],f16>, !torch.list<int> -> !torch.vtensor<[4,20,576,64],f16>
%3867 = torch.aten.clone %3866, %int0 : !torch.vtensor<[4,20,576,64],f16>, !torch.int -> !torch.vtensor<[4,20,576,64],f16>
%3868 = torch.prim.ListConstruct %int80, %int576, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3869 = torch.aten.view %3867, %3868 : !torch.vtensor<[4,20,576,64],f16>, !torch.list<int> -> !torch.vtensor<[80,576,64],f16>
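    // Key and value paths: the same input %3840 goes through the identical QDQ pattern
    // once per projection, against the %701/%702 and %699/%700 weight/scale pairs.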
%3870 = torch.aten.view %3840, %3795 : !torch.vtensor<[4,576,1280],f16>, !torch.list<int> -> !torch.vtensor<[4,576,80,16],f16>
%3871 = torch.aten.abs %3870 : !torch.vtensor<[4,576,80,16],f16> -> !torch.vtensor<[4,576,80,16],f16>
%values_142, %indices_143 = torch.aten.max.dim %3871, %int3, %true : !torch.vtensor<[4,576,80,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[4,576,80,1],f16>, !torch.vtensor<[4,576,80,1],si64>
%3872 = torch.aten.view %values_142, %3798 : !torch.vtensor<[4,576,80,1],f16>, !torch.list<int> -> !torch.vtensor<[4,576,80,1],f16>
%3873 = torch.aten.broadcast_to %3872, %3795 : !torch.vtensor<[4,576,80,1],f16>, !torch.list<int> -> !torch.vtensor<[4,576,80,16],f16>
%3874 = torch.aten.clone %3873, %int0 : !torch.vtensor<[4,576,80,16],f16>, !torch.int -> !torch.vtensor<[4,576,80,16],f16>
%3875 = torch.aten.view %3874, %3792 : !torch.vtensor<[4,576,80,16],f16>, !torch.list<int> -> !torch.vtensor<[4,576,1280],f16>
%3876 = torch.aten.div.Scalar %3875, %int128 : !torch.vtensor<[4,576,1280],f16>, !torch.int -> !torch.vtensor<[4,576,1280],f16>
%3877 = torch.aten.detach %936 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%3878 = torch.aten.detach %3877 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%3879 = torch.aten.div.Tensor %3840, %3876 : !torch.vtensor<[4,576,1280],f16>, !torch.vtensor<[4,576,1280],f16> -> !torch.vtensor<[4,576,1280],f16>
%3880 = torch.aten.add.Tensor %3879, %3878, %int1 : !torch.vtensor<[4,576,1280],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,576,1280],f16>
%3881 = torch.aten.round %3880 : !torch.vtensor<[4,576,1280],f16> -> !torch.vtensor<[4,576,1280],f16>
%3882 = torch.aten.clamp %3881, %int-128, %int127 : !torch.vtensor<[4,576,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,576,1280],f16>
%3883 = torch.aten.sub.Tensor %3882, %3878, %int1 : !torch.vtensor<[4,576,1280],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,576,1280],f16>
%3884 = torch.aten.mul.Tensor %3883, %3876 : !torch.vtensor<[4,576,1280],f16>, !torch.vtensor<[4,576,1280],f16> -> !torch.vtensor<[4,576,1280],f16>
%3885 = torch.aten.broadcast_to %702, %3812 : !torch.vtensor<[1280,80,1],f16>, !torch.list<int> -> !torch.vtensor<[1280,80,16],f16>
%3886 = torch.aten.clone %3885, %int0 : !torch.vtensor<[1280,80,16],f16>, !torch.int -> !torch.vtensor<[1280,80,16],f16>
%3887 = torch.aten.view %3886, %3815 : !torch.vtensor<[1280,80,16],f16>, !torch.list<int> -> !torch.vtensor<[1280,1280],f16>
%3888 = torch.aten.mul.Tensor %701, %3887 : !torch.vtensor<[1280,1280],si8>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%3889 = torch.aten.transpose.int %3888, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%3890 = torch.aten.view %3884, %3819 : !torch.vtensor<[4,576,1280],f16>, !torch.list<int> -> !torch.vtensor<[2304,1280],f16>
%3891 = torch.aten.mm %3890, %3889 : !torch.vtensor<[2304,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[2304,1280],f16>
%3892 = torch.aten.view %3891, %3792 : !torch.vtensor<[2304,1280],f16>, !torch.list<int> -> !torch.vtensor<[4,576,1280],f16>
%3893 = torch.aten.view %3840, %3795 : !torch.vtensor<[4,576,1280],f16>, !torch.list<int> -> !torch.vtensor<[4,576,80,16],f16>
%3894 = torch.aten.abs %3893 : !torch.vtensor<[4,576,80,16],f16> -> !torch.vtensor<[4,576,80,16],f16>
%values_144, %indices_145 = torch.aten.max.dim %3894, %int3, %true : !torch.vtensor<[4,576,80,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[4,576,80,1],f16>, !torch.vtensor<[4,576,80,1],si64>
%3895 = torch.aten.view %values_144, %3798 : !torch.vtensor<[4,576,80,1],f16>, !torch.list<int> -> !torch.vtensor<[4,576,80,1],f16>
%3896 = torch.aten.broadcast_to %3895, %3795 : !torch.vtensor<[4,576,80,1],f16>, !torch.list<int> -> !torch.vtensor<[4,576,80,16],f16>
%3897 = torch.aten.clone %3896, %int0 : !torch.vtensor<[4,576,80,16],f16>, !torch.int -> !torch.vtensor<[4,576,80,16],f16>
%3898 = torch.aten.view %3897, %3792 : !torch.vtensor<[4,576,80,16],f16>, !torch.list<int> -> !torch.vtensor<[4,576,1280],f16>
%3899 = torch.aten.div.Scalar %3898, %int128 : !torch.vtensor<[4,576,1280],f16>, !torch.int -> !torch.vtensor<[4,576,1280],f16>
%3900 = torch.aten.detach %936 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%3901 = torch.aten.detach %3900 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%3902 = torch.aten.div.Tensor %3840, %3899 : !torch.vtensor<[4,576,1280],f16>, !torch.vtensor<[4,576,1280],f16> -> !torch.vtensor<[4,576,1280],f16>
%3903 = torch.aten.add.Tensor %3902, %3901, %int1 : !torch.vtensor<[4,576,1280],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,576,1280],f16>
%3904 = torch.aten.round %3903 : !torch.vtensor<[4,576,1280],f16> -> !torch.vtensor<[4,576,1280],f16>
%3905 = torch.aten.clamp %3904, %int-128, %int127 : !torch.vtensor<[4,576,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,576,1280],f16>
%3906 = torch.aten.sub.Tensor %3905, %3901, %int1 : !torch.vtensor<[4,576,1280],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,576,1280],f16>
%3907 = torch.aten.mul.Tensor %3906, %3899 : !torch.vtensor<[4,576,1280],f16>, !torch.vtensor<[4,576,1280],f16> -> !torch.vtensor<[4,576,1280],f16>
%3908 = torch.aten.broadcast_to %700, %3812 : !torch.vtensor<[1280,80,1],f16>, !torch.list<int> -> !torch.vtensor<[1280,80,16],f16>
%3909 = torch.aten.clone %3908, %int0 : !torch.vtensor<[1280,80,16],f16>, !torch.int -> !torch.vtensor<[1280,80,16],f16>
%3910 = torch.aten.view %3909, %3815 : !torch.vtensor<[1280,80,16],f16>, !torch.list<int> -> !torch.vtensor<[1280,1280],f16>
%3911 = torch.aten.mul.Tensor %699, %3910 : !torch.vtensor<[1280,1280],si8>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%3912 = torch.aten.transpose.int %3911, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%3913 = torch.aten.view %3907, %3819 : !torch.vtensor<[4,576,1280],f16>, !torch.list<int> -> !torch.vtensor<[2304,1280],f16>
%3914 = torch.aten.mm %3913, %3912 : !torch.vtensor<[2304,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[2304,1280],f16>
%3915 = torch.aten.view %3914, %3792 : !torch.vtensor<[2304,1280],f16>, !torch.list<int> -> !torch.vtensor<[4,576,1280],f16>
%3916 = torch.aten.view %3892, %3864 : !torch.vtensor<[4,576,1280],f16>, !torch.list<int> -> !torch.vtensor<[4,576,20,64],f16>
%3917 = torch.aten.permute %3916, %1239 : !torch.vtensor<[4,576,20,64],f16>, !torch.list<int> -> !torch.vtensor<[4,20,576,64],f16>
%3918 = torch.aten.clone %3917, %int0 : !torch.vtensor<[4,20,576,64],f16>, !torch.int -> !torch.vtensor<[4,20,576,64],f16>
%3919 = torch.aten.view %3918, %3868 : !torch.vtensor<[4,20,576,64],f16>, !torch.list<int> -> !torch.vtensor<[80,576,64],f16>
%3920 = torch.aten.view %3915, %3864 : !torch.vtensor<[4,576,1280],f16>, !torch.list<int> -> !torch.vtensor<[4,576,20,64],f16>
%3921 = torch.aten.permute %3920, %1239 : !torch.vtensor<[4,576,20,64],f16>, !torch.list<int> -> !torch.vtensor<[4,20,576,64],f16>
%3922 = torch.aten.clone %3921, %int0 : !torch.vtensor<[4,20,576,64],f16>, !torch.int -> !torch.vtensor<[4,20,576,64],f16>
%3923 = torch.aten.view %3922, %3868 : !torch.vtensor<[4,20,576,64],f16>, !torch.list<int> -> !torch.vtensor<[80,576,64],f16>
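    // Attention scores in f32: q (%3869) and k (%3919) are upcast, k is transposed, and
    // the bmm result is scaled by 0.125 = 1/sqrt(64). The empty [80,576,576] tensor added
    // with alpha %int0 looks like the beta = 0 term of a decomposed baddbmm.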
%3924 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%3925 = torch.aten.to.dtype %3924, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%3926 = torch.prim.ListConstruct %int80, %int576, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3927 = torch.aten.broadcast_to %3925, %3926 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[80,576,64],f32>
%3928 = torch.aten.to.dtype %3869, %int6, %false, %false, %none : !torch.vtensor<[80,576,64],f16>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[80,576,64],f32>
%3929 = torch.prim.ListConstruct %int80, %int576, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3930 = torch.aten.broadcast_to %3928, %3929 : !torch.vtensor<[80,576,64],f32>, !torch.list<int> -> !torch.vtensor<[80,576,64],f32>
%3931 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%3932 = torch.aten.to.dtype %3931, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%3933 = torch.prim.ListConstruct %int80, %int576, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3934 = torch.aten.broadcast_to %3932, %3933 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[80,576,64],f32>
%3935 = torch.aten.to.dtype %3919, %int6, %false, %false, %none : !torch.vtensor<[80,576,64],f16>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[80,576,64],f32>
%3936 = torch.prim.ListConstruct %int80, %int576, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3937 = torch.aten.broadcast_to %3935, %3936 : !torch.vtensor<[80,576,64],f32>, !torch.list<int> -> !torch.vtensor<[80,576,64],f32>
%3938 = torch.prim.ListConstruct %int80, %int576, %int576 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3939 = torch.aten.empty.memory_format %3938, %int6, %none, %cuda3A0, %false, %none : !torch.list<int>, !torch.int, !torch.none, !torch.Device, !torch.bool, !torch.none -> !torch.vtensor<[80,576,576],f32>
%3940 = torch.aten.transpose.int %3937, %int-1, %int-2 : !torch.vtensor<[80,576,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[80,64,576],f32>
%3941 = torch.aten.bmm %3930, %3940 : !torch.vtensor<[80,576,64],f32>, !torch.vtensor<[80,64,576],f32> -> !torch.vtensor<[80,576,576],f32>
%3942 = torch.aten.mul.Scalar %3941, %float1.250000e-01 : !torch.vtensor<[80,576,576],f32>, !torch.float -> !torch.vtensor<[80,576,576],f32>
%3943 = torch.aten.add.Tensor %3942, %3939, %int0 : !torch.vtensor<[80,576,576],f32>, !torch.vtensor<[80,576,576],f32>, !torch.int -> !torch.vtensor<[80,576,576],f32>
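    // Numerically stable softmax over the last dim: subtract the row max, exp, normalize.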
%values_146, %indices_147 = torch.aten.max.dim %3943, %int-1, %true : !torch.vtensor<[80,576,576],f32>, !torch.int, !torch.bool -> !torch.vtensor<[80,576,1],f32>, !torch.vtensor<[80,576,1],si64>
%3944 = torch.aten.sub.Tensor %3943, %values_146, %float1.000000e00 : !torch.vtensor<[80,576,576],f32>, !torch.vtensor<[80,576,1],f32>, !torch.float -> !torch.vtensor<[80,576,576],f32>
%3945 = torch.aten.exp %3944 : !torch.vtensor<[80,576,576],f32> -> !torch.vtensor<[80,576,576],f32>
%3946 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%3947 = torch.aten.sum.dim_IntList %3945, %3946, %true, %none : !torch.vtensor<[80,576,576],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[80,576,1],f32>
%3948 = torch.aten.div.Tensor %3945, %3947 : !torch.vtensor<[80,576,576],f32>, !torch.vtensor<[80,576,1],f32> -> !torch.vtensor<[80,576,576],f32>
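    // Probabilities are cast back to f16 and multiplied against v, then the heads are
    // merged back into [4,576,1280].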
%3949 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%3950 = torch.aten.to.dtype %3949, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%3951 = torch.prim.ListConstruct %int80, %int576, %int576 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3952 = torch.aten.broadcast_to %3950, %3951 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[80,576,576],f16>
%3953 = torch.aten.to.dtype %3948, %int5, %false, %false, %none : !torch.vtensor<[80,576,576],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[80,576,576],f16>
%3954 = torch.prim.ListConstruct %int80, %int576, %int576 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3955 = torch.aten.broadcast_to %3953, %3954 : !torch.vtensor<[80,576,576],f16>, !torch.list<int> -> !torch.vtensor<[80,576,576],f16>
%3956 = torch.aten.bmm %3955, %3923 : !torch.vtensor<[80,576,576],f16>, !torch.vtensor<[80,576,64],f16> -> !torch.vtensor<[80,576,64],f16>
%3957 = torch.prim.ListConstruct %int4, %int20, %int576, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3958 = torch.aten.view %3956, %3957 : !torch.vtensor<[80,576,64],f16>, !torch.list<int> -> !torch.vtensor<[4,20,576,64],f16>
%3959 = torch.aten.permute %3958, %1239 : !torch.vtensor<[4,20,576,64],f16>, !torch.list<int> -> !torch.vtensor<[4,576,20,64],f16>
%3960 = torch.aten.clone %3959, %int0 : !torch.vtensor<[4,576,20,64],f16>, !torch.int -> !torch.vtensor<[4,576,20,64],f16>
%3961 = torch.aten.view %3960, %3792 : !torch.vtensor<[4,576,20,64],f16>, !torch.list<int> -> !torch.vtensor<[4,576,1280],f16>
%3962 = torch.aten.view %3961, %3795 : !torch.vtensor<[4,576,1280],f16>, !torch.list<int> -> !torch.vtensor<[4,576,80,16],f16>
%3963 = torch.aten.abs %3962 : !torch.vtensor<[4,576,80,16],f16> -> !torch.vtensor<[4,576,80,16],f16>
%values_148, %indices_149 = torch.aten.max.dim %3963, %int3, %true : !torch.vtensor<[4,576,80,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[4,576,80,1],f16>, !torch.vtensor<[4,576,80,1],si64>
%3964 = torch.aten.view %values_148, %3798 : !torch.vtensor<[4,576,80,1],f16>, !torch.list<int> -> !torch.vtensor<[4,576,80,1],f16>
%3965 = torch.aten.broadcast_to %3964, %3795 : !torch.vtensor<[4,576,80,1],f16>, !torch.list<int> -> !torch.vtensor<[4,576,80,16],f16>
%3966 = torch.aten.clone %3965, %int0 : !torch.vtensor<[4,576,80,16],f16>, !torch.int -> !torch.vtensor<[4,576,80,16],f16>
%3967 = torch.aten.view %3966, %3792 : !torch.vtensor<[4,576,80,16],f16>, !torch.list<int> -> !torch.vtensor<[4,576,1280],f16>
%3968 = torch.aten.div.Scalar %3967, %int128 : !torch.vtensor<[4,576,1280],f16>, !torch.int -> !torch.vtensor<[4,576,1280],f16>
%3969 = torch.aten.detach %936 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%3970 = torch.aten.detach %3969 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%3971 = torch.aten.div.Tensor %3961, %3968 : !torch.vtensor<[4,576,1280],f16>, !torch.vtensor<[4,576,1280],f16> -> !torch.vtensor<[4,576,1280],f16>
%3972 = torch.aten.add.Tensor %3971, %3970, %int1 : !torch.vtensor<[4,576,1280],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,576,1280],f16>
%3973 = torch.aten.round %3972 : !torch.vtensor<[4,576,1280],f16> -> !torch.vtensor<[4,576,1280],f16>
%3974 = torch.aten.clamp %3973, %int-128, %int127 : !torch.vtensor<[4,576,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,576,1280],f16>
%3975 = torch.aten.sub.Tensor %3974, %3970, %int1 : !torch.vtensor<[4,576,1280],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,576,1280],f16>
%3976 = torch.aten.mul.Tensor %3975, %3968 : !torch.vtensor<[4,576,1280],f16>, !torch.vtensor<[4,576,1280],f16> -> !torch.vtensor<[4,576,1280],f16>
%3977 = torch.aten.broadcast_to %698, %3812 : !torch.vtensor<[1280,80,1],f16>, !torch.list<int> -> !torch.vtensor<[1280,80,16],f16>
%3978 = torch.aten.clone %3977, %int0 : !torch.vtensor<[1280,80,16],f16>, !torch.int -> !torch.vtensor<[1280,80,16],f16>
%3979 = torch.aten.view %3978, %3815 : !torch.vtensor<[1280,80,16],f16>, !torch.list<int> -> !torch.vtensor<[1280,1280],f16>
%3980 = torch.aten.mul.Tensor %697, %3979 : !torch.vtensor<[1280,1280],si8>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%3981 = torch.aten.transpose.int %3980, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%3982 = torch.aten.view %3976, %3819 : !torch.vtensor<[4,576,1280],f16>, !torch.list<int> -> !torch.vtensor<[2304,1280],f16>
%3983 = torch.aten.mm %3982, %3981 : !torch.vtensor<[2304,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[2304,1280],f16>
%3984 = torch.aten.mul.Scalar %696, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16>
%3985 = torch.aten.add.Tensor %3984, %3983, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[2304,1280],f16>, !torch.int -> !torch.vtensor<[2304,1280],f16>
%3986 = torch.aten.view %3985, %3792 : !torch.vtensor<[2304,1280],f16>, !torch.list<int> -> !torch.vtensor<[4,576,1280],f16>
%3987 = torch.aten.add.Tensor %3986, %3824, %int1 : !torch.vtensor<[4,576,1280],f16>, !torch.vtensor<[4,576,1280],f16>, !torch.int -> !torch.vtensor<[4,576,1280],f16>
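    // End of self-attention (output projection plus residual). Next: a LayerNorm
    // decomposed into mean, variance, rsqrt(var + 1e-05), and affine %695/%694.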
%3988 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%3989 = torch.aten.sum.dim_IntList %3987, %3988, %true, %none : !torch.vtensor<[4,576,1280],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,576,1],f16>
%3990 = torch.aten.div.Scalar %3989, %int1280 : !torch.vtensor<[4,576,1],f16>, !torch.int -> !torch.vtensor<[4,576,1],f16>
%3991 = torch.prim.ListConstruct %int4, %int576, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3992 = torch.aten.broadcast_to %3990, %3991 : !torch.vtensor<[4,576,1],f16>, !torch.list<int> -> !torch.vtensor<[4,576,1280],f16>
%3993 = torch.aten.sub.Tensor %3987, %3992, %int1 : !torch.vtensor<[4,576,1280],f16>, !torch.vtensor<[4,576,1280],f16>, !torch.int -> !torch.vtensor<[4,576,1280],f16>
%3994 = torch.aten.mul.Tensor %3993, %3993 : !torch.vtensor<[4,576,1280],f16>, !torch.vtensor<[4,576,1280],f16> -> !torch.vtensor<[4,576,1280],f16>
%3995 = torch.aten.sum.dim_IntList %3994, %3988, %true, %none : !torch.vtensor<[4,576,1280],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,576,1],f16>
%3996 = torch.aten.div.Scalar %3995, %int1280 : !torch.vtensor<[4,576,1],f16>, !torch.int -> !torch.vtensor<[4,576,1],f16>
%3997 = torch.aten.add.Scalar %3996, %float1.000000e-05, %int1 : !torch.vtensor<[4,576,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[4,576,1],f16>
%3998 = torch.aten.rsqrt %3997 : !torch.vtensor<[4,576,1],f16> -> !torch.vtensor<[4,576,1],f16>
%3999 = torch.prim.ListConstruct %int4, %int576, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4000 = torch.aten.broadcast_to %3998, %3999 : !torch.vtensor<[4,576,1],f16>, !torch.list<int> -> !torch.vtensor<[4,576,1280],f16>
%4001 = torch.aten.mul.Tensor %3993, %4000 : !torch.vtensor<[4,576,1280],f16>, !torch.vtensor<[4,576,1280],f16> -> !torch.vtensor<[4,576,1280],f16>
%4002 = torch.aten.mul.Tensor %4001, %695 : !torch.vtensor<[4,576,1280],f16>, !torch.vtensor<[1280],f16> -> !torch.vtensor<[4,576,1280],f16>
%4003 = torch.aten.add.Tensor %4002, %694, %int1 : !torch.vtensor<[4,576,1280],f16>, !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[4,576,1280],f16>
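    // Cross-attention: the query is projected from the normalized hidden states, while
    // key and value come from the encoder states %arg2 ([4,77,1024] text embeddings).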
%4004 = torch.aten.view %4003, %3795 : !torch.vtensor<[4,576,1280],f16>, !torch.list<int> -> !torch.vtensor<[4,576,80,16],f16>
%4005 = torch.aten.abs %4004 : !torch.vtensor<[4,576,80,16],f16> -> !torch.vtensor<[4,576,80,16],f16>
%values_150, %indices_151 = torch.aten.max.dim %4005, %int3, %true : !torch.vtensor<[4,576,80,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[4,576,80,1],f16>, !torch.vtensor<[4,576,80,1],si64>
%4006 = torch.aten.view %values_150, %3798 : !torch.vtensor<[4,576,80,1],f16>, !torch.list<int> -> !torch.vtensor<[4,576,80,1],f16>
%4007 = torch.aten.broadcast_to %4006, %3795 : !torch.vtensor<[4,576,80,1],f16>, !torch.list<int> -> !torch.vtensor<[4,576,80,16],f16>
%4008 = torch.aten.clone %4007, %int0 : !torch.vtensor<[4,576,80,16],f16>, !torch.int -> !torch.vtensor<[4,576,80,16],f16>
%4009 = torch.aten.view %4008, %3792 : !torch.vtensor<[4,576,80,16],f16>, !torch.list<int> -> !torch.vtensor<[4,576,1280],f16>
%4010 = torch.aten.div.Scalar %4009, %int128 : !torch.vtensor<[4,576,1280],f16>, !torch.int -> !torch.vtensor<[4,576,1280],f16>
%4011 = torch.aten.detach %936 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%4012 = torch.aten.detach %4011 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%4013 = torch.aten.div.Tensor %4003, %4010 : !torch.vtensor<[4,576,1280],f16>, !torch.vtensor<[4,576,1280],f16> -> !torch.vtensor<[4,576,1280],f16>
%4014 = torch.aten.add.Tensor %4013, %4012, %int1 : !torch.vtensor<[4,576,1280],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,576,1280],f16>
%4015 = torch.aten.round %4014 : !torch.vtensor<[4,576,1280],f16> -> !torch.vtensor<[4,576,1280],f16>
%4016 = torch.aten.clamp %4015, %int-128, %int127 : !torch.vtensor<[4,576,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,576,1280],f16>
%4017 = torch.aten.sub.Tensor %4016, %4012, %int1 : !torch.vtensor<[4,576,1280],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,576,1280],f16>
%4018 = torch.aten.mul.Tensor %4017, %4010 : !torch.vtensor<[4,576,1280],f16>, !torch.vtensor<[4,576,1280],f16> -> !torch.vtensor<[4,576,1280],f16>
%4019 = torch.aten.broadcast_to %693, %3812 : !torch.vtensor<[1280,80,1],f16>, !torch.list<int> -> !torch.vtensor<[1280,80,16],f16>
%4020 = torch.aten.clone %4019, %int0 : !torch.vtensor<[1280,80,16],f16>, !torch.int -> !torch.vtensor<[1280,80,16],f16>
%4021 = torch.aten.view %4020, %3815 : !torch.vtensor<[1280,80,16],f16>, !torch.list<int> -> !torch.vtensor<[1280,1280],f16>
%4022 = torch.aten.mul.Tensor %692, %4021 : !torch.vtensor<[1280,1280],si8>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%4023 = torch.aten.transpose.int %4022, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%4024 = torch.aten.view %4018, %3819 : !torch.vtensor<[4,576,1280],f16>, !torch.list<int> -> !torch.vtensor<[2304,1280],f16>
%4025 = torch.aten.mm %4024, %4023 : !torch.vtensor<[2304,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[2304,1280],f16>
%4026 = torch.aten.view %4025, %3792 : !torch.vtensor<[2304,1280],f16>, !torch.list<int> -> !torch.vtensor<[4,576,1280],f16>
%4027 = torch.aten.view %4026, %3864 : !torch.vtensor<[4,576,1280],f16>, !torch.list<int> -> !torch.vtensor<[4,576,20,64],f16>
%4028 = torch.aten.permute %4027, %1239 : !torch.vtensor<[4,576,20,64],f16>, !torch.list<int> -> !torch.vtensor<[4,20,576,64],f16>
%4029 = torch.aten.clone %4028, %int0 : !torch.vtensor<[4,20,576,64],f16>, !torch.int -> !torch.vtensor<[4,20,576,64],f16>
%4030 = torch.aten.view %4029, %3868 : !torch.vtensor<[4,20,576,64],f16>, !torch.list<int> -> !torch.vtensor<[80,576,64],f16>
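    // %arg2 is QDQ'ed with group shape [4,77,64,16] and projected 1024 -> 1280 for the
    // key and value, giving [4,77,1280] each.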
%4031 = torch.aten.view %arg2, %1405 : !torch.vtensor<[4,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,77,64,16],f16>
%4032 = torch.aten.abs %4031 : !torch.vtensor<[4,77,64,16],f16> -> !torch.vtensor<[4,77,64,16],f16>
%values_152, %indices_153 = torch.aten.max.dim %4032, %int3, %true : !torch.vtensor<[4,77,64,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[4,77,64,1],f16>, !torch.vtensor<[4,77,64,1],si64>
%4033 = torch.aten.view %values_152, %1408 : !torch.vtensor<[4,77,64,1],f16>, !torch.list<int> -> !torch.vtensor<[4,77,64,1],f16>
%4034 = torch.aten.broadcast_to %4033, %1405 : !torch.vtensor<[4,77,64,1],f16>, !torch.list<int> -> !torch.vtensor<[4,77,64,16],f16>
%4035 = torch.aten.clone %4034, %int0 : !torch.vtensor<[4,77,64,16],f16>, !torch.int -> !torch.vtensor<[4,77,64,16],f16>
%4036 = torch.aten.view %4035, %1412 : !torch.vtensor<[4,77,64,16],f16>, !torch.list<int> -> !torch.vtensor<[4,77,1024],f16>
%4037 = torch.aten.div.Scalar %4036, %int128 : !torch.vtensor<[4,77,1024],f16>, !torch.int -> !torch.vtensor<[4,77,1024],f16>
%4038 = torch.aten.detach %936 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%4039 = torch.aten.detach %4038 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%4040 = torch.aten.div.Tensor %arg2, %4037 : !torch.vtensor<[4,77,1024],f16>, !torch.vtensor<[4,77,1024],f16> -> !torch.vtensor<[4,77,1024],f16>
%4041 = torch.aten.add.Tensor %4040, %4039, %int1 : !torch.vtensor<[4,77,1024],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,77,1024],f16>
%4042 = torch.aten.round %4041 : !torch.vtensor<[4,77,1024],f16> -> !torch.vtensor<[4,77,1024],f16>
%4043 = torch.aten.clamp %4042, %int-128, %int127 : !torch.vtensor<[4,77,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,77,1024],f16>
%4044 = torch.aten.sub.Tensor %4043, %4039, %int1 : !torch.vtensor<[4,77,1024],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,77,1024],f16>
%4045 = torch.aten.mul.Tensor %4044, %4037 : !torch.vtensor<[4,77,1024],f16>, !torch.vtensor<[4,77,1024],f16> -> !torch.vtensor<[4,77,1024],f16>
%4046 = torch.prim.ListConstruct %int1280, %int64, %int16 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4047 = torch.aten.broadcast_to %691, %4046 : !torch.vtensor<[1280,64,1],f16>, !torch.list<int> -> !torch.vtensor<[1280,64,16],f16>
%4048 = torch.aten.clone %4047, %int0 : !torch.vtensor<[1280,64,16],f16>, !torch.int -> !torch.vtensor<[1280,64,16],f16>
%4049 = torch.prim.ListConstruct %int1280, %int1024 : (!torch.int, !torch.int) -> !torch.list<int>
%4050 = torch.aten.view %4048, %4049 : !torch.vtensor<[1280,64,16],f16>, !torch.list<int> -> !torch.vtensor<[1280,1024],f16>
%4051 = torch.aten.mul.Tensor %690, %4050 : !torch.vtensor<[1280,1024],si8>, !torch.vtensor<[1280,1024],f16> -> !torch.vtensor<[1280,1024],f16>
%4052 = torch.aten.transpose.int %4051, %int0, %int1 : !torch.vtensor<[1280,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[1024,1280],f16>
%4053 = torch.aten.view %4045, %1430 : !torch.vtensor<[4,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[308,1024],f16>
%4054 = torch.aten.mm %4053, %4052 : !torch.vtensor<[308,1024],f16>, !torch.vtensor<[1024,1280],f16> -> !torch.vtensor<[308,1280],f16>
%4055 = torch.prim.ListConstruct %int4, %int77, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4056 = torch.aten.view %4054, %4055 : !torch.vtensor<[308,1280],f16>, !torch.list<int> -> !torch.vtensor<[4,77,1280],f16>
%4057 = torch.aten.view %arg2, %1405 : !torch.vtensor<[4,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,77,64,16],f16>
%4058 = torch.aten.abs %4057 : !torch.vtensor<[4,77,64,16],f16> -> !torch.vtensor<[4,77,64,16],f16>
%values_154, %indices_155 = torch.aten.max.dim %4058, %int3, %true : !torch.vtensor<[4,77,64,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[4,77,64,1],f16>, !torch.vtensor<[4,77,64,1],si64>
%4059 = torch.aten.view %values_154, %1408 : !torch.vtensor<[4,77,64,1],f16>, !torch.list<int> -> !torch.vtensor<[4,77,64,1],f16>
%4060 = torch.aten.broadcast_to %4059, %1405 : !torch.vtensor<[4,77,64,1],f16>, !torch.list<int> -> !torch.vtensor<[4,77,64,16],f16>
%4061 = torch.aten.clone %4060, %int0 : !torch.vtensor<[4,77,64,16],f16>, !torch.int -> !torch.vtensor<[4,77,64,16],f16>
%4062 = torch.aten.view %4061, %1412 : !torch.vtensor<[4,77,64,16],f16>, !torch.list<int> -> !torch.vtensor<[4,77,1024],f16>
%4063 = torch.aten.div.Scalar %4062, %int128 : !torch.vtensor<[4,77,1024],f16>, !torch.int -> !torch.vtensor<[4,77,1024],f16>
%4064 = torch.aten.detach %936 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%4065 = torch.aten.detach %4064 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%4066 = torch.aten.div.Tensor %arg2, %4063 : !torch.vtensor<[4,77,1024],f16>, !torch.vtensor<[4,77,1024],f16> -> !torch.vtensor<[4,77,1024],f16>
%4067 = torch.aten.add.Tensor %4066, %4065, %int1 : !torch.vtensor<[4,77,1024],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,77,1024],f16>
%4068 = torch.aten.round %4067 : !torch.vtensor<[4,77,1024],f16> -> !torch.vtensor<[4,77,1024],f16>
%4069 = torch.aten.clamp %4068, %int-128, %int127 : !torch.vtensor<[4,77,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,77,1024],f16>
%4070 = torch.aten.sub.Tensor %4069, %4065, %int1 : !torch.vtensor<[4,77,1024],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,77,1024],f16>
%4071 = torch.aten.mul.Tensor %4070, %4063 : !torch.vtensor<[4,77,1024],f16>, !torch.vtensor<[4,77,1024],f16> -> !torch.vtensor<[4,77,1024],f16>
%4072 = torch.aten.broadcast_to %689, %4046 : !torch.vtensor<[1280,64,1],f16>, !torch.list<int> -> !torch.vtensor<[1280,64,16],f16>
%4073 = torch.aten.clone %4072, %int0 : !torch.vtensor<[1280,64,16],f16>, !torch.int -> !torch.vtensor<[1280,64,16],f16>
%4074 = torch.aten.view %4073, %4049 : !torch.vtensor<[1280,64,16],f16>, !torch.list<int> -> !torch.vtensor<[1280,1024],f16>
%4075 = torch.aten.mul.Tensor %688, %4074 : !torch.vtensor<[1280,1024],si8>, !torch.vtensor<[1280,1024],f16> -> !torch.vtensor<[1280,1024],f16>
%4076 = torch.aten.transpose.int %4075, %int0, %int1 : !torch.vtensor<[1280,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[1024,1280],f16>
%4077 = torch.aten.view %4071, %1430 : !torch.vtensor<[4,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[308,1024],f16>
%4078 = torch.aten.mm %4077, %4076 : !torch.vtensor<[308,1024],f16>, !torch.vtensor<[1024,1280],f16> -> !torch.vtensor<[308,1280],f16>
%4079 = torch.aten.view %4078, %4055 : !torch.vtensor<[308,1280],f16>, !torch.list<int> -> !torch.vtensor<[4,77,1280],f16>
%4080 = torch.prim.ListConstruct %int4, %int77, %int20, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4081 = torch.aten.view %4056, %4080 : !torch.vtensor<[4,77,1280],f16>, !torch.list<int> -> !torch.vtensor<[4,77,20,64],f16>
%4082 = torch.aten.permute %4081, %1239 : !torch.vtensor<[4,77,20,64],f16>, !torch.list<int> -> !torch.vtensor<[4,20,77,64],f16>
%4083 = torch.aten.clone %4082, %int0 : !torch.vtensor<[4,20,77,64],f16>, !torch.int -> !torch.vtensor<[4,20,77,64],f16>
%4084 = torch.prim.ListConstruct %int80, %int77, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4085 = torch.aten.view %4083, %4084 : !torch.vtensor<[4,20,77,64],f16>, !torch.list<int> -> !torch.vtensor<[80,77,64],f16>
%4086 = torch.aten.view %4079, %4080 : !torch.vtensor<[4,77,1280],f16>, !torch.list<int> -> !torch.vtensor<[4,77,20,64],f16>
%4087 = torch.aten.permute %4086, %1239 : !torch.vtensor<[4,77,20,64],f16>, !torch.list<int> -> !torch.vtensor<[4,20,77,64],f16>
%4088 = torch.aten.clone %4087, %int0 : !torch.vtensor<[4,20,77,64],f16>, !torch.int -> !torch.vtensor<[4,20,77,64],f16>
%4089 = torch.aten.view %4088, %4084 : !torch.vtensor<[4,20,77,64],f16>, !torch.list<int> -> !torch.vtensor<[80,77,64],f16>
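    // Cross-attention scores: [80,576,64] x [80,64,77] -> [80,576,77], scaled by
    // 1/sqrt(64) again and followed by the same decomposed softmax and f16 bmm with v.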
%4090 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%4091 = torch.aten.to.dtype %4090, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%4092 = torch.prim.ListConstruct %int80, %int576, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4093 = torch.aten.broadcast_to %4091, %4092 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[80,576,64],f32>
%4094 = torch.aten.to.dtype %4030, %int6, %false, %false, %none : !torch.vtensor<[80,576,64],f16>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[80,576,64],f32>
%4095 = torch.prim.ListConstruct %int80, %int576, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4096 = torch.aten.broadcast_to %4094, %4095 : !torch.vtensor<[80,576,64],f32>, !torch.list<int> -> !torch.vtensor<[80,576,64],f32>
%4097 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%4098 = torch.aten.to.dtype %4097, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%4099 = torch.prim.ListConstruct %int80, %int77, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4100 = torch.aten.broadcast_to %4098, %4099 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[80,77,64],f32>
%4101 = torch.aten.to.dtype %4085, %int6, %false, %false, %none : !torch.vtensor<[80,77,64],f16>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[80,77,64],f32>
%4102 = torch.prim.ListConstruct %int80, %int77, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4103 = torch.aten.broadcast_to %4101, %4102 : !torch.vtensor<[80,77,64],f32>, !torch.list<int> -> !torch.vtensor<[80,77,64],f32>
%4104 = torch.prim.ListConstruct %int80, %int576, %int77 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4105 = torch.aten.empty.memory_format %4104, %int6, %none, %cuda3A0, %false, %none : !torch.list<int>, !torch.int, !torch.none, !torch.Device, !torch.bool, !torch.none -> !torch.vtensor<[80,576,77],f32>
%4106 = torch.aten.transpose.int %4103, %int-1, %int-2 : !torch.vtensor<[80,77,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[80,64,77],f32>
%4107 = torch.aten.bmm %4096, %4106 : !torch.vtensor<[80,576,64],f32>, !torch.vtensor<[80,64,77],f32> -> !torch.vtensor<[80,576,77],f32>
%4108 = torch.aten.mul.Scalar %4107, %float1.250000e-01 : !torch.vtensor<[80,576,77],f32>, !torch.float -> !torch.vtensor<[80,576,77],f32>
%4109 = torch.aten.add.Tensor %4108, %4105, %int0 : !torch.vtensor<[80,576,77],f32>, !torch.vtensor<[80,576,77],f32>, !torch.int -> !torch.vtensor<[80,576,77],f32>
%values_156, %indices_157 = torch.aten.max.dim %4109, %int-1, %true : !torch.vtensor<[80,576,77],f32>, !torch.int, !torch.bool -> !torch.vtensor<[80,576,1],f32>, !torch.vtensor<[80,576,1],si64>
%4110 = torch.aten.sub.Tensor %4109, %values_156, %float1.000000e00 : !torch.vtensor<[80,576,77],f32>, !torch.vtensor<[80,576,1],f32>, !torch.float -> !torch.vtensor<[80,576,77],f32>
%4111 = torch.aten.exp %4110 : !torch.vtensor<[80,576,77],f32> -> !torch.vtensor<[80,576,77],f32>
%4112 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%4113 = torch.aten.sum.dim_IntList %4111, %4112, %true, %none : !torch.vtensor<[80,576,77],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[80,576,1],f32>
%4114 = torch.aten.div.Tensor %4111, %4113 : !torch.vtensor<[80,576,77],f32>, !torch.vtensor<[80,576,1],f32> -> !torch.vtensor<[80,576,77],f32>
%4115 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%4116 = torch.aten.to.dtype %4115, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%4117 = torch.prim.ListConstruct %int80, %int576, %int77 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4118 = torch.aten.broadcast_to %4116, %4117 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[80,576,77],f16>
%4119 = torch.aten.to.dtype %4114, %int5, %false, %false, %none : !torch.vtensor<[80,576,77],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[80,576,77],f16>
%4120 = torch.prim.ListConstruct %int80, %int576, %int77 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4121 = torch.aten.broadcast_to %4119, %4120 : !torch.vtensor<[80,576,77],f16>, !torch.list<int> -> !torch.vtensor<[80,576,77],f16>
%4122 = torch.aten.bmm %4121, %4089 : !torch.vtensor<[80,576,77],f16>, !torch.vtensor<[80,77,64],f16> -> !torch.vtensor<[80,576,64],f16>
%4123 = torch.aten.view %4122, %3957 : !torch.vtensor<[80,576,64],f16>, !torch.list<int> -> !torch.vtensor<[4,20,576,64],f16>
%4124 = torch.aten.permute %4123, %1239 : !torch.vtensor<[4,20,576,64],f16>, !torch.list<int> -> !torch.vtensor<[4,576,20,64],f16>
%4125 = torch.aten.clone %4124, %int0 : !torch.vtensor<[4,576,20,64],f16>, !torch.int -> !torch.vtensor<[4,576,20,64],f16>
%4126 = torch.aten.view %4125, %3792 : !torch.vtensor<[4,576,20,64],f16>, !torch.list<int> -> !torch.vtensor<[4,576,1280],f16>
%4127 = torch.aten.view %4126, %3795 : !torch.vtensor<[4,576,1280],f16>, !torch.list<int> -> !torch.vtensor<[4,576,80,16],f16>
%4128 = torch.aten.abs %4127 : !torch.vtensor<[4,576,80,16],f16> -> !torch.vtensor<[4,576,80,16],f16>
%values_158, %indices_159 = torch.aten.max.dim %4128, %int3, %true : !torch.vtensor<[4,576,80,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[4,576,80,1],f16>, !torch.vtensor<[4,576,80,1],si64>
%4129 = torch.aten.view %values_158, %3798 : !torch.vtensor<[4,576,80,1],f16>, !torch.list<int> -> !torch.vtensor<[4,576,80,1],f16>
%4130 = torch.aten.broadcast_to %4129, %3795 : !torch.vtensor<[4,576,80,1],f16>, !torch.list<int> -> !torch.vtensor<[4,576,80,16],f16>
%4131 = torch.aten.clone %4130, %int0 : !torch.vtensor<[4,576,80,16],f16>, !torch.int -> !torch.vtensor<[4,576,80,16],f16>
%4132 = torch.aten.view %4131, %3792 : !torch.vtensor<[4,576,80,16],f16>, !torch.list<int> -> !torch.vtensor<[4,576,1280],f16>
%4133 = torch.aten.div.Scalar %4132, %int128 : !torch.vtensor<[4,576,1280],f16>, !torch.int -> !torch.vtensor<[4,576,1280],f16>
%4134 = torch.aten.detach %936 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%4135 = torch.aten.detach %4134 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%4136 = torch.aten.div.Tensor %4126, %4133 : !torch.vtensor<[4,576,1280],f16>, !torch.vtensor<[4,576,1280],f16> -> !torch.vtensor<[4,576,1280],f16>
%4137 = torch.aten.add.Tensor %4136, %4135, %int1 : !torch.vtensor<[4,576,1280],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,576,1280],f16>
%4138 = torch.aten.round %4137 : !torch.vtensor<[4,576,1280],f16> -> !torch.vtensor<[4,576,1280],f16>
%4139 = torch.aten.clamp %4138, %int-128, %int127 : !torch.vtensor<[4,576,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,576,1280],f16>
%4140 = torch.aten.sub.Tensor %4139, %4135, %int1 : !torch.vtensor<[4,576,1280],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,576,1280],f16>
%4141 = torch.aten.mul.Tensor %4140, %4133 : !torch.vtensor<[4,576,1280],f16>, !torch.vtensor<[4,576,1280],f16> -> !torch.vtensor<[4,576,1280],f16>
%4142 = torch.aten.broadcast_to %687, %3812 : !torch.vtensor<[1280,80,1],f16>, !torch.list<int> -> !torch.vtensor<[1280,80,16],f16>
%4143 = torch.aten.clone %4142, %int0 : !torch.vtensor<[1280,80,16],f16>, !torch.int -> !torch.vtensor<[1280,80,16],f16>
%4144 = torch.aten.view %4143, %3815 : !torch.vtensor<[1280,80,16],f16>, !torch.list<int> -> !torch.vtensor<[1280,1280],f16>
%4145 = torch.aten.mul.Tensor %686, %4144 : !torch.vtensor<[1280,1280],si8>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%4146 = torch.aten.transpose.int %4145, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%4147 = torch.aten.view %4141, %3819 : !torch.vtensor<[4,576,1280],f16>, !torch.list<int> -> !torch.vtensor<[2304,1280],f16>
%4148 = torch.aten.mm %4147, %4146 : !torch.vtensor<[2304,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[2304,1280],f16>
%4149 = torch.aten.mul.Scalar %685, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16>
%4150 = torch.aten.add.Tensor %4149, %4148, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[2304,1280],f16>, !torch.int -> !torch.vtensor<[2304,1280],f16>
%4151 = torch.aten.view %4150, %3792 : !torch.vtensor<[2304,1280],f16>, !torch.list<int> -> !torch.vtensor<[4,576,1280],f16>
%4152 = torch.aten.add.Tensor %4151, %3987, %int1 : !torch.vtensor<[4,576,1280],f16>, !torch.vtensor<[4,576,1280],f16>, !torch.int -> !torch.vtensor<[4,576,1280],f16>
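    // Same LayerNorm decomposition again (affine %684/%683), feeding the GEGLU
    // feed-forward below.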
%4153 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%4154 = torch.aten.sum.dim_IntList %4152, %4153, %true, %none : !torch.vtensor<[4,576,1280],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,576,1],f16>
%4155 = torch.aten.div.Scalar %4154, %int1280 : !torch.vtensor<[4,576,1],f16>, !torch.int -> !torch.vtensor<[4,576,1],f16>
%4156 = torch.prim.ListConstruct %int4, %int576, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4157 = torch.aten.broadcast_to %4155, %4156 : !torch.vtensor<[4,576,1],f16>, !torch.list<int> -> !torch.vtensor<[4,576,1280],f16>
%4158 = torch.aten.sub.Tensor %4152, %4157, %int1 : !torch.vtensor<[4,576,1280],f16>, !torch.vtensor<[4,576,1280],f16>, !torch.int -> !torch.vtensor<[4,576,1280],f16>
%4159 = torch.aten.mul.Tensor %4158, %4158 : !torch.vtensor<[4,576,1280],f16>, !torch.vtensor<[4,576,1280],f16> -> !torch.vtensor<[4,576,1280],f16>
%4160 = torch.aten.sum.dim_IntList %4159, %4153, %true, %none : !torch.vtensor<[4,576,1280],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,576,1],f16>
%4161 = torch.aten.div.Scalar %4160, %int1280 : !torch.vtensor<[4,576,1],f16>, !torch.int -> !torch.vtensor<[4,576,1],f16>
%4162 = torch.aten.add.Scalar %4161, %float1.000000e-05, %int1 : !torch.vtensor<[4,576,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[4,576,1],f16>
%4163 = torch.aten.rsqrt %4162 : !torch.vtensor<[4,576,1],f16> -> !torch.vtensor<[4,576,1],f16>
%4164 = torch.prim.ListConstruct %int4, %int576, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4165 = torch.aten.broadcast_to %4163, %4164 : !torch.vtensor<[4,576,1],f16>, !torch.list<int> -> !torch.vtensor<[4,576,1280],f16>
%4166 = torch.aten.mul.Tensor %4158, %4165 : !torch.vtensor<[4,576,1280],f16>, !torch.vtensor<[4,576,1280],f16> -> !torch.vtensor<[4,576,1280],f16>
%4167 = torch.aten.mul.Tensor %4166, %684 : !torch.vtensor<[4,576,1280],f16>, !torch.vtensor<[1280],f16> -> !torch.vtensor<[4,576,1280],f16>
%4168 = torch.aten.add.Tensor %4167, %683, %int1 : !torch.vtensor<[4,576,1280],f16>, !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[4,576,1280],f16>
%4169 = torch.aten.view %4168, %3795 : !torch.vtensor<[4,576,1280],f16>, !torch.list<int> -> !torch.vtensor<[4,576,80,16],f16>
%4170 = torch.aten.abs %4169 : !torch.vtensor<[4,576,80,16],f16> -> !torch.vtensor<[4,576,80,16],f16>
%values_160, %indices_161 = torch.aten.max.dim %4170, %int3, %true : !torch.vtensor<[4,576,80,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[4,576,80,1],f16>, !torch.vtensor<[4,576,80,1],si64>
%4171 = torch.aten.view %values_160, %3798 : !torch.vtensor<[4,576,80,1],f16>, !torch.list<int> -> !torch.vtensor<[4,576,80,1],f16>
%4172 = torch.aten.broadcast_to %4171, %3795 : !torch.vtensor<[4,576,80,1],f16>, !torch.list<int> -> !torch.vtensor<[4,576,80,16],f16>
%4173 = torch.aten.clone %4172, %int0 : !torch.vtensor<[4,576,80,16],f16>, !torch.int -> !torch.vtensor<[4,576,80,16],f16>
%4174 = torch.aten.view %4173, %3792 : !torch.vtensor<[4,576,80,16],f16>, !torch.list<int> -> !torch.vtensor<[4,576,1280],f16>
%4175 = torch.aten.div.Scalar %4174, %int128 : !torch.vtensor<[4,576,1280],f16>, !torch.int -> !torch.vtensor<[4,576,1280],f16>
%4176 = torch.aten.detach %936 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%4177 = torch.aten.detach %4176 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%4178 = torch.aten.div.Tensor %4168, %4175 : !torch.vtensor<[4,576,1280],f16>, !torch.vtensor<[4,576,1280],f16> -> !torch.vtensor<[4,576,1280],f16>
%4179 = torch.aten.add.Tensor %4178, %4177, %int1 : !torch.vtensor<[4,576,1280],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,576,1280],f16>
%4180 = torch.aten.round %4179 : !torch.vtensor<[4,576,1280],f16> -> !torch.vtensor<[4,576,1280],f16>
%4181 = torch.aten.clamp %4180, %int-128, %int127 : !torch.vtensor<[4,576,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,576,1280],f16>
%4182 = torch.aten.sub.Tensor %4181, %4177, %int1 : !torch.vtensor<[4,576,1280],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,576,1280],f16>
%4183 = torch.aten.mul.Tensor %4182, %4175 : !torch.vtensor<[4,576,1280],f16>, !torch.vtensor<[4,576,1280],f16> -> !torch.vtensor<[4,576,1280],f16>
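    // GEGLU: a single 1280 -> 10240 projection is split into two [4,576,5120] halves;
    // the second half is passed through gelu (%str selects the approximation) and gates
    // the first half by elementwise multiply.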
%4184 = torch.prim.ListConstruct %int10240, %int80, %int16 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4185 = torch.aten.broadcast_to %682, %4184 : !torch.vtensor<[10240,80,1],f16>, !torch.list<int> -> !torch.vtensor<[10240,80,16],f16>
%4186 = torch.aten.clone %4185, %int0 : !torch.vtensor<[10240,80,16],f16>, !torch.int -> !torch.vtensor<[10240,80,16],f16>
%4187 = torch.prim.ListConstruct %int10240, %int1280 : (!torch.int, !torch.int) -> !torch.list<int>
%4188 = torch.aten.view %4186, %4187 : !torch.vtensor<[10240,80,16],f16>, !torch.list<int> -> !torch.vtensor<[10240,1280],f16>
%4189 = torch.aten.mul.Tensor %681, %4188 : !torch.vtensor<[10240,1280],si8>, !torch.vtensor<[10240,1280],f16> -> !torch.vtensor<[10240,1280],f16>
%4190 = torch.aten.transpose.int %4189, %int0, %int1 : !torch.vtensor<[10240,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,10240],f16>
%4191 = torch.aten.view %4183, %3819 : !torch.vtensor<[4,576,1280],f16>, !torch.list<int> -> !torch.vtensor<[2304,1280],f16>
%4192 = torch.aten.mm %4191, %4190 : !torch.vtensor<[2304,1280],f16>, !torch.vtensor<[1280,10240],f16> -> !torch.vtensor<[2304,10240],f16>
%4193 = torch.aten.mul.Scalar %680, %int1 : !torch.vtensor<[10240],f16>, !torch.int -> !torch.vtensor<[10240],f16>
%4194 = torch.aten.add.Tensor %4193, %4192, %int1 : !torch.vtensor<[10240],f16>, !torch.vtensor<[2304,10240],f16>, !torch.int -> !torch.vtensor<[2304,10240],f16>
%4195 = torch.prim.ListConstruct %int4, %int576, %int10240 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4196 = torch.aten.view %4194, %4195 : !torch.vtensor<[2304,10240],f16>, !torch.list<int> -> !torch.vtensor<[4,576,10240],f16>
%4197 = torch.aten.slice.Tensor %4196, %int-1, %int0, %int5120, %int1 : !torch.vtensor<[4,576,10240],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[4,576,5120],f16>
%4198 = torch.aten.slice.Tensor %4196, %int-1, %int5120, %int10240, %int1 : !torch.vtensor<[4,576,10240],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[4,576,5120],f16>
%4199 = torch.aten.gelu %4198, %str : !torch.vtensor<[4,576,5120],f16>, !torch.str -> !torch.vtensor<[4,576,5120],f16>
%4200 = torch.aten.mul.Tensor %4197, %4199 : !torch.vtensor<[4,576,5120],f16>, !torch.vtensor<[4,576,5120],f16> -> !torch.vtensor<[4,576,5120],f16>
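    // The gated 5120-wide activation is QDQ'ed with group shape [4,576,320,16] before
    // the down-projection.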
%4201 = torch.prim.ListConstruct %int4, %int576, %int320, %int16 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4202 = torch.aten.view %4200, %4201 : !torch.vtensor<[4,576,5120],f16>, !torch.list<int> -> !torch.vtensor<[4,576,320,16],f16>
%4203 = torch.aten.abs %4202 : !torch.vtensor<[4,576,320,16],f16> -> !torch.vtensor<[4,576,320,16],f16>
%values_162, %indices_163 = torch.aten.max.dim %4203, %int3, %true : !torch.vtensor<[4,576,320,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[4,576,320,1],f16>, !torch.vtensor<[4,576,320,1],si64>
%4204 = torch.prim.ListConstruct %int4, %int576, %int320, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4205 = torch.aten.view %values_162, %4204 : !torch.vtensor<[4,576,320,1],f16>, !torch.list<int> -> !torch.vtensor<[4,576,320,1],f16>
%4206 = torch.aten.broadcast_to %4205, %4201 : !torch.vtensor<[4,576,320,1],f16>, !torch.list<int> -> !torch.vtensor<[4,576,320,16],f16>
%4207 = torch.aten.clone %4206, %int0 : !torch.vtensor<[4,576,320,16],f16>, !torch.int -> !torch.vtensor<[4,576,320,16],f16>
%4208 = torch.prim.ListConstruct %int4, %int576, %int5120 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4209 = torch.aten.view %4207, %4208 : !torch.vtensor<[4,576,320,16],f16>, !torch.list<int> -> !torch.vtensor<[4,576,5120],f16>
%4210 = torch.aten.div.Scalar %4209, %int128 : !torch.vtensor<[4,576,5120],f16>, !torch.int -> !torch.vtensor<[4,576,5120],f16>
%4211 = torch.aten.detach %936 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%4212 = torch.aten.detach %4211 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%4213 = torch.aten.div.Tensor %4200, %4210 : !torch.vtensor<[4,576,5120],f16>, !torch.vtensor<[4,576,5120],f16> -> !torch.vtensor<[4,576,5120],f16>
%4214 = torch.aten.add.Tensor %4213, %4212, %int1 : !torch.vtensor<[4,576,5120],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,576,5120],f16>
%4215 = torch.aten.round %4214 : !torch.vtensor<[4,576,5120],f16> -> !torch.vtensor<[4,576,5120],f16>
%4216 = torch.aten.clamp %4215, %int-128, %int127 : !torch.vtensor<[4,576,5120],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,576,5120],f16>
%4217 = torch.aten.sub.Tensor %4216, %4212, %int1 : !torch.vtensor<[4,576,5120],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,576,5120],f16>
%4218 = torch.aten.mul.Tensor %4217, %4210 : !torch.vtensor<[4,576,5120],f16>, !torch.vtensor<[4,576,5120],f16> -> !torch.vtensor<[4,576,5120],f16>
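    // Down-projection 5120 -> 1280 (scales %679, int8 weight %678, bias %677), then the
    // feed-forward residual onto %4152.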
%4219 = torch.prim.ListConstruct %int1280, %int320, %int16 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4220 = torch.aten.broadcast_to %679, %4219 : !torch.vtensor<[1280,320,1],f16>, !torch.list<int> -> !torch.vtensor<[1280,320,16],f16>
%4221 = torch.aten.clone %4220, %int0 : !torch.vtensor<[1280,320,16],f16>, !torch.int -> !torch.vtensor<[1280,320,16],f16>
%4222 = torch.prim.ListConstruct %int1280, %int5120 : (!torch.int, !torch.int) -> !torch.list<int>
%4223 = torch.aten.view %4221, %4222 : !torch.vtensor<[1280,320,16],f16>, !torch.list<int> -> !torch.vtensor<[1280,5120],f16>
%4224 = torch.aten.mul.Tensor %678, %4223 : !torch.vtensor<[1280,5120],si8>, !torch.vtensor<[1280,5120],f16> -> !torch.vtensor<[1280,5120],f16>
%4225 = torch.aten.transpose.int %4224, %int0, %int1 : !torch.vtensor<[1280,5120],f16>, !torch.int, !torch.int -> !torch.vtensor<[5120,1280],f16>
%4226 = torch.prim.ListConstruct %int2304, %int5120 : (!torch.int, !torch.int) -> !torch.list<int>
%4227 = torch.aten.view %4218, %4226 : !torch.vtensor<[4,576,5120],f16>, !torch.list<int> -> !torch.vtensor<[2304,5120],f16>
%4228 = torch.aten.mm %4227, %4225 : !torch.vtensor<[2304,5120],f16>, !torch.vtensor<[5120,1280],f16> -> !torch.vtensor<[2304,1280],f16>
%4229 = torch.aten.mul.Scalar %677, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16>
%4230 = torch.aten.add.Tensor %4229, %4228, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[2304,1280],f16>, !torch.int -> !torch.vtensor<[2304,1280],f16>
%4231 = torch.aten.view %4230, %3792 : !torch.vtensor<[2304,1280],f16>, !torch.list<int> -> !torch.vtensor<[4,576,1280],f16>
%4232 = torch.aten.add.Tensor %4231, %4152, %int1 : !torch.vtensor<[4,576,1280],f16>, !torch.vtensor<[4,576,1280],f16>, !torch.int -> !torch.vtensor<[4,576,1280],f16>
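    // Block output projection: one more QDQ pass and a 1280 -> 1280 linear, after which
    // [4,576,1280] is reshaped to [4,24,24,1280], permuted back to NCHW, and added to the
    // pre-transformer feature map %3752.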
%4233 = torch.aten.view %4232, %3795 : !torch.vtensor<[4,576,1280],f16>, !torch.list<int> -> !torch.vtensor<[4,576,80,16],f16>
%4234 = torch.aten.abs %4233 : !torch.vtensor<[4,576,80,16],f16> -> !torch.vtensor<[4,576,80,16],f16>
%values_164, %indices_165 = torch.aten.max.dim %4234, %int3, %true : !torch.vtensor<[4,576,80,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[4,576,80,1],f16>, !torch.vtensor<[4,576,80,1],si64>
%4235 = torch.aten.view %values_164, %3798 : !torch.vtensor<[4,576,80,1],f16>, !torch.list<int> -> !torch.vtensor<[4,576,80,1],f16>
%4236 = torch.aten.broadcast_to %4235, %3795 : !torch.vtensor<[4,576,80,1],f16>, !torch.list<int> -> !torch.vtensor<[4,576,80,16],f16>
%4237 = torch.aten.clone %4236, %int0 : !torch.vtensor<[4,576,80,16],f16>, !torch.int -> !torch.vtensor<[4,576,80,16],f16>
%4238 = torch.aten.view %4237, %3792 : !torch.vtensor<[4,576,80,16],f16>, !torch.list<int> -> !torch.vtensor<[4,576,1280],f16>
%4239 = torch.aten.div.Scalar %4238, %int128 : !torch.vtensor<[4,576,1280],f16>, !torch.int -> !torch.vtensor<[4,576,1280],f16>
%4240 = torch.aten.detach %936 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%4241 = torch.aten.detach %4240 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%4242 = torch.aten.div.Tensor %4232, %4239 : !torch.vtensor<[4,576,1280],f16>, !torch.vtensor<[4,576,1280],f16> -> !torch.vtensor<[4,576,1280],f16>
%4243 = torch.aten.add.Tensor %4242, %4241, %int1 : !torch.vtensor<[4,576,1280],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,576,1280],f16>
%4244 = torch.aten.round %4243 : !torch.vtensor<[4,576,1280],f16> -> !torch.vtensor<[4,576,1280],f16>
%4245 = torch.aten.clamp %4244, %int-128, %int127 : !torch.vtensor<[4,576,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,576,1280],f16>
%4246 = torch.aten.sub.Tensor %4245, %4241, %int1 : !torch.vtensor<[4,576,1280],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,576,1280],f16>
%4247 = torch.aten.mul.Tensor %4246, %4239 : !torch.vtensor<[4,576,1280],f16>, !torch.vtensor<[4,576,1280],f16> -> !torch.vtensor<[4,576,1280],f16>
%4248 = torch.aten.broadcast_to %676, %3812 : !torch.vtensor<[1280,80,1],f16>, !torch.list<int> -> !torch.vtensor<[1280,80,16],f16>
%4249 = torch.aten.clone %4248, %int0 : !torch.vtensor<[1280,80,16],f16>, !torch.int -> !torch.vtensor<[1280,80,16],f16>
%4250 = torch.aten.view %4249, %3815 : !torch.vtensor<[1280,80,16],f16>, !torch.list<int> -> !torch.vtensor<[1280,1280],f16>
%4251 = torch.aten.mul.Tensor %675, %4250 : !torch.vtensor<[1280,1280],si8>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%4252 = torch.aten.transpose.int %4251, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%4253 = torch.aten.view %4247, %3819 : !torch.vtensor<[4,576,1280],f16>, !torch.list<int> -> !torch.vtensor<[2304,1280],f16>
%4254 = torch.aten.mm %4253, %4252 : !torch.vtensor<[2304,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[2304,1280],f16>
%4255 = torch.aten.mul.Scalar %674, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16>
%4256 = torch.aten.add.Tensor %4255, %4254, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[2304,1280],f16>, !torch.int -> !torch.vtensor<[2304,1280],f16>
%4257 = torch.aten.view %4256, %3792 : !torch.vtensor<[2304,1280],f16>, !torch.list<int> -> !torch.vtensor<[4,576,1280],f16>
%4258 = torch.prim.ListConstruct %int4, %int24, %int24, %int1280 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4259 = torch.aten.view %4257, %4258 : !torch.vtensor<[4,576,1280],f16>, !torch.list<int> -> !torch.vtensor<[4,24,24,1280],f16>
%4260 = torch.aten.permute %4259, %1638 : !torch.vtensor<[4,24,24,1280],f16>, !torch.list<int> -> !torch.vtensor<[4,1280,24,24],f16>
%4261 = torch.aten.clone %4260, %int0 : !torch.vtensor<[4,1280,24,24],f16>, !torch.int -> !torch.vtensor<[4,1280,24,24],f16>
%4262 = torch.aten.add.Tensor %4261, %3752, %int1 : !torch.vtensor<[4,1280,24,24],f16>, !torch.vtensor<[4,1280,24,24],f16>, !torch.int -> !torch.vtensor<[4,1280,24,24],f16>
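    // GroupNorm with 32 groups: [4,1280,24,24] is viewed as [4,32,40,576] and statistics
    // are accumulated in f64/f32 over 40 * 576 = 23040 elements per group, with eps %939
    // added before rsqrt.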
%4263 = torch.aten.view %4262, %3663 : !torch.vtensor<[4,1280,24,24],f16>, !torch.list<int> -> !torch.vtensor<[4,32,40,576],f16>
%4264 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%4265 = torch.aten.to.dtype %4264, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%4266 = torch.prim.ListConstruct %int4, %int32, %int40, %int576 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4267 = torch.aten.broadcast_to %4265, %4266 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[4,32,40,576],f32>
%4268 = torch.aten.to.dtype %4263, %int6, %false, %false, %none : !torch.vtensor<[4,32,40,576],f16>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,32,40,576],f32>
%4269 = torch.prim.ListConstruct %int4, %int32, %int40, %int576 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4270 = torch.aten.broadcast_to %4268, %4269 : !torch.vtensor<[4,32,40,576],f32>, !torch.list<int> -> !torch.vtensor<[4,32,40,576],f32>
%4271 = torch.aten.to.dtype %4270, %int7, %false, %false, %none : !torch.vtensor<[4,32,40,576],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,32,40,576],f64>
%4272 = torch.aten.sum.dim_IntList %4271, %996, %true, %none : !torch.vtensor<[4,32,40,576],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f64>
%4273 = torch.aten.div.Scalar %4272, %int23040 : !torch.vtensor<[4,32,1,1],f64>, !torch.int -> !torch.vtensor<[4,32,1,1],f64>
%4274 = torch.aten.sub.Tensor %4271, %4273, %float1.000000e00 : !torch.vtensor<[4,32,40,576],f64>, !torch.vtensor<[4,32,1,1],f64>, !torch.float -> !torch.vtensor<[4,32,40,576],f64>
%4275 = torch.aten.mul.Tensor %4274, %4274 : !torch.vtensor<[4,32,40,576],f64>, !torch.vtensor<[4,32,40,576],f64> -> !torch.vtensor<[4,32,40,576],f64>
%4276 = torch.aten.sum.dim_IntList %4275, %996, %true, %none : !torch.vtensor<[4,32,40,576],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f64>
%4277 = torch.aten.div.Scalar %4276, %int23040 : !torch.vtensor<[4,32,1,1],f64>, !torch.int -> !torch.vtensor<[4,32,1,1],f64>
%4278 = torch.aten.to.dtype %4277, %int6, %false, %false, %none : !torch.vtensor<[4,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f32>
%4279 = torch.aten.sum.dim_IntList %4270, %996, %true, %none : !torch.vtensor<[4,32,40,576],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f32>
%4280 = torch.aten.div.Scalar %4279, %int23040 : !torch.vtensor<[4,32,1,1],f32>, !torch.int -> !torch.vtensor<[4,32,1,1],f32>
%4281 = torch.aten.add.Tensor %4278, %939, %int1 : !torch.vtensor<[4,32,1,1],f32>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,32,1,1],f32>
%4282 = torch.aten.rsqrt %4281 : !torch.vtensor<[4,32,1,1],f32> -> !torch.vtensor<[4,32,1,1],f32>
%4283 = torch.aten.sub.Tensor %4263, %4280, %int1 : !torch.vtensor<[4,32,40,576],f16>, !torch.vtensor<[4,32,1,1],f32>, !torch.int -> !torch.vtensor<[4,32,40,576],f32>
%4284 = torch.aten.mul.Tensor %4283, %4282 : !torch.vtensor<[4,32,40,576],f32>, !torch.vtensor<[4,32,1,1],f32> -> !torch.vtensor<[4,32,40,576],f32>
%4285 = torch.aten.view %4284, %3686 : !torch.vtensor<[4,32,40,576],f32>, !torch.list<int> -> !torch.vtensor<[4,1280,24,24],f32>
%4286 = torch.aten.unsqueeze %673, %int0 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1,1280],f16>
%4287 = torch.aten.unsqueeze %4286, %int2 : !torch.vtensor<[1,1280],f16>, !torch.int -> !torch.vtensor<[1,1280,1],f16>
%4288 = torch.aten.unsqueeze %4287, %int3 : !torch.vtensor<[1,1280,1],f16>, !torch.int -> !torch.vtensor<[1,1280,1,1],f16>
%4289 = torch.aten.unsqueeze %672, %int0 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1,1280],f16>
%4290 = torch.aten.unsqueeze %4289, %int2 : !torch.vtensor<[1,1280],f16>, !torch.int -> !torch.vtensor<[1,1280,1],f16>
%4291 = torch.aten.unsqueeze %4290, %int3 : !torch.vtensor<[1,1280,1],f16>, !torch.int -> !torch.vtensor<[1,1280,1,1],f16>
%4292 = torch.aten.mul.Tensor %4285, %4291 : !torch.vtensor<[4,1280,24,24],f32>, !torch.vtensor<[1,1280,1,1],f16> -> !torch.vtensor<[4,1280,24,24],f32>
%4293 = torch.aten.add.Tensor %4292, %4288, %int1 : !torch.vtensor<[4,1280,24,24],f32>, !torch.vtensor<[1,1280,1,1],f16>, !torch.int -> !torch.vtensor<[4,1280,24,24],f32>
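    // Cast back to f16 and apply SiLU: x * sigmoid(x). The NumToTensor(0.0)/broadcast pair (%4294-%4297) is unused
    // downstream and looks like residue of the dtype-promotion decomposition.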
%4294 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%4295 = torch.aten.to.dtype %4294, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%4296 = torch.prim.ListConstruct %int4, %int1280, %int24, %int24 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4297 = torch.aten.broadcast_to %4295, %4296 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[4,1280,24,24],f16>
%4298 = torch.aten.to.dtype %4293, %int5, %false, %false, %none : !torch.vtensor<[4,1280,24,24],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,1280,24,24],f16>
%4299 = torch.prim.ListConstruct %int4, %int1280, %int24, %int24 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4300 = torch.aten.broadcast_to %4298, %4299 : !torch.vtensor<[4,1280,24,24],f16>, !torch.list<int> -> !torch.vtensor<[4,1280,24,24],f16>
%4301 = torch.aten.sigmoid %4300 : !torch.vtensor<[4,1280,24,24],f16> -> !torch.vtensor<[4,1280,24,24],f16>
%4302 = torch.aten.mul.Tensor %4301, %4300 : !torch.vtensor<[4,1280,24,24],f16>, !torch.vtensor<[4,1280,24,24],f16> -> !torch.vtensor<[4,1280,24,24],f16>
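    // Symmetric int8 fake-quantization of the activation: view as [4,80,16,24,24], take the abs-max over each
    // 16-channel block, scale = max/128, add the (detached) zero-point %936, round, clamp to [-128,127], then
    // dequantize by subtracting the zero-point and re-multiplying the scale.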
%4303 = torch.aten.view %4302, %3705 : !torch.vtensor<[4,1280,24,24],f16>, !torch.list<int> -> !torch.vtensor<[4,80,16,24,24],f16>
%4304 = torch.aten.abs %4303 : !torch.vtensor<[4,80,16,24,24],f16> -> !torch.vtensor<[4,80,16,24,24],f16>
%values_166, %indices_167 = torch.aten.max.dim %4304, %int2, %true : !torch.vtensor<[4,80,16,24,24],f16>, !torch.int, !torch.bool -> !torch.vtensor<[4,80,1,24,24],f16>, !torch.vtensor<[4,80,1,24,24],si64>
%4305 = torch.aten.view %values_166, %3708 : !torch.vtensor<[4,80,1,24,24],f16>, !torch.list<int> -> !torch.vtensor<[4,80,1,24,24],f16>
%4306 = torch.aten.broadcast_to %4305, %3705 : !torch.vtensor<[4,80,1,24,24],f16>, !torch.list<int> -> !torch.vtensor<[4,80,16,24,24],f16>
%4307 = torch.aten.clone %4306, %int0 : !torch.vtensor<[4,80,16,24,24],f16>, !torch.int -> !torch.vtensor<[4,80,16,24,24],f16>
%4308 = torch.aten.view %4307, %3686 : !torch.vtensor<[4,80,16,24,24],f16>, !torch.list<int> -> !torch.vtensor<[4,1280,24,24],f16>
%4309 = torch.aten.div.Scalar %4308, %int128 : !torch.vtensor<[4,1280,24,24],f16>, !torch.int -> !torch.vtensor<[4,1280,24,24],f16>
%4310 = torch.aten.detach %936 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%4311 = torch.aten.detach %4310 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%4312 = torch.aten.div.Tensor %4302, %4309 : !torch.vtensor<[4,1280,24,24],f16>, !torch.vtensor<[4,1280,24,24],f16> -> !torch.vtensor<[4,1280,24,24],f16>
%4313 = torch.aten.add.Tensor %4312, %4311, %int1 : !torch.vtensor<[4,1280,24,24],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,1280,24,24],f16>
%4314 = torch.aten.round %4313 : !torch.vtensor<[4,1280,24,24],f16> -> !torch.vtensor<[4,1280,24,24],f16>
%4315 = torch.aten.clamp %4314, %int-128, %int127 : !torch.vtensor<[4,1280,24,24],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,1280,24,24],f16>
%4316 = torch.aten.sub.Tensor %4315, %4311, %int1 : !torch.vtensor<[4,1280,24,24],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,1280,24,24],f16>
%4317 = torch.aten.mul.Tensor %4316, %4309 : !torch.vtensor<[4,1280,24,24],f16>, !torch.vtensor<[4,1280,24,24],f16> -> !torch.vtensor<[4,1280,24,24],f16>
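    // Dequantize the stored si8 conv weight: broadcast the per-block f16 scales [1280,80,1,3,3] to the full
    // [1280,1280,3,3] shape, multiply into the int8 tensor %670, and run the 3x3 convolution with bias %669.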
%4318 = torch.aten.broadcast_to %671, %3722 : !torch.vtensor<[1280,80,1,3,3],f16>, !torch.list<int> -> !torch.vtensor<[1280,80,16,3,3],f16>
%4319 = torch.aten.clone %4318, %int0 : !torch.vtensor<[1280,80,16,3,3],f16>, !torch.int -> !torch.vtensor<[1280,80,16,3,3],f16>
%4320 = torch.aten.view %4319, %3725 : !torch.vtensor<[1280,80,16,3,3],f16>, !torch.list<int> -> !torch.vtensor<[1280,1280,3,3],f16>
%4321 = torch.aten.mul.Tensor %670, %4320 : !torch.vtensor<[1280,1280,3,3],si8>, !torch.vtensor<[1280,1280,3,3],f16> -> !torch.vtensor<[1280,1280,3,3],f16>
%4322 = torch.aten.convolution %4317, %4321, %669, %984, %984, %984, %false, %985, %int1 : !torch.vtensor<[4,1280,24,24],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[4,1280,24,24],f16>
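    // Time-embedding injection (assuming %983 is the shared [4,1280] timestep embedding from the truncated prelude):
    // SiLU, an f16 linear (%668 weight, %667 bias), and the result unsqueezed to [4,1280,1,1] and added as a
    // per-channel bias, the usual diffusion ResNet-block pattern.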
%4323 = torch.aten.sigmoid %983 : !torch.vtensor<[4,1280],f16> -> !torch.vtensor<[4,1280],f16>
%4324 = torch.aten.mul.Tensor %4323, %983 : !torch.vtensor<[4,1280],f16>, !torch.vtensor<[4,1280],f16> -> !torch.vtensor<[4,1280],f16>
%4325 = torch.aten.transpose.int %668, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%4326 = torch.aten.mm %4324, %4325 : !torch.vtensor<[4,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[4,1280],f16>
%4327 = torch.aten.mul.Scalar %667, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16>
%4328 = torch.aten.add.Tensor %4327, %4326, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[4,1280],f16>, !torch.int -> !torch.vtensor<[4,1280],f16>
%4329 = torch.aten.unsqueeze %4328, %int2 : !torch.vtensor<[4,1280],f16>, !torch.int -> !torch.vtensor<[4,1280,1],f16>
%4330 = torch.aten.unsqueeze %4329, %int3 : !torch.vtensor<[4,1280,1],f16>, !torch.int -> !torch.vtensor<[4,1280,1,1],f16>
%4331 = torch.aten.add.Tensor %4322, %4330, %int1 : !torch.vtensor<[4,1280,24,24],f16>, !torch.vtensor<[4,1280,1,1],f16>, !torch.int -> !torch.vtensor<[4,1280,24,24],f16>
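    // Second half of the ResNet block: the same GroupNorm -> SiLU -> activation fake-quant -> dequantized int8
    // 3x3 conv sequence repeats through %4391.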
%4332 = torch.aten.view %4331, %3663 : !torch.vtensor<[4,1280,24,24],f16>, !torch.list<int> -> !torch.vtensor<[4,32,40,576],f16>
%4333 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%4334 = torch.aten.to.dtype %4333, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%4335 = torch.prim.ListConstruct %int4, %int32, %int40, %int576 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4336 = torch.aten.broadcast_to %4334, %4335 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[4,32,40,576],f32>
%4337 = torch.aten.to.dtype %4332, %int6, %false, %false, %none : !torch.vtensor<[4,32,40,576],f16>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,32,40,576],f32>
%4338 = torch.prim.ListConstruct %int4, %int32, %int40, %int576 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4339 = torch.aten.broadcast_to %4337, %4338 : !torch.vtensor<[4,32,40,576],f32>, !torch.list<int> -> !torch.vtensor<[4,32,40,576],f32>
%4340 = torch.aten.to.dtype %4339, %int7, %false, %false, %none : !torch.vtensor<[4,32,40,576],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,32,40,576],f64>
%4341 = torch.aten.sum.dim_IntList %4340, %996, %true, %none : !torch.vtensor<[4,32,40,576],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f64>
%4342 = torch.aten.div.Scalar %4341, %int23040 : !torch.vtensor<[4,32,1,1],f64>, !torch.int -> !torch.vtensor<[4,32,1,1],f64>
%4343 = torch.aten.sub.Tensor %4340, %4342, %float1.000000e00 : !torch.vtensor<[4,32,40,576],f64>, !torch.vtensor<[4,32,1,1],f64>, !torch.float -> !torch.vtensor<[4,32,40,576],f64>
%4344 = torch.aten.mul.Tensor %4343, %4343 : !torch.vtensor<[4,32,40,576],f64>, !torch.vtensor<[4,32,40,576],f64> -> !torch.vtensor<[4,32,40,576],f64>
%4345 = torch.aten.sum.dim_IntList %4344, %996, %true, %none : !torch.vtensor<[4,32,40,576],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f64>
%4346 = torch.aten.div.Scalar %4345, %int23040 : !torch.vtensor<[4,32,1,1],f64>, !torch.int -> !torch.vtensor<[4,32,1,1],f64>
%4347 = torch.aten.to.dtype %4346, %int6, %false, %false, %none : !torch.vtensor<[4,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f32>
%4348 = torch.aten.sum.dim_IntList %4339, %996, %true, %none : !torch.vtensor<[4,32,40,576],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f32>
%4349 = torch.aten.div.Scalar %4348, %int23040 : !torch.vtensor<[4,32,1,1],f32>, !torch.int -> !torch.vtensor<[4,32,1,1],f32>
%4350 = torch.aten.add.Tensor %4347, %939, %int1 : !torch.vtensor<[4,32,1,1],f32>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,32,1,1],f32>
%4351 = torch.aten.rsqrt %4350 : !torch.vtensor<[4,32,1,1],f32> -> !torch.vtensor<[4,32,1,1],f32>
%4352 = torch.aten.sub.Tensor %4332, %4349, %int1 : !torch.vtensor<[4,32,40,576],f16>, !torch.vtensor<[4,32,1,1],f32>, !torch.int -> !torch.vtensor<[4,32,40,576],f32>
%4353 = torch.aten.mul.Tensor %4352, %4351 : !torch.vtensor<[4,32,40,576],f32>, !torch.vtensor<[4,32,1,1],f32> -> !torch.vtensor<[4,32,40,576],f32>
%4354 = torch.aten.view %4353, %3686 : !torch.vtensor<[4,32,40,576],f32>, !torch.list<int> -> !torch.vtensor<[4,1280,24,24],f32>
%4355 = torch.aten.unsqueeze %666, %int0 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1,1280],f16>
%4356 = torch.aten.unsqueeze %4355, %int2 : !torch.vtensor<[1,1280],f16>, !torch.int -> !torch.vtensor<[1,1280,1],f16>
%4357 = torch.aten.unsqueeze %4356, %int3 : !torch.vtensor<[1,1280,1],f16>, !torch.int -> !torch.vtensor<[1,1280,1,1],f16>
%4358 = torch.aten.unsqueeze %665, %int0 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1,1280],f16>
%4359 = torch.aten.unsqueeze %4358, %int2 : !torch.vtensor<[1,1280],f16>, !torch.int -> !torch.vtensor<[1,1280,1],f16>
%4360 = torch.aten.unsqueeze %4359, %int3 : !torch.vtensor<[1,1280,1],f16>, !torch.int -> !torch.vtensor<[1,1280,1,1],f16>
%4361 = torch.aten.mul.Tensor %4354, %4360 : !torch.vtensor<[4,1280,24,24],f32>, !torch.vtensor<[1,1280,1,1],f16> -> !torch.vtensor<[4,1280,24,24],f32>
%4362 = torch.aten.add.Tensor %4361, %4357, %int1 : !torch.vtensor<[4,1280,24,24],f32>, !torch.vtensor<[1,1280,1,1],f16>, !torch.int -> !torch.vtensor<[4,1280,24,24],f32>
%4363 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%4364 = torch.aten.to.dtype %4363, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%4365 = torch.prim.ListConstruct %int4, %int1280, %int24, %int24 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4366 = torch.aten.broadcast_to %4364, %4365 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[4,1280,24,24],f16>
%4367 = torch.aten.to.dtype %4362, %int5, %false, %false, %none : !torch.vtensor<[4,1280,24,24],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,1280,24,24],f16>
%4368 = torch.prim.ListConstruct %int4, %int1280, %int24, %int24 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4369 = torch.aten.broadcast_to %4367, %4368 : !torch.vtensor<[4,1280,24,24],f16>, !torch.list<int> -> !torch.vtensor<[4,1280,24,24],f16>
%4370 = torch.aten.sigmoid %4369 : !torch.vtensor<[4,1280,24,24],f16> -> !torch.vtensor<[4,1280,24,24],f16>
%4371 = torch.aten.mul.Tensor %4370, %4369 : !torch.vtensor<[4,1280,24,24],f16>, !torch.vtensor<[4,1280,24,24],f16> -> !torch.vtensor<[4,1280,24,24],f16>
%4372 = torch.aten.view %4371, %3705 : !torch.vtensor<[4,1280,24,24],f16>, !torch.list<int> -> !torch.vtensor<[4,80,16,24,24],f16>
%4373 = torch.aten.abs %4372 : !torch.vtensor<[4,80,16,24,24],f16> -> !torch.vtensor<[4,80,16,24,24],f16>
%values_168, %indices_169 = torch.aten.max.dim %4373, %int2, %true : !torch.vtensor<[4,80,16,24,24],f16>, !torch.int, !torch.bool -> !torch.vtensor<[4,80,1,24,24],f16>, !torch.vtensor<[4,80,1,24,24],si64>
%4374 = torch.aten.view %values_168, %3708 : !torch.vtensor<[4,80,1,24,24],f16>, !torch.list<int> -> !torch.vtensor<[4,80,1,24,24],f16>
%4375 = torch.aten.broadcast_to %4374, %3705 : !torch.vtensor<[4,80,1,24,24],f16>, !torch.list<int> -> !torch.vtensor<[4,80,16,24,24],f16>
%4376 = torch.aten.clone %4375, %int0 : !torch.vtensor<[4,80,16,24,24],f16>, !torch.int -> !torch.vtensor<[4,80,16,24,24],f16>
%4377 = torch.aten.view %4376, %3686 : !torch.vtensor<[4,80,16,24,24],f16>, !torch.list<int> -> !torch.vtensor<[4,1280,24,24],f16>
%4378 = torch.aten.div.Scalar %4377, %int128 : !torch.vtensor<[4,1280,24,24],f16>, !torch.int -> !torch.vtensor<[4,1280,24,24],f16>
%4379 = torch.aten.detach %936 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%4380 = torch.aten.detach %4379 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%4381 = torch.aten.div.Tensor %4371, %4378 : !torch.vtensor<[4,1280,24,24],f16>, !torch.vtensor<[4,1280,24,24],f16> -> !torch.vtensor<[4,1280,24,24],f16>
%4382 = torch.aten.add.Tensor %4381, %4380, %int1 : !torch.vtensor<[4,1280,24,24],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,1280,24,24],f16>
%4383 = torch.aten.round %4382 : !torch.vtensor<[4,1280,24,24],f16> -> !torch.vtensor<[4,1280,24,24],f16>
%4384 = torch.aten.clamp %4383, %int-128, %int127 : !torch.vtensor<[4,1280,24,24],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,1280,24,24],f16>
%4385 = torch.aten.sub.Tensor %4384, %4380, %int1 : !torch.vtensor<[4,1280,24,24],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,1280,24,24],f16>
%4386 = torch.aten.mul.Tensor %4385, %4378 : !torch.vtensor<[4,1280,24,24],f16>, !torch.vtensor<[4,1280,24,24],f16> -> !torch.vtensor<[4,1280,24,24],f16>
%4387 = torch.aten.broadcast_to %664, %3722 : !torch.vtensor<[1280,80,1,3,3],f16>, !torch.list<int> -> !torch.vtensor<[1280,80,16,3,3],f16>
%4388 = torch.aten.clone %4387, %int0 : !torch.vtensor<[1280,80,16,3,3],f16>, !torch.int -> !torch.vtensor<[1280,80,16,3,3],f16>
%4389 = torch.aten.view %4388, %3725 : !torch.vtensor<[1280,80,16,3,3],f16>, !torch.list<int> -> !torch.vtensor<[1280,1280,3,3],f16>
%4390 = torch.aten.mul.Tensor %663, %4389 : !torch.vtensor<[1280,1280,3,3],si8>, !torch.vtensor<[1280,1280,3,3],f16> -> !torch.vtensor<[1280,1280,3,3],f16>
%4391 = torch.aten.convolution %4386, %4390, %662, %984, %984, %984, %false, %985, %int1 : !torch.vtensor<[4,1280,24,24],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[4,1280,24,24],f16>
%4392 = torch.aten.add.Tensor %4262, %4391, %int1 : !torch.vtensor<[4,1280,24,24],f16>, !torch.vtensor<[4,1280,24,24],f16>, !torch.int -> !torch.vtensor<[4,1280,24,24],f16>
%4393 = torch.aten.div.Tensor %4392, %925 : !torch.vtensor<[4,1280,24,24],f16>, !torch.vtensor<[],f16> -> !torch.vtensor<[4,1280,24,24],f16>
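    // %4392 closes the ResNet block with its residual add; the divide by the scalar tensor %925 is presumably a
    // fixed output rescale factor defined in the truncated prelude. The GroupNorm that follows feeds the
    // transformer block.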
%4394 = torch.aten.view %4393, %3663 : !torch.vtensor<[4,1280,24,24],f16>, !torch.list<int> -> !torch.vtensor<[4,32,40,576],f16>
%4395 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%4396 = torch.aten.to.dtype %4395, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%4397 = torch.prim.ListConstruct %int4, %int32, %int40, %int576 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4398 = torch.aten.broadcast_to %4396, %4397 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[4,32,40,576],f32>
%4399 = torch.aten.to.dtype %4394, %int6, %false, %false, %none : !torch.vtensor<[4,32,40,576],f16>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,32,40,576],f32>
%4400 = torch.prim.ListConstruct %int4, %int32, %int40, %int576 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4401 = torch.aten.broadcast_to %4399, %4400 : !torch.vtensor<[4,32,40,576],f32>, !torch.list<int> -> !torch.vtensor<[4,32,40,576],f32>
%4402 = torch.aten.to.dtype %4401, %int7, %false, %false, %none : !torch.vtensor<[4,32,40,576],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,32,40,576],f64>
%4403 = torch.aten.sum.dim_IntList %4402, %996, %true, %none : !torch.vtensor<[4,32,40,576],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f64>
%4404 = torch.aten.div.Scalar %4403, %int23040 : !torch.vtensor<[4,32,1,1],f64>, !torch.int -> !torch.vtensor<[4,32,1,1],f64>
%4405 = torch.aten.sub.Tensor %4402, %4404, %float1.000000e00 : !torch.vtensor<[4,32,40,576],f64>, !torch.vtensor<[4,32,1,1],f64>, !torch.float -> !torch.vtensor<[4,32,40,576],f64>
%4406 = torch.aten.mul.Tensor %4405, %4405 : !torch.vtensor<[4,32,40,576],f64>, !torch.vtensor<[4,32,40,576],f64> -> !torch.vtensor<[4,32,40,576],f64>
%4407 = torch.aten.sum.dim_IntList %4406, %996, %true, %none : !torch.vtensor<[4,32,40,576],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f64>
%4408 = torch.aten.div.Scalar %4407, %int23040 : !torch.vtensor<[4,32,1,1],f64>, !torch.int -> !torch.vtensor<[4,32,1,1],f64>
%4409 = torch.aten.to.dtype %4408, %int6, %false, %false, %none : !torch.vtensor<[4,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f32>
%4410 = torch.aten.sum.dim_IntList %4401, %996, %true, %none : !torch.vtensor<[4,32,40,576],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f32>
%4411 = torch.aten.div.Scalar %4410, %int23040 : !torch.vtensor<[4,32,1,1],f32>, !torch.int -> !torch.vtensor<[4,32,1,1],f32>
%4412 = torch.aten.add.Tensor %4409, %924, %int1 : !torch.vtensor<[4,32,1,1],f32>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,32,1,1],f32>
%4413 = torch.aten.rsqrt %4412 : !torch.vtensor<[4,32,1,1],f32> -> !torch.vtensor<[4,32,1,1],f32>
%4414 = torch.aten.sub.Tensor %4394, %4411, %int1 : !torch.vtensor<[4,32,40,576],f16>, !torch.vtensor<[4,32,1,1],f32>, !torch.int -> !torch.vtensor<[4,32,40,576],f32>
%4415 = torch.aten.mul.Tensor %4414, %4413 : !torch.vtensor<[4,32,40,576],f32>, !torch.vtensor<[4,32,1,1],f32> -> !torch.vtensor<[4,32,40,576],f32>
%4416 = torch.aten.view %4415, %3686 : !torch.vtensor<[4,32,40,576],f32>, !torch.list<int> -> !torch.vtensor<[4,1280,24,24],f32>
%4417 = torch.aten.unsqueeze %661, %int0 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1,1280],f16>
%4418 = torch.aten.unsqueeze %4417, %int2 : !torch.vtensor<[1,1280],f16>, !torch.int -> !torch.vtensor<[1,1280,1],f16>
%4419 = torch.aten.unsqueeze %4418, %int3 : !torch.vtensor<[1,1280,1],f16>, !torch.int -> !torch.vtensor<[1,1280,1,1],f16>
%4420 = torch.aten.unsqueeze %660, %int0 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1,1280],f16>
%4421 = torch.aten.unsqueeze %4420, %int2 : !torch.vtensor<[1,1280],f16>, !torch.int -> !torch.vtensor<[1,1280,1],f16>
%4422 = torch.aten.unsqueeze %4421, %int3 : !torch.vtensor<[1,1280,1],f16>, !torch.int -> !torch.vtensor<[1,1280,1,1],f16>
%4423 = torch.aten.mul.Tensor %4416, %4422 : !torch.vtensor<[4,1280,24,24],f32>, !torch.vtensor<[1,1280,1,1],f16> -> !torch.vtensor<[4,1280,24,24],f32>
%4424 = torch.aten.add.Tensor %4423, %4419, %int1 : !torch.vtensor<[4,1280,24,24],f32>, !torch.vtensor<[1,1280,1,1],f16>, !torch.int -> !torch.vtensor<[4,1280,24,24],f32>
%4425 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%4426 = torch.aten.to.dtype %4425, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%4427 = torch.prim.ListConstruct %int4, %int1280, %int24, %int24 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4428 = torch.aten.broadcast_to %4426, %4427 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[4,1280,24,24],f16>
%4429 = torch.aten.to.dtype %4424, %int5, %false, %false, %none : !torch.vtensor<[4,1280,24,24],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,1280,24,24],f16>
%4430 = torch.prim.ListConstruct %int4, %int1280, %int24, %int24 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4431 = torch.aten.broadcast_to %4429, %4430 : !torch.vtensor<[4,1280,24,24],f16>, !torch.list<int> -> !torch.vtensor<[4,1280,24,24],f16>
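    // Permute NCHW -> NHWC and flatten the 24x24 grid into a 576-token sequence, [4,576,1280], for the transformer block.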
%4432 = torch.aten.permute %4431, %1163 : !torch.vtensor<[4,1280,24,24],f16>, !torch.list<int> -> !torch.vtensor<[4,24,24,1280],f16>
%4433 = torch.aten.view %4432, %3792 : !torch.vtensor<[4,24,24,1280],f16>, !torch.list<int> -> !torch.vtensor<[4,576,1280],f16>
%4434 = torch.aten.clone %4433, %int0 : !torch.vtensor<[4,576,1280],f16>, !torch.int -> !torch.vtensor<[4,576,1280],f16>
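    // Token fake-quant (same abs-max/128 recipe over 16-wide slices of the 1280 dim), then what appears to be the
    // quantized proj_in linear: si8 weight %658 dequantized by scales %659, bias %657.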
%4435 = torch.aten.view %4434, %3795 : !torch.vtensor<[4,576,1280],f16>, !torch.list<int> -> !torch.vtensor<[4,576,80,16],f16>
%4436 = torch.aten.abs %4435 : !torch.vtensor<[4,576,80,16],f16> -> !torch.vtensor<[4,576,80,16],f16>
%values_170, %indices_171 = torch.aten.max.dim %4436, %int3, %true : !torch.vtensor<[4,576,80,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[4,576,80,1],f16>, !torch.vtensor<[4,576,80,1],si64>
%4437 = torch.aten.view %values_170, %3798 : !torch.vtensor<[4,576,80,1],f16>, !torch.list<int> -> !torch.vtensor<[4,576,80,1],f16>
%4438 = torch.aten.broadcast_to %4437, %3795 : !torch.vtensor<[4,576,80,1],f16>, !torch.list<int> -> !torch.vtensor<[4,576,80,16],f16>
%4439 = torch.aten.clone %4438, %int0 : !torch.vtensor<[4,576,80,16],f16>, !torch.int -> !torch.vtensor<[4,576,80,16],f16>
%4440 = torch.aten.view %4439, %3792 : !torch.vtensor<[4,576,80,16],f16>, !torch.list<int> -> !torch.vtensor<[4,576,1280],f16>
%4441 = torch.aten.div.Scalar %4440, %int128 : !torch.vtensor<[4,576,1280],f16>, !torch.int -> !torch.vtensor<[4,576,1280],f16>
%4442 = torch.aten.detach %936 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%4443 = torch.aten.detach %4442 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%4444 = torch.aten.div.Tensor %4434, %4441 : !torch.vtensor<[4,576,1280],f16>, !torch.vtensor<[4,576,1280],f16> -> !torch.vtensor<[4,576,1280],f16>
%4445 = torch.aten.add.Tensor %4444, %4443, %int1 : !torch.vtensor<[4,576,1280],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,576,1280],f16>
%4446 = torch.aten.round %4445 : !torch.vtensor<[4,576,1280],f16> -> !torch.vtensor<[4,576,1280],f16>
%4447 = torch.aten.clamp %4446, %int-128, %int127 : !torch.vtensor<[4,576,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,576,1280],f16>
%4448 = torch.aten.sub.Tensor %4447, %4443, %int1 : !torch.vtensor<[4,576,1280],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,576,1280],f16>
%4449 = torch.aten.mul.Tensor %4448, %4441 : !torch.vtensor<[4,576,1280],f16>, !torch.vtensor<[4,576,1280],f16> -> !torch.vtensor<[4,576,1280],f16>
%4450 = torch.aten.broadcast_to %659, %3812 : !torch.vtensor<[1280,80,1],f16>, !torch.list<int> -> !torch.vtensor<[1280,80,16],f16>
%4451 = torch.aten.clone %4450, %int0 : !torch.vtensor<[1280,80,16],f16>, !torch.int -> !torch.vtensor<[1280,80,16],f16>
%4452 = torch.aten.view %4451, %3815 : !torch.vtensor<[1280,80,16],f16>, !torch.list<int> -> !torch.vtensor<[1280,1280],f16>
%4453 = torch.aten.mul.Tensor %658, %4452 : !torch.vtensor<[1280,1280],si8>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%4454 = torch.aten.transpose.int %4453, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%4455 = torch.aten.view %4449, %3819 : !torch.vtensor<[4,576,1280],f16>, !torch.list<int> -> !torch.vtensor<[2304,1280],f16>
%4456 = torch.aten.mm %4455, %4454 : !torch.vtensor<[2304,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[2304,1280],f16>
%4457 = torch.aten.mul.Scalar %657, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16>
%4458 = torch.aten.add.Tensor %4457, %4456, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[2304,1280],f16>, !torch.int -> !torch.vtensor<[2304,1280],f16>
%4459 = torch.aten.view %4458, %3792 : !torch.vtensor<[2304,1280],f16>, !torch.list<int> -> !torch.vtensor<[4,576,1280],f16>
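    // LayerNorm over the last dim, decomposed by hand: mean, centered square, variance, rsqrt(var + 1e-5), then
    // the affine weight %656 and bias %655.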
%4460 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%4461 = torch.aten.sum.dim_IntList %4459, %4460, %true, %none : !torch.vtensor<[4,576,1280],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,576,1],f16>
%4462 = torch.aten.div.Scalar %4461, %int1280 : !torch.vtensor<[4,576,1],f16>, !torch.int -> !torch.vtensor<[4,576,1],f16>
%4463 = torch.prim.ListConstruct %int4, %int576, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4464 = torch.aten.broadcast_to %4462, %4463 : !torch.vtensor<[4,576,1],f16>, !torch.list<int> -> !torch.vtensor<[4,576,1280],f16>
%4465 = torch.aten.sub.Tensor %4459, %4464, %int1 : !torch.vtensor<[4,576,1280],f16>, !torch.vtensor<[4,576,1280],f16>, !torch.int -> !torch.vtensor<[4,576,1280],f16>
%4466 = torch.aten.mul.Tensor %4465, %4465 : !torch.vtensor<[4,576,1280],f16>, !torch.vtensor<[4,576,1280],f16> -> !torch.vtensor<[4,576,1280],f16>
%4467 = torch.aten.sum.dim_IntList %4466, %4460, %true, %none : !torch.vtensor<[4,576,1280],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,576,1],f16>
%4468 = torch.aten.div.Scalar %4467, %int1280 : !torch.vtensor<[4,576,1],f16>, !torch.int -> !torch.vtensor<[4,576,1],f16>
%4469 = torch.aten.add.Scalar %4468, %float1.000000e-05, %int1 : !torch.vtensor<[4,576,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[4,576,1],f16>
%4470 = torch.aten.rsqrt %4469 : !torch.vtensor<[4,576,1],f16> -> !torch.vtensor<[4,576,1],f16>
%4471 = torch.prim.ListConstruct %int4, %int576, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4472 = torch.aten.broadcast_to %4470, %4471 : !torch.vtensor<[4,576,1],f16>, !torch.list<int> -> !torch.vtensor<[4,576,1280],f16>
%4473 = torch.aten.mul.Tensor %4465, %4472 : !torch.vtensor<[4,576,1280],f16>, !torch.vtensor<[4,576,1280],f16> -> !torch.vtensor<[4,576,1280],f16>
%4474 = torch.aten.mul.Tensor %4473, %656 : !torch.vtensor<[4,576,1280],f16>, !torch.vtensor<[1280],f16> -> !torch.vtensor<[4,576,1280],f16>
%4475 = torch.aten.add.Tensor %4474, %655, %int1 : !torch.vtensor<[4,576,1280],f16>, !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[4,576,1280],f16>
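    // Multi-head self-attention, 20 heads of dim 64. The normalized tokens %4475 are fake-quantized once per
    // projection and pushed through three quantized linears, in order apparently Q (%653/%654), K (%651/%652),
    // and V (%649/%650); each result is reshaped to [80,576,64] (batch 4 x 20 heads).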
%4476 = torch.aten.view %4475, %3795 : !torch.vtensor<[4,576,1280],f16>, !torch.list<int> -> !torch.vtensor<[4,576,80,16],f16>
%4477 = torch.aten.abs %4476 : !torch.vtensor<[4,576,80,16],f16> -> !torch.vtensor<[4,576,80,16],f16>
%values_172, %indices_173 = torch.aten.max.dim %4477, %int3, %true : !torch.vtensor<[4,576,80,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[4,576,80,1],f16>, !torch.vtensor<[4,576,80,1],si64>
%4478 = torch.aten.view %values_172, %3798 : !torch.vtensor<[4,576,80,1],f16>, !torch.list<int> -> !torch.vtensor<[4,576,80,1],f16>
%4479 = torch.aten.broadcast_to %4478, %3795 : !torch.vtensor<[4,576,80,1],f16>, !torch.list<int> -> !torch.vtensor<[4,576,80,16],f16>
%4480 = torch.aten.clone %4479, %int0 : !torch.vtensor<[4,576,80,16],f16>, !torch.int -> !torch.vtensor<[4,576,80,16],f16>
%4481 = torch.aten.view %4480, %3792 : !torch.vtensor<[4,576,80,16],f16>, !torch.list<int> -> !torch.vtensor<[4,576,1280],f16>
%4482 = torch.aten.div.Scalar %4481, %int128 : !torch.vtensor<[4,576,1280],f16>, !torch.int -> !torch.vtensor<[4,576,1280],f16>
%4483 = torch.aten.detach %936 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%4484 = torch.aten.detach %4483 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%4485 = torch.aten.div.Tensor %4475, %4482 : !torch.vtensor<[4,576,1280],f16>, !torch.vtensor<[4,576,1280],f16> -> !torch.vtensor<[4,576,1280],f16>
%4486 = torch.aten.add.Tensor %4485, %4484, %int1 : !torch.vtensor<[4,576,1280],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,576,1280],f16>
%4487 = torch.aten.round %4486 : !torch.vtensor<[4,576,1280],f16> -> !torch.vtensor<[4,576,1280],f16>
%4488 = torch.aten.clamp %4487, %int-128, %int127 : !torch.vtensor<[4,576,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,576,1280],f16>
%4489 = torch.aten.sub.Tensor %4488, %4484, %int1 : !torch.vtensor<[4,576,1280],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,576,1280],f16>
%4490 = torch.aten.mul.Tensor %4489, %4482 : !torch.vtensor<[4,576,1280],f16>, !torch.vtensor<[4,576,1280],f16> -> !torch.vtensor<[4,576,1280],f16>
%4491 = torch.aten.broadcast_to %654, %3812 : !torch.vtensor<[1280,80,1],f16>, !torch.list<int> -> !torch.vtensor<[1280,80,16],f16>
%4492 = torch.aten.clone %4491, %int0 : !torch.vtensor<[1280,80,16],f16>, !torch.int -> !torch.vtensor<[1280,80,16],f16>
%4493 = torch.aten.view %4492, %3815 : !torch.vtensor<[1280,80,16],f16>, !torch.list<int> -> !torch.vtensor<[1280,1280],f16>
%4494 = torch.aten.mul.Tensor %653, %4493 : !torch.vtensor<[1280,1280],si8>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%4495 = torch.aten.transpose.int %4494, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%4496 = torch.aten.view %4490, %3819 : !torch.vtensor<[4,576,1280],f16>, !torch.list<int> -> !torch.vtensor<[2304,1280],f16>
%4497 = torch.aten.mm %4496, %4495 : !torch.vtensor<[2304,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[2304,1280],f16>
%4498 = torch.aten.view %4497, %3792 : !torch.vtensor<[2304,1280],f16>, !torch.list<int> -> !torch.vtensor<[4,576,1280],f16>
%4499 = torch.aten.view %4498, %3864 : !torch.vtensor<[4,576,1280],f16>, !torch.list<int> -> !torch.vtensor<[4,576,20,64],f16>
%4500 = torch.aten.permute %4499, %1239 : !torch.vtensor<[4,576,20,64],f16>, !torch.list<int> -> !torch.vtensor<[4,20,576,64],f16>
%4501 = torch.aten.clone %4500, %int0 : !torch.vtensor<[4,20,576,64],f16>, !torch.int -> !torch.vtensor<[4,20,576,64],f16>
%4502 = torch.aten.view %4501, %3868 : !torch.vtensor<[4,20,576,64],f16>, !torch.list<int> -> !torch.vtensor<[80,576,64],f16>
%4503 = torch.aten.view %4475, %3795 : !torch.vtensor<[4,576,1280],f16>, !torch.list<int> -> !torch.vtensor<[4,576,80,16],f16>
%4504 = torch.aten.abs %4503 : !torch.vtensor<[4,576,80,16],f16> -> !torch.vtensor<[4,576,80,16],f16>
%values_174, %indices_175 = torch.aten.max.dim %4504, %int3, %true : !torch.vtensor<[4,576,80,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[4,576,80,1],f16>, !torch.vtensor<[4,576,80,1],si64>
%4505 = torch.aten.view %values_174, %3798 : !torch.vtensor<[4,576,80,1],f16>, !torch.list<int> -> !torch.vtensor<[4,576,80,1],f16>
%4506 = torch.aten.broadcast_to %4505, %3795 : !torch.vtensor<[4,576,80,1],f16>, !torch.list<int> -> !torch.vtensor<[4,576,80,16],f16>
%4507 = torch.aten.clone %4506, %int0 : !torch.vtensor<[4,576,80,16],f16>, !torch.int -> !torch.vtensor<[4,576,80,16],f16>
%4508 = torch.aten.view %4507, %3792 : !torch.vtensor<[4,576,80,16],f16>, !torch.list<int> -> !torch.vtensor<[4,576,1280],f16>
%4509 = torch.aten.div.Scalar %4508, %int128 : !torch.vtensor<[4,576,1280],f16>, !torch.int -> !torch.vtensor<[4,576,1280],f16>
%4510 = torch.aten.detach %936 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%4511 = torch.aten.detach %4510 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%4512 = torch.aten.div.Tensor %4475, %4509 : !torch.vtensor<[4,576,1280],f16>, !torch.vtensor<[4,576,1280],f16> -> !torch.vtensor<[4,576,1280],f16>
%4513 = torch.aten.add.Tensor %4512, %4511, %int1 : !torch.vtensor<[4,576,1280],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,576,1280],f16>
%4514 = torch.aten.round %4513 : !torch.vtensor<[4,576,1280],f16> -> !torch.vtensor<[4,576,1280],f16>
%4515 = torch.aten.clamp %4514, %int-128, %int127 : !torch.vtensor<[4,576,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,576,1280],f16>
%4516 = torch.aten.sub.Tensor %4515, %4511, %int1 : !torch.vtensor<[4,576,1280],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,576,1280],f16>
%4517 = torch.aten.mul.Tensor %4516, %4509 : !torch.vtensor<[4,576,1280],f16>, !torch.vtensor<[4,576,1280],f16> -> !torch.vtensor<[4,576,1280],f16>
%4518 = torch.aten.broadcast_to %652, %3812 : !torch.vtensor<[1280,80,1],f16>, !torch.list<int> -> !torch.vtensor<[1280,80,16],f16>
%4519 = torch.aten.clone %4518, %int0 : !torch.vtensor<[1280,80,16],f16>, !torch.int -> !torch.vtensor<[1280,80,16],f16>
%4520 = torch.aten.view %4519, %3815 : !torch.vtensor<[1280,80,16],f16>, !torch.list<int> -> !torch.vtensor<[1280,1280],f16>
%4521 = torch.aten.mul.Tensor %651, %4520 : !torch.vtensor<[1280,1280],si8>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%4522 = torch.aten.transpose.int %4521, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%4523 = torch.aten.view %4517, %3819 : !torch.vtensor<[4,576,1280],f16>, !torch.list<int> -> !torch.vtensor<[2304,1280],f16>
%4524 = torch.aten.mm %4523, %4522 : !torch.vtensor<[2304,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[2304,1280],f16>
%4525 = torch.aten.view %4524, %3792 : !torch.vtensor<[2304,1280],f16>, !torch.list<int> -> !torch.vtensor<[4,576,1280],f16>
%4526 = torch.aten.view %4475, %3795 : !torch.vtensor<[4,576,1280],f16>, !torch.list<int> -> !torch.vtensor<[4,576,80,16],f16>
%4527 = torch.aten.abs %4526 : !torch.vtensor<[4,576,80,16],f16> -> !torch.vtensor<[4,576,80,16],f16>
%values_176, %indices_177 = torch.aten.max.dim %4527, %int3, %true : !torch.vtensor<[4,576,80,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[4,576,80,1],f16>, !torch.vtensor<[4,576,80,1],si64>
%4528 = torch.aten.view %values_176, %3798 : !torch.vtensor<[4,576,80,1],f16>, !torch.list<int> -> !torch.vtensor<[4,576,80,1],f16>
%4529 = torch.aten.broadcast_to %4528, %3795 : !torch.vtensor<[4,576,80,1],f16>, !torch.list<int> -> !torch.vtensor<[4,576,80,16],f16>
%4530 = torch.aten.clone %4529, %int0 : !torch.vtensor<[4,576,80,16],f16>, !torch.int -> !torch.vtensor<[4,576,80,16],f16>
%4531 = torch.aten.view %4530, %3792 : !torch.vtensor<[4,576,80,16],f16>, !torch.list<int> -> !torch.vtensor<[4,576,1280],f16>
%4532 = torch.aten.div.Scalar %4531, %int128 : !torch.vtensor<[4,576,1280],f16>, !torch.int -> !torch.vtensor<[4,576,1280],f16>
%4533 = torch.aten.detach %936 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%4534 = torch.aten.detach %4533 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%4535 = torch.aten.div.Tensor %4475, %4532 : !torch.vtensor<[4,576,1280],f16>, !torch.vtensor<[4,576,1280],f16> -> !torch.vtensor<[4,576,1280],f16>
%4536 = torch.aten.add.Tensor %4535, %4534, %int1 : !torch.vtensor<[4,576,1280],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,576,1280],f16>
%4537 = torch.aten.round %4536 : !torch.vtensor<[4,576,1280],f16> -> !torch.vtensor<[4,576,1280],f16>
%4538 = torch.aten.clamp %4537, %int-128, %int127 : !torch.vtensor<[4,576,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,576,1280],f16>
%4539 = torch.aten.sub.Tensor %4538, %4534, %int1 : !torch.vtensor<[4,576,1280],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,576,1280],f16>
%4540 = torch.aten.mul.Tensor %4539, %4532 : !torch.vtensor<[4,576,1280],f16>, !torch.vtensor<[4,576,1280],f16> -> !torch.vtensor<[4,576,1280],f16>
%4541 = torch.aten.broadcast_to %650, %3812 : !torch.vtensor<[1280,80,1],f16>, !torch.list<int> -> !torch.vtensor<[1280,80,16],f16>
%4542 = torch.aten.clone %4541, %int0 : !torch.vtensor<[1280,80,16],f16>, !torch.int -> !torch.vtensor<[1280,80,16],f16>
%4543 = torch.aten.view %4542, %3815 : !torch.vtensor<[1280,80,16],f16>, !torch.list<int> -> !torch.vtensor<[1280,1280],f16>
%4544 = torch.aten.mul.Tensor %649, %4543 : !torch.vtensor<[1280,1280],si8>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%4545 = torch.aten.transpose.int %4544, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%4546 = torch.aten.view %4540, %3819 : !torch.vtensor<[4,576,1280],f16>, !torch.list<int> -> !torch.vtensor<[2304,1280],f16>
%4547 = torch.aten.mm %4546, %4545 : !torch.vtensor<[2304,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[2304,1280],f16>
%4548 = torch.aten.view %4547, %3792 : !torch.vtensor<[2304,1280],f16>, !torch.list<int> -> !torch.vtensor<[4,576,1280],f16>
%4549 = torch.aten.view %4525, %3864 : !torch.vtensor<[4,576,1280],f16>, !torch.list<int> -> !torch.vtensor<[4,576,20,64],f16>
%4550 = torch.aten.permute %4549, %1239 : !torch.vtensor<[4,576,20,64],f16>, !torch.list<int> -> !torch.vtensor<[4,20,576,64],f16>
%4551 = torch.aten.clone %4550, %int0 : !torch.vtensor<[4,20,576,64],f16>, !torch.int -> !torch.vtensor<[4,20,576,64],f16>
%4552 = torch.aten.view %4551, %3868 : !torch.vtensor<[4,20,576,64],f16>, !torch.list<int> -> !torch.vtensor<[80,576,64],f16>
%4553 = torch.aten.view %4548, %3864 : !torch.vtensor<[4,576,1280],f16>, !torch.list<int> -> !torch.vtensor<[4,576,20,64],f16>
%4554 = torch.aten.permute %4553, %1239 : !torch.vtensor<[4,576,20,64],f16>, !torch.list<int> -> !torch.vtensor<[4,20,576,64],f16>
%4555 = torch.aten.clone %4554, %int0 : !torch.vtensor<[4,20,576,64],f16>, !torch.int -> !torch.vtensor<[4,20,576,64],f16>
%4556 = torch.aten.view %4555, %3868 : !torch.vtensor<[4,20,576,64],f16>, !torch.list<int> -> !torch.vtensor<[80,576,64],f16>
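    // Scaled dot-product attention in f32: Q.K^T scaled by 0.125 = 1/sqrt(64), then a numerically stable softmax
    // (subtract the row max, exp, normalize). The empty.memory_format tensor added with alpha 0 contributes
    // nothing; it looks like residue of decomposing baddbmm with beta = 0.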
%4557 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%4558 = torch.aten.to.dtype %4557, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%4559 = torch.prim.ListConstruct %int80, %int576, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4560 = torch.aten.broadcast_to %4558, %4559 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[80,576,64],f32>
%4561 = torch.aten.to.dtype %4502, %int6, %false, %false, %none : !torch.vtensor<[80,576,64],f16>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[80,576,64],f32>
%4562 = torch.prim.ListConstruct %int80, %int576, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4563 = torch.aten.broadcast_to %4561, %4562 : !torch.vtensor<[80,576,64],f32>, !torch.list<int> -> !torch.vtensor<[80,576,64],f32>
%4564 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%4565 = torch.aten.to.dtype %4564, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%4566 = torch.prim.ListConstruct %int80, %int576, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4567 = torch.aten.broadcast_to %4565, %4566 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[80,576,64],f32>
%4568 = torch.aten.to.dtype %4552, %int6, %false, %false, %none : !torch.vtensor<[80,576,64],f16>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[80,576,64],f32>
%4569 = torch.prim.ListConstruct %int80, %int576, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4570 = torch.aten.broadcast_to %4568, %4569 : !torch.vtensor<[80,576,64],f32>, !torch.list<int> -> !torch.vtensor<[80,576,64],f32>
%4571 = torch.aten.empty.memory_format %3938, %int6, %none, %cuda3A0, %false, %none : !torch.list<int>, !torch.int, !torch.none, !torch.Device, !torch.bool, !torch.none -> !torch.vtensor<[80,576,576],f32>
%4572 = torch.aten.transpose.int %4570, %int-1, %int-2 : !torch.vtensor<[80,576,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[80,64,576],f32>
%4573 = torch.aten.bmm %4563, %4572 : !torch.vtensor<[80,576,64],f32>, !torch.vtensor<[80,64,576],f32> -> !torch.vtensor<[80,576,576],f32>
%4574 = torch.aten.mul.Scalar %4573, %float1.250000e-01 : !torch.vtensor<[80,576,576],f32>, !torch.float -> !torch.vtensor<[80,576,576],f32>
%4575 = torch.aten.add.Tensor %4574, %4571, %int0 : !torch.vtensor<[80,576,576],f32>, !torch.vtensor<[80,576,576],f32>, !torch.int -> !torch.vtensor<[80,576,576],f32>
%values_178, %indices_179 = torch.aten.max.dim %4575, %int-1, %true : !torch.vtensor<[80,576,576],f32>, !torch.int, !torch.bool -> !torch.vtensor<[80,576,1],f32>, !torch.vtensor<[80,576,1],si64>
%4576 = torch.aten.sub.Tensor %4575, %values_178, %float1.000000e00 : !torch.vtensor<[80,576,576],f32>, !torch.vtensor<[80,576,1],f32>, !torch.float -> !torch.vtensor<[80,576,576],f32>
%4577 = torch.aten.exp %4576 : !torch.vtensor<[80,576,576],f32> -> !torch.vtensor<[80,576,576],f32>
%4578 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%4579 = torch.aten.sum.dim_IntList %4577, %4578, %true, %none : !torch.vtensor<[80,576,576],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[80,576,1],f32>
%4580 = torch.aten.div.Tensor %4577, %4579 : !torch.vtensor<[80,576,576],f32>, !torch.vtensor<[80,576,1],f32> -> !torch.vtensor<[80,576,576],f32>
%4581 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%4582 = torch.aten.to.dtype %4581, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%4583 = torch.prim.ListConstruct %int80, %int576, %int576 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4584 = torch.aten.broadcast_to %4582, %4583 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[80,576,576],f16>
%4585 = torch.aten.to.dtype %4580, %int5, %false, %false, %none : !torch.vtensor<[80,576,576],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[80,576,576],f16>
%4586 = torch.prim.ListConstruct %int80, %int576, %int576 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4587 = torch.aten.broadcast_to %4585, %4586 : !torch.vtensor<[80,576,576],f16>, !torch.list<int> -> !torch.vtensor<[80,576,576],f16>
%4588 = torch.aten.bmm %4587, %4556 : !torch.vtensor<[80,576,576],f16>, !torch.vtensor<[80,576,64],f16> -> !torch.vtensor<[80,576,64],f16>
%4589 = torch.aten.view %4588, %3957 : !torch.vtensor<[80,576,64],f16>, !torch.list<int> -> !torch.vtensor<[4,20,576,64],f16>
%4590 = torch.aten.permute %4589, %1239 : !torch.vtensor<[4,20,576,64],f16>, !torch.list<int> -> !torch.vtensor<[4,576,20,64],f16>
%4591 = torch.aten.clone %4590, %int0 : !torch.vtensor<[4,576,20,64],f16>, !torch.int -> !torch.vtensor<[4,576,20,64],f16>
%4592 = torch.aten.view %4591, %3792 : !torch.vtensor<[4,576,20,64],f16>, !torch.list<int> -> !torch.vtensor<[4,576,1280],f16>
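    // Attention output merged back to [4,576,1280], fake-quantized, and sent through the quantized to_out
    // projection (%647 int8 weight, %648 scales, %646 bias); the residual add onto the pre-attention tokens
    // %4459 follows.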
%4593 = torch.aten.view %4592, %3795 : !torch.vtensor<[4,576,1280],f16>, !torch.list<int> -> !torch.vtensor<[4,576,80,16],f16>
%4594 = torch.aten.abs %4593 : !torch.vtensor<[4,576,80,16],f16> -> !torch.vtensor<[4,576,80,16],f16>
%values_180, %indices_181 = torch.aten.max.dim %4594, %int3, %true : !torch.vtensor<[4,576,80,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[4,576,80,1],f16>, !torch.vtensor<[4,576,80,1],si64>
%4595 = torch.aten.view %values_180, %3798 : !torch.vtensor<[4,576,80,1],f16>, !torch.list<int> -> !torch.vtensor<[4,576,80,1],f16>
%4596 = torch.aten.broadcast_to %4595, %3795 : !torch.vtensor<[4,576,80,1],f16>, !torch.list<int> -> !torch.vtensor<[4,576,80,16],f16>
%4597 = torch.aten.clone %4596, %int0 : !torch.vtensor<[4,576,80,16],f16>, !torch.int -> !torch.vtensor<[4,576,80,16],f16>
%4598 = torch.aten.view %4597, %3792 : !torch.vtensor<[4,576,80,16],f16>, !torch.list<int> -> !torch.vtensor<[4,576,1280],f16>
%4599 = torch.aten.div.Scalar %4598, %int128 : !torch.vtensor<[4,576,1280],f16>, !torch.int -> !torch.vtensor<[4,576,1280],f16>
%4600 = torch.aten.detach %936 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%4601 = torch.aten.detach %4600 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%4602 = torch.aten.div.Tensor %4592, %4599 : !torch.vtensor<[4,576,1280],f16>, !torch.vtensor<[4,576,1280],f16> -> !torch.vtensor<[4,576,1280],f16>
%4603 = torch.aten.add.Tensor %4602, %4601, %int1 : !torch.vtensor<[4,576,1280],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,576,1280],f16>
%4604 = torch.aten.round %4603 : !torch.vtensor<[4,576,1280],f16> -> !torch.vtensor<[4,576,1280],f16>
%4605 = torch.aten.clamp %4604, %int-128, %int127 : !torch.vtensor<[4,576,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,576,1280],f16>
%4606 = torch.aten.sub.Tensor %4605, %4601, %int1 : !torch.vtensor<[4,576,1280],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,576,1280],f16>
%4607 = torch.aten.mul.Tensor %4606, %4599 : !torch.vtensor<[4,576,1280],f16>, !torch.vtensor<[4,576,1280],f16> -> !torch.vtensor<[4,576,1280],f16>
%4608 = torch.aten.broadcast_to %648, %3812 : !torch.vtensor<[1280,80,1],f16>, !torch.list<int> -> !torch.vtensor<[1280,80,16],f16>
%4609 = torch.aten.clone %4608, %int0 : !torch.vtensor<[1280,80,16],f16>, !torch.int -> !torch.vtensor<[1280,80,16],f16>
%4610 = torch.aten.view %4609, %3815 : !torch.vtensor<[1280,80,16],f16>, !torch.list<int> -> !torch.vtensor<[1280,1280],f16>
%4611 = torch.aten.mul.Tensor %647, %4610 : !torch.vtensor<[1280,1280],si8>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%4612 = torch.aten.transpose.int %4611, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%4613 = torch.aten.view %4607, %3819 : !torch.vtensor<[4,576,1280],f16>, !torch.list<int> -> !torch.vtensor<[2304,1280],f16>
%4614 = torch.aten.mm %4613, %4612 : !torch.vtensor<[2304,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[2304,1280],f16>
%4615 = torch.aten.mul.Scalar %646, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16>
%4616 = torch.aten.add.Tensor %4615, %4614, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[2304,1280],f16>, !torch.int -> !torch.vtensor<[2304,1280],f16>
%4617 = torch.aten.view %4616, %3792 : !torch.vtensor<[2304,1280],f16>, !torch.list<int> -> !torch.vtensor<[4,576,1280],f16>
%4618 = torch.aten.add.Tensor %4617, %4459, %int1 : !torch.vtensor<[4,576,1280],f16>, !torch.vtensor<[4,576,1280],f16>, !torch.int -> !torch.vtensor<[4,576,1280],f16>
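    // Second LayerNorm, same hand-decomposed pattern, ahead of the cross-attention sublayer.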
%4619 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%4620 = torch.aten.sum.dim_IntList %4618, %4619, %true, %none : !torch.vtensor<[4,576,1280],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,576,1],f16>
%4621 = torch.aten.div.Scalar %4620, %int1280 : !torch.vtensor<[4,576,1],f16>, !torch.int -> !torch.vtensor<[4,576,1],f16>
%4622 = torch.prim.ListConstruct %int4, %int576, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4623 = torch.aten.broadcast_to %4621, %4622 : !torch.vtensor<[4,576,1],f16>, !torch.list<int> -> !torch.vtensor<[4,576,1280],f16>
%4624 = torch.aten.sub.Tensor %4618, %4623, %int1 : !torch.vtensor<[4,576,1280],f16>, !torch.vtensor<[4,576,1280],f16>, !torch.int -> !torch.vtensor<[4,576,1280],f16>
%4625 = torch.aten.mul.Tensor %4624, %4624 : !torch.vtensor<[4,576,1280],f16>, !torch.vtensor<[4,576,1280],f16> -> !torch.vtensor<[4,576,1280],f16>
%4626 = torch.aten.sum.dim_IntList %4625, %4619, %true, %none : !torch.vtensor<[4,576,1280],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,576,1],f16>
%4627 = torch.aten.div.Scalar %4626, %int1280 : !torch.vtensor<[4,576,1],f16>, !torch.int -> !torch.vtensor<[4,576,1],f16>
%4628 = torch.aten.add.Scalar %4627, %float1.000000e-05, %int1 : !torch.vtensor<[4,576,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[4,576,1],f16>
%4629 = torch.aten.rsqrt %4628 : !torch.vtensor<[4,576,1],f16> -> !torch.vtensor<[4,576,1],f16>
%4630 = torch.prim.ListConstruct %int4, %int576, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4631 = torch.aten.broadcast_to %4629, %4630 : !torch.vtensor<[4,576,1],f16>, !torch.list<int> -> !torch.vtensor<[4,576,1280],f16>
%4632 = torch.aten.mul.Tensor %4624, %4631 : !torch.vtensor<[4,576,1280],f16>, !torch.vtensor<[4,576,1280],f16> -> !torch.vtensor<[4,576,1280],f16>
%4633 = torch.aten.mul.Tensor %4632, %645 : !torch.vtensor<[4,576,1280],f16>, !torch.vtensor<[1280],f16> -> !torch.vtensor<[4,576,1280],f16>
%4634 = torch.aten.add.Tensor %4633, %644, %int1 : !torch.vtensor<[4,576,1280],f16>, !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[4,576,1280],f16>
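    // Cross-attention: Q comes from the image tokens via the quantized linear (%642 weight, %643 scales), while
    // K and V below are taken from the [4,77,1024] conditioning input %arg2 (presumably text-encoder hidden
    // states, 77 tokens).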
%4635 = torch.aten.view %4634, %3795 : !torch.vtensor<[4,576,1280],f16>, !torch.list<int> -> !torch.vtensor<[4,576,80,16],f16>
%4636 = torch.aten.abs %4635 : !torch.vtensor<[4,576,80,16],f16> -> !torch.vtensor<[4,576,80,16],f16>
%values_182, %indices_183 = torch.aten.max.dim %4636, %int3, %true : !torch.vtensor<[4,576,80,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[4,576,80,1],f16>, !torch.vtensor<[4,576,80,1],si64>
%4637 = torch.aten.view %values_182, %3798 : !torch.vtensor<[4,576,80,1],f16>, !torch.list<int> -> !torch.vtensor<[4,576,80,1],f16>
%4638 = torch.aten.broadcast_to %4637, %3795 : !torch.vtensor<[4,576,80,1],f16>, !torch.list<int> -> !torch.vtensor<[4,576,80,16],f16>
%4639 = torch.aten.clone %4638, %int0 : !torch.vtensor<[4,576,80,16],f16>, !torch.int -> !torch.vtensor<[4,576,80,16],f16>
%4640 = torch.aten.view %4639, %3792 : !torch.vtensor<[4,576,80,16],f16>, !torch.list<int> -> !torch.vtensor<[4,576,1280],f16>
%4641 = torch.aten.div.Scalar %4640, %int128 : !torch.vtensor<[4,576,1280],f16>, !torch.int -> !torch.vtensor<[4,576,1280],f16>
%4642 = torch.aten.detach %936 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%4643 = torch.aten.detach %4642 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%4644 = torch.aten.div.Tensor %4634, %4641 : !torch.vtensor<[4,576,1280],f16>, !torch.vtensor<[4,576,1280],f16> -> !torch.vtensor<[4,576,1280],f16>
%4645 = torch.aten.add.Tensor %4644, %4643, %int1 : !torch.vtensor<[4,576,1280],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,576,1280],f16>
%4646 = torch.aten.round %4645 : !torch.vtensor<[4,576,1280],f16> -> !torch.vtensor<[4,576,1280],f16>
%4647 = torch.aten.clamp %4646, %int-128, %int127 : !torch.vtensor<[4,576,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,576,1280],f16>
%4648 = torch.aten.sub.Tensor %4647, %4643, %int1 : !torch.vtensor<[4,576,1280],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,576,1280],f16>
%4649 = torch.aten.mul.Tensor %4648, %4641 : !torch.vtensor<[4,576,1280],f16>, !torch.vtensor<[4,576,1280],f16> -> !torch.vtensor<[4,576,1280],f16>
%4650 = torch.aten.broadcast_to %643, %3812 : !torch.vtensor<[1280,80,1],f16>, !torch.list<int> -> !torch.vtensor<[1280,80,16],f16>
%4651 = torch.aten.clone %4650, %int0 : !torch.vtensor<[1280,80,16],f16>, !torch.int -> !torch.vtensor<[1280,80,16],f16>
%4652 = torch.aten.view %4651, %3815 : !torch.vtensor<[1280,80,16],f16>, !torch.list<int> -> !torch.vtensor<[1280,1280],f16>
%4653 = torch.aten.mul.Tensor %642, %4652 : !torch.vtensor<[1280,1280],si8>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%4654 = torch.aten.transpose.int %4653, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%4655 = torch.aten.view %4649, %3819 : !torch.vtensor<[4,576,1280],f16>, !torch.list<int> -> !torch.vtensor<[2304,1280],f16>
%4656 = torch.aten.mm %4655, %4654 : !torch.vtensor<[2304,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[2304,1280],f16>
%4657 = torch.aten.view %4656, %3792 : !torch.vtensor<[2304,1280],f16>, !torch.list<int> -> !torch.vtensor<[4,576,1280],f16>
%4658 = torch.aten.view %4657, %3864 : !torch.vtensor<[4,576,1280],f16>, !torch.list<int> -> !torch.vtensor<[4,576,20,64],f16>
%4659 = torch.aten.permute %4658, %1239 : !torch.vtensor<[4,576,20,64],f16>, !torch.list<int> -> !torch.vtensor<[4,20,576,64],f16>
%4660 = torch.aten.clone %4659, %int0 : !torch.vtensor<[4,20,576,64],f16>, !torch.int -> !torch.vtensor<[4,20,576,64],f16>
%4661 = torch.aten.view %4660, %3868 : !torch.vtensor<[4,20,576,64],f16>, !torch.list<int> -> !torch.vtensor<[80,576,64],f16>
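    // %arg2 is fake-quantized with the same abs-max recipe (16-wide slices of the 1024 dim) and projected
    // 1024 -> 1280 by the quantized to_k weight (%640 int8, %641 scales); the to_v path (%638/%639) repeats the
    // same steps below.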
%4662 = torch.aten.view %arg2, %1405 : !torch.vtensor<[4,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,77,64,16],f16>
%4663 = torch.aten.abs %4662 : !torch.vtensor<[4,77,64,16],f16> -> !torch.vtensor<[4,77,64,16],f16>
%values_184, %indices_185 = torch.aten.max.dim %4663, %int3, %true : !torch.vtensor<[4,77,64,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[4,77,64,1],f16>, !torch.vtensor<[4,77,64,1],si64>
%4664 = torch.aten.view %values_184, %1408 : !torch.vtensor<[4,77,64,1],f16>, !torch.list<int> -> !torch.vtensor<[4,77,64,1],f16>
%4665 = torch.aten.broadcast_to %4664, %1405 : !torch.vtensor<[4,77,64,1],f16>, !torch.list<int> -> !torch.vtensor<[4,77,64,16],f16>
%4666 = torch.aten.clone %4665, %int0 : !torch.vtensor<[4,77,64,16],f16>, !torch.int -> !torch.vtensor<[4,77,64,16],f16>
%4667 = torch.aten.view %4666, %1412 : !torch.vtensor<[4,77,64,16],f16>, !torch.list<int> -> !torch.vtensor<[4,77,1024],f16>
%4668 = torch.aten.div.Scalar %4667, %int128 : !torch.vtensor<[4,77,1024],f16>, !torch.int -> !torch.vtensor<[4,77,1024],f16>
%4669 = torch.aten.detach %936 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%4670 = torch.aten.detach %4669 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%4671 = torch.aten.div.Tensor %arg2, %4668 : !torch.vtensor<[4,77,1024],f16>, !torch.vtensor<[4,77,1024],f16> -> !torch.vtensor<[4,77,1024],f16>
%4672 = torch.aten.add.Tensor %4671, %4670, %int1 : !torch.vtensor<[4,77,1024],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,77,1024],f16>
%4673 = torch.aten.round %4672 : !torch.vtensor<[4,77,1024],f16> -> !torch.vtensor<[4,77,1024],f16>
%4674 = torch.aten.clamp %4673, %int-128, %int127 : !torch.vtensor<[4,77,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,77,1024],f16>
%4675 = torch.aten.sub.Tensor %4674, %4670, %int1 : !torch.vtensor<[4,77,1024],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,77,1024],f16>
%4676 = torch.aten.mul.Tensor %4675, %4668 : !torch.vtensor<[4,77,1024],f16>, !torch.vtensor<[4,77,1024],f16> -> !torch.vtensor<[4,77,1024],f16>
%4677 = torch.aten.broadcast_to %641, %4046 : !torch.vtensor<[1280,64,1],f16>, !torch.list<int> -> !torch.vtensor<[1280,64,16],f16>
%4678 = torch.aten.clone %4677, %int0 : !torch.vtensor<[1280,64,16],f16>, !torch.int -> !torch.vtensor<[1280,64,16],f16>
%4679 = torch.aten.view %4678, %4049 : !torch.vtensor<[1280,64,16],f16>, !torch.list<int> -> !torch.vtensor<[1280,1024],f16>
%4680 = torch.aten.mul.Tensor %640, %4679 : !torch.vtensor<[1280,1024],si8>, !torch.vtensor<[1280,1024],f16> -> !torch.vtensor<[1280,1024],f16>
%4681 = torch.aten.transpose.int %4680, %int0, %int1 : !torch.vtensor<[1280,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[1024,1280],f16>
%4682 = torch.aten.view %4676, %1430 : !torch.vtensor<[4,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[308,1024],f16>
%4683 = torch.aten.mm %4682, %4681 : !torch.vtensor<[308,1024],f16>, !torch.vtensor<[1024,1280],f16> -> !torch.vtensor<[308,1280],f16>
%4684 = torch.aten.view %4683, %4055 : !torch.vtensor<[308,1280],f16>, !torch.list<int> -> !torch.vtensor<[4,77,1280],f16>
%4685 = torch.aten.view %arg2, %1405 : !torch.vtensor<[4,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,77,64,16],f16>
%4686 = torch.aten.abs %4685 : !torch.vtensor<[4,77,64,16],f16> -> !torch.vtensor<[4,77,64,16],f16>
%values_186, %indices_187 = torch.aten.max.dim %4686, %int3, %true : !torch.vtensor<[4,77,64,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[4,77,64,1],f16>, !torch.vtensor<[4,77,64,1],si64>
%4687 = torch.aten.view %values_186, %1408 : !torch.vtensor<[4,77,64,1],f16>, !torch.list<int> -> !torch.vtensor<[4,77,64,1],f16>
%4688 = torch.aten.broadcast_to %4687, %1405 : !torch.vtensor<[4,77,64,1],f16>, !torch.list<int> -> !torch.vtensor<[4,77,64,16],f16>
%4689 = torch.aten.clone %4688, %int0 : !torch.vtensor<[4,77,64,16],f16>, !torch.int -> !torch.vtensor<[4,77,64,16],f16>
%4690 = torch.aten.view %4689, %1412 : !torch.vtensor<[4,77,64,16],f16>, !torch.list<int> -> !torch.vtensor<[4,77,1024],f16>
%4691 = torch.aten.div.Scalar %4690, %int128 : !torch.vtensor<[4,77,1024],f16>, !torch.int -> !torch.vtensor<[4,77,1024],f16>
%4692 = torch.aten.detach %936 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%4693 = torch.aten.detach %4692 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%4694 = torch.aten.div.Tensor %arg2, %4691 : !torch.vtensor<[4,77,1024],f16>, !torch.vtensor<[4,77,1024],f16> -> !torch.vtensor<[4,77,1024],f16>
%4695 = torch.aten.add.Tensor %4694, %4693, %int1 : !torch.vtensor<[4,77,1024],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,77,1024],f16>
%4696 = torch.aten.round %4695 : !torch.vtensor<[4,77,1024],f16> -> !torch.vtensor<[4,77,1024],f16>
%4697 = torch.aten.clamp %4696, %int-128, %int127 : !torch.vtensor<[4,77,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,77,1024],f16>
%4698 = torch.aten.sub.Tensor %4697, %4693, %int1 : !torch.vtensor<[4,77,1024],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,77,1024],f16>
%4699 = torch.aten.mul.Tensor %4698, %4691 : !torch.vtensor<[4,77,1024],f16>, !torch.vtensor<[4,77,1024],f16> -> !torch.vtensor<[4,77,1024],f16>
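    // V projection: dequantized int8 weight (%638 x scales %639) applied to the
    // re-quantized %arg2 -> value states [4,77,1280].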
%4700 = torch.aten.broadcast_to %639, %4046 : !torch.vtensor<[1280,64,1],f16>, !torch.list<int> -> !torch.vtensor<[1280,64,16],f16>
%4701 = torch.aten.clone %4700, %int0 : !torch.vtensor<[1280,64,16],f16>, !torch.int -> !torch.vtensor<[1280,64,16],f16>
%4702 = torch.aten.view %4701, %4049 : !torch.vtensor<[1280,64,16],f16>, !torch.list<int> -> !torch.vtensor<[1280,1024],f16>
%4703 = torch.aten.mul.Tensor %638, %4702 : !torch.vtensor<[1280,1024],si8>, !torch.vtensor<[1280,1024],f16> -> !torch.vtensor<[1280,1024],f16>
%4704 = torch.aten.transpose.int %4703, %int0, %int1 : !torch.vtensor<[1280,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[1024,1280],f16>
%4705 = torch.aten.view %4699, %1430 : !torch.vtensor<[4,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[308,1024],f16>
%4706 = torch.aten.mm %4705, %4704 : !torch.vtensor<[308,1024],f16>, !torch.vtensor<[1024,1280],f16> -> !torch.vtensor<[308,1280],f16>
%4707 = torch.aten.view %4706, %4055 : !torch.vtensor<[308,1280],f16>, !torch.list<int> -> !torch.vtensor<[4,77,1280],f16>
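    // Split key and value states into 20 heads each: [4,77,1280] -> [80,77,64].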
%4708 = torch.aten.view %4684, %4080 : !torch.vtensor<[4,77,1280],f16>, !torch.list<int> -> !torch.vtensor<[4,77,20,64],f16>
%4709 = torch.aten.permute %4708, %1239 : !torch.vtensor<[4,77,20,64],f16>, !torch.list<int> -> !torch.vtensor<[4,20,77,64],f16>
%4710 = torch.aten.clone %4709, %int0 : !torch.vtensor<[4,20,77,64],f16>, !torch.int -> !torch.vtensor<[4,20,77,64],f16>
%4711 = torch.aten.view %4710, %4084 : !torch.vtensor<[4,20,77,64],f16>, !torch.list<int> -> !torch.vtensor<[80,77,64],f16>
%4712 = torch.aten.view %4707, %4080 : !torch.vtensor<[4,77,1280],f16>, !torch.list<int> -> !torch.vtensor<[4,77,20,64],f16>
%4713 = torch.aten.permute %4712, %1239 : !torch.vtensor<[4,77,20,64],f16>, !torch.list<int> -> !torch.vtensor<[4,20,77,64],f16>
%4714 = torch.aten.clone %4713, %int0 : !torch.vtensor<[4,20,77,64],f16>, !torch.int -> !torch.vtensor<[4,20,77,64],f16>
%4715 = torch.aten.view %4714, %4084 : !torch.vtensor<[4,20,77,64],f16>, !torch.list<int> -> !torch.vtensor<[80,77,64],f16>
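    // Upcast Q and K to f32 for the score computation. The zero-filled broadcasts are
    // presumably residue of a decomposed baddbmm with beta = 0; they carry no information.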
%4716 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%4717 = torch.aten.to.dtype %4716, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%4718 = torch.prim.ListConstruct %int80, %int576, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4719 = torch.aten.broadcast_to %4717, %4718 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[80,576,64],f32>
%4720 = torch.aten.to.dtype %4661, %int6, %false, %false, %none : !torch.vtensor<[80,576,64],f16>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[80,576,64],f32>
%4721 = torch.prim.ListConstruct %int80, %int576, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4722 = torch.aten.broadcast_to %4720, %4721 : !torch.vtensor<[80,576,64],f32>, !torch.list<int> -> !torch.vtensor<[80,576,64],f32>
%4723 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%4724 = torch.aten.to.dtype %4723, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%4725 = torch.prim.ListConstruct %int80, %int77, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4726 = torch.aten.broadcast_to %4724, %4725 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[80,77,64],f32>
%4727 = torch.aten.to.dtype %4711, %int6, %false, %false, %none : !torch.vtensor<[80,77,64],f16>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[80,77,64],f32>
%4728 = torch.prim.ListConstruct %int80, %int77, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4729 = torch.aten.broadcast_to %4727, %4728 : !torch.vtensor<[80,77,64],f32>, !torch.list<int> -> !torch.vtensor<[80,77,64],f32>
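    // Attention scores: (Q @ K^T) * 0.125, where 0.125 = 1/sqrt(64) for head dim 64,
    // followed by a numerically stable softmax (row max subtracted before exp).
    // The uninitialized empty tensor is added with alpha = 0, so its contents never matter.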
%4730 = torch.aten.empty.memory_format %4104, %int6, %none, %cuda3A0, %false, %none : !torch.list<int>, !torch.int, !torch.none, !torch.Device, !torch.bool, !torch.none -> !torch.vtensor<[80,576,77],f32>
%4731 = torch.aten.transpose.int %4729, %int-1, %int-2 : !torch.vtensor<[80,77,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[80,64,77],f32>
%4732 = torch.aten.bmm %4722, %4731 : !torch.vtensor<[80,576,64],f32>, !torch.vtensor<[80,64,77],f32> -> !torch.vtensor<[80,576,77],f32>
%4733 = torch.aten.mul.Scalar %4732, %float1.250000e-01 : !torch.vtensor<[80,576,77],f32>, !torch.float -> !torch.vtensor<[80,576,77],f32>
%4734 = torch.aten.add.Tensor %4733, %4730, %int0 : !torch.vtensor<[80,576,77],f32>, !torch.vtensor<[80,576,77],f32>, !torch.int -> !torch.vtensor<[80,576,77],f32>
%values_188, %indices_189 = torch.aten.max.dim %4734, %int-1, %true : !torch.vtensor<[80,576,77],f32>, !torch.int, !torch.bool -> !torch.vtensor<[80,576,1],f32>, !torch.vtensor<[80,576,1],si64>
%4735 = torch.aten.sub.Tensor %4734, %values_188, %float1.000000e00 : !torch.vtensor<[80,576,77],f32>, !torch.vtensor<[80,576,1],f32>, !torch.float -> !torch.vtensor<[80,576,77],f32>
%4736 = torch.aten.exp %4735 : !torch.vtensor<[80,576,77],f32> -> !torch.vtensor<[80,576,77],f32>
%4737 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%4738 = torch.aten.sum.dim_IntList %4736, %4737, %true, %none : !torch.vtensor<[80,576,77],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[80,576,1],f32>
%4739 = torch.aten.div.Tensor %4736, %4738 : !torch.vtensor<[80,576,77],f32>, !torch.vtensor<[80,576,1],f32> -> !torch.vtensor<[80,576,77],f32>
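    // Cast the attention probabilities back to f16 and apply them to V:
    // [80,576,77] @ [80,77,64] -> [80,576,64].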
%4740 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%4741 = torch.aten.to.dtype %4740, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%4742 = torch.prim.ListConstruct %int80, %int576, %int77 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4743 = torch.aten.broadcast_to %4741, %4742 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[80,576,77],f16>
%4744 = torch.aten.to.dtype %4739, %int5, %false, %false, %none : !torch.vtensor<[80,576,77],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[80,576,77],f16>
%4745 = torch.prim.ListConstruct %int80, %int576, %int77 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4746 = torch.aten.broadcast_to %4744, %4745 : !torch.vtensor<[80,576,77],f16>, !torch.list<int> -> !torch.vtensor<[80,576,77],f16>
%4747 = torch.aten.bmm %4746, %4715 : !torch.vtensor<[80,576,77],f16>, !torch.vtensor<[80,77,64],f16> -> !torch.vtensor<[80,576,64],f16>
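    // Merge the 20 heads back into a single [4,576,1280] tensor.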
%4748 = torch.aten.view %4747, %3957 : !torch.vtensor<[80,576,64],f16>, !torch.list<int> -> !torch.vtensor<[4,20,576,64],f16>
%4749 = torch.aten.permute %4748, %1239 : !torch.vtensor<[4,20,576,64],f16>, !torch.list<int> -> !torch.vtensor<[4,576,20,64],f16>
%4750 = torch.aten.clone %4749, %int0 : !torch.vtensor<[4,576,20,64],f16>, !torch.int -> !torch.vtensor<[4,576,20,64],f16>
%4751 = torch.aten.view %4750, %3792 : !torch.vtensor<[4,576,20,64],f16>, !torch.list<int> -> !torch.vtensor<[4,576,1280],f16>
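    // Fake int8 quantization of the attention output before the output projection
    // (same per-16-group absmax/128 pattern as above).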
%4752 = torch.aten.view %4751, %3795 : !torch.vtensor<[4,576,1280],f16>, !torch.list<int> -> !torch.vtensor<[4,576,80,16],f16>
%4753 = torch.aten.abs %4752 : !torch.vtensor<[4,576,80,16],f16> -> !torch.vtensor<[4,576,80,16],f16>
%values_190, %indices_191 = torch.aten.max.dim %4753, %int3, %true : !torch.vtensor<[4,576,80,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[4,576,80,1],f16>, !torch.vtensor<[4,576,80,1],si64>
%4754 = torch.aten.view %values_190, %3798 : !torch.vtensor<[4,576,80,1],f16>, !torch.list<int> -> !torch.vtensor<[4,576,80,1],f16>
%4755 = torch.aten.broadcast_to %4754, %3795 : !torch.vtensor<[4,576,80,1],f16>, !torch.list<int> -> !torch.vtensor<[4,576,80,16],f16>
%4756 = torch.aten.clone %4755, %int0 : !torch.vtensor<[4,576,80,16],f16>, !torch.int -> !torch.vtensor<[4,576,80,16],f16>
%4757 = torch.aten.view %4756, %3792 : !torch.vtensor<[4,576,80,16],f16>, !torch.list<int> -> !torch.vtensor<[4,576,1280],f16>
%4758 = torch.aten.div.Scalar %4757, %int128 : !torch.vtensor<[4,576,1280],f16>, !torch.int -> !torch.vtensor<[4,576,1280],f16>
%4759 = torch.aten.detach %936 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%4760 = torch.aten.detach %4759 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%4761 = torch.aten.div.Tensor %4751, %4758 : !torch.vtensor<[4,576,1280],f16>, !torch.vtensor<[4,576,1280],f16> -> !torch.vtensor<[4,576,1280],f16>
%4762 = torch.aten.add.Tensor %4761, %4760, %int1 : !torch.vtensor<[4,576,1280],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,576,1280],f16>
%4763 = torch.aten.round %4762 : !torch.vtensor<[4,576,1280],f16> -> !torch.vtensor<[4,576,1280],f16>
%4764 = torch.aten.clamp %4763, %int-128, %int127 : !torch.vtensor<[4,576,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,576,1280],f16>
%4765 = torch.aten.sub.Tensor %4764, %4760, %int1 : !torch.vtensor<[4,576,1280],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,576,1280],f16>
%4766 = torch.aten.mul.Tensor %4765, %4758 : !torch.vtensor<[4,576,1280],f16>, !torch.vtensor<[4,576,1280],f16> -> !torch.vtensor<[4,576,1280],f16>
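    // Attention output projection (dequantized int8 weight %636 x scales %637, bias %635),
    // then the residual connection with %4618.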
%4767 = torch.aten.broadcast_to %637, %3812 : !torch.vtensor<[1280,80,1],f16>, !torch.list<int> -> !torch.vtensor<[1280,80,16],f16>
%4768 = torch.aten.clone %4767, %int0 : !torch.vtensor<[1280,80,16],f16>, !torch.int -> !torch.vtensor<[1280,80,16],f16>
%4769 = torch.aten.view %4768, %3815 : !torch.vtensor<[1280,80,16],f16>, !torch.list<int> -> !torch.vtensor<[1280,1280],f16>
%4770 = torch.aten.mul.Tensor %636, %4769 : !torch.vtensor<[1280,1280],si8>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%4771 = torch.aten.transpose.int %4770, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%4772 = torch.aten.view %4766, %3819 : !torch.vtensor<[4,576,1280],f16>, !torch.list<int> -> !torch.vtensor<[2304,1280],f16>
%4773 = torch.aten.mm %4772, %4771 : !torch.vtensor<[2304,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[2304,1280],f16>
%4774 = torch.aten.mul.Scalar %635, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16>
%4775 = torch.aten.add.Tensor %4774, %4773, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[2304,1280],f16>, !torch.int -> !torch.vtensor<[2304,1280],f16>
%4776 = torch.aten.view %4775, %3792 : !torch.vtensor<[2304,1280],f16>, !torch.list<int> -> !torch.vtensor<[4,576,1280],f16>
%4777 = torch.aten.add.Tensor %4776, %4618, %int1 : !torch.vtensor<[4,576,1280],f16>, !torch.vtensor<[4,576,1280],f16>, !torch.int -> !torch.vtensor<[4,576,1280],f16>
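    // LayerNorm over the 1280-wide channel axis, decomposed into explicit mean/variance
    // (eps = 1e-5), with affine weight %634 and bias %633.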
%4778 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%4779 = torch.aten.sum.dim_IntList %4777, %4778, %true, %none : !torch.vtensor<[4,576,1280],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,576,1],f16>
%4780 = torch.aten.div.Scalar %4779, %int1280 : !torch.vtensor<[4,576,1],f16>, !torch.int -> !torch.vtensor<[4,576,1],f16>
%4781 = torch.prim.ListConstruct %int4, %int576, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4782 = torch.aten.broadcast_to %4780, %4781 : !torch.vtensor<[4,576,1],f16>, !torch.list<int> -> !torch.vtensor<[4,576,1280],f16>
%4783 = torch.aten.sub.Tensor %4777, %4782, %int1 : !torch.vtensor<[4,576,1280],f16>, !torch.vtensor<[4,576,1280],f16>, !torch.int -> !torch.vtensor<[4,576,1280],f16>
%4784 = torch.aten.mul.Tensor %4783, %4783 : !torch.vtensor<[4,576,1280],f16>, !torch.vtensor<[4,576,1280],f16> -> !torch.vtensor<[4,576,1280],f16>
%4785 = torch.aten.sum.dim_IntList %4784, %4778, %true, %none : !torch.vtensor<[4,576,1280],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,576,1],f16>
%4786 = torch.aten.div.Scalar %4785, %int1280 : !torch.vtensor<[4,576,1],f16>, !torch.int -> !torch.vtensor<[4,576,1],f16>
%4787 = torch.aten.add.Scalar %4786, %float1.000000e-05, %int1 : !torch.vtensor<[4,576,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[4,576,1],f16>
%4788 = torch.aten.rsqrt %4787 : !torch.vtensor<[4,576,1],f16> -> !torch.vtensor<[4,576,1],f16>
%4789 = torch.prim.ListConstruct %int4, %int576, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4790 = torch.aten.broadcast_to %4788, %4789 : !torch.vtensor<[4,576,1],f16>, !torch.list<int> -> !torch.vtensor<[4,576,1280],f16>
%4791 = torch.aten.mul.Tensor %4783, %4790 : !torch.vtensor<[4,576,1280],f16>, !torch.vtensor<[4,576,1280],f16> -> !torch.vtensor<[4,576,1280],f16>
%4792 = torch.aten.mul.Tensor %4791, %634 : !torch.vtensor<[4,576,1280],f16>, !torch.vtensor<[1280],f16> -> !torch.vtensor<[4,576,1280],f16>
%4793 = torch.aten.add.Tensor %4792, %633, %int1 : !torch.vtensor<[4,576,1280],f16>, !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[4,576,1280],f16>
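    // Fake int8 quantization of the normalized activations before the feed-forward.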
%4794 = torch.aten.view %4793, %3795 : !torch.vtensor<[4,576,1280],f16>, !torch.list<int> -> !torch.vtensor<[4,576,80,16],f16>
%4795 = torch.aten.abs %4794 : !torch.vtensor<[4,576,80,16],f16> -> !torch.vtensor<[4,576,80,16],f16>
%values_192, %indices_193 = torch.aten.max.dim %4795, %int3, %true : !torch.vtensor<[4,576,80,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[4,576,80,1],f16>, !torch.vtensor<[4,576,80,1],si64>
%4796 = torch.aten.view %values_192, %3798 : !torch.vtensor<[4,576,80,1],f16>, !torch.list<int> -> !torch.vtensor<[4,576,80,1],f16>
%4797 = torch.aten.broadcast_to %4796, %3795 : !torch.vtensor<[4,576,80,1],f16>, !torch.list<int> -> !torch.vtensor<[4,576,80,16],f16>
%4798 = torch.aten.clone %4797, %int0 : !torch.vtensor<[4,576,80,16],f16>, !torch.int -> !torch.vtensor<[4,576,80,16],f16>
%4799 = torch.aten.view %4798, %3792 : !torch.vtensor<[4,576,80,16],f16>, !torch.list<int> -> !torch.vtensor<[4,576,1280],f16>
%4800 = torch.aten.div.Scalar %4799, %int128 : !torch.vtensor<[4,576,1280],f16>, !torch.int -> !torch.vtensor<[4,576,1280],f16>
%4801 = torch.aten.detach %936 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%4802 = torch.aten.detach %4801 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%4803 = torch.aten.div.Tensor %4793, %4800 : !torch.vtensor<[4,576,1280],f16>, !torch.vtensor<[4,576,1280],f16> -> !torch.vtensor<[4,576,1280],f16>
%4804 = torch.aten.add.Tensor %4803, %4802, %int1 : !torch.vtensor<[4,576,1280],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,576,1280],f16>
%4805 = torch.aten.round %4804 : !torch.vtensor<[4,576,1280],f16> -> !torch.vtensor<[4,576,1280],f16>
%4806 = torch.aten.clamp %4805, %int-128, %int127 : !torch.vtensor<[4,576,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,576,1280],f16>
%4807 = torch.aten.sub.Tensor %4806, %4802, %int1 : !torch.vtensor<[4,576,1280],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,576,1280],f16>
%4808 = torch.aten.mul.Tensor %4807, %4800 : !torch.vtensor<[4,576,1280],f16>, !torch.vtensor<[4,576,1280],f16> -> !torch.vtensor<[4,576,1280],f16>
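    // GEGLU feed-forward: project to 10240 channels (dequantized int8 weight %631 x scales %632,
    // bias %630), slice into two 5120-wide halves, and gate the first half with GELU of the second.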
%4809 = torch.aten.broadcast_to %632, %4184 : !torch.vtensor<[10240,80,1],f16>, !torch.list<int> -> !torch.vtensor<[10240,80,16],f16>
%4810 = torch.aten.clone %4809, %int0 : !torch.vtensor<[10240,80,16],f16>, !torch.int -> !torch.vtensor<[10240,80,16],f16>
%4811 = torch.aten.view %4810, %4187 : !torch.vtensor<[10240,80,16],f16>, !torch.list<int> -> !torch.vtensor<[10240,1280],f16>
%4812 = torch.aten.mul.Tensor %631, %4811 : !torch.vtensor<[10240,1280],si8>, !torch.vtensor<[10240,1280],f16> -> !torch.vtensor<[10240,1280],f16>
%4813 = torch.aten.transpose.int %4812, %int0, %int1 : !torch.vtensor<[10240,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,10240],f16>
%4814 = torch.aten.view %4808, %3819 : !torch.vtensor<[4,576,1280],f16>, !torch.list<int> -> !torch.vtensor<[2304,1280],f16>
%4815 = torch.aten.mm %4814, %4813 : !torch.vtensor<[2304,1280],f16>, !torch.vtensor<[1280,10240],f16> -> !torch.vtensor<[2304,10240],f16>
%4816 = torch.aten.mul.Scalar %630, %int1 : !torch.vtensor<[10240],f16>, !torch.int -> !torch.vtensor<[10240],f16>
%4817 = torch.aten.add.Tensor %4816, %4815, %int1 : !torch.vtensor<[10240],f16>, !torch.vtensor<[2304,10240],f16>, !torch.int -> !torch.vtensor<[2304,10240],f16>
%4818 = torch.aten.view %4817, %4195 : !torch.vtensor<[2304,10240],f16>, !torch.list<int> -> !torch.vtensor<[4,576,10240],f16>
%4819 = torch.aten.slice.Tensor %4818, %int-1, %int0, %int5120, %int1 : !torch.vtensor<[4,576,10240],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[4,576,5120],f16>
%4820 = torch.aten.slice.Tensor %4818, %int-1, %int5120, %int10240, %int1 : !torch.vtensor<[4,576,10240],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[4,576,5120],f16>
%4821 = torch.aten.gelu %4820, %str : !torch.vtensor<[4,576,5120],f16>, !torch.str -> !torch.vtensor<[4,576,5120],f16>
%4822 = torch.aten.mul.Tensor %4819, %4821 : !torch.vtensor<[4,576,5120],f16>, !torch.vtensor<[4,576,5120],f16> -> !torch.vtensor<[4,576,5120],f16>
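    // Fake int8 quantization of the gated hidden states (absmax over groups of 16
    // across the 5120 channels).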
%4823 = torch.aten.view %4822, %4201 : !torch.vtensor<[4,576,5120],f16>, !torch.list<int> -> !torch.vtensor<[4,576,320,16],f16>
%4824 = torch.aten.abs %4823 : !torch.vtensor<[4,576,320,16],f16> -> !torch.vtensor<[4,576,320,16],f16>
%values_194, %indices_195 = torch.aten.max.dim %4824, %int3, %true : !torch.vtensor<[4,576,320,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[4,576,320,1],f16>, !torch.vtensor<[4,576,320,1],si64>
%4825 = torch.aten.view %values_194, %4204 : !torch.vtensor<[4,576,320,1],f16>, !torch.list<int> -> !torch.vtensor<[4,576,320,1],f16>
%4826 = torch.aten.broadcast_to %4825, %4201 : !torch.vtensor<[4,576,320,1],f16>, !torch.list<int> -> !torch.vtensor<[4,576,320,16],f16>
%4827 = torch.aten.clone %4826, %int0 : !torch.vtensor<[4,576,320,16],f16>, !torch.int -> !torch.vtensor<[4,576,320,16],f16>
%4828 = torch.aten.view %4827, %4208 : !torch.vtensor<[4,576,320,16],f16>, !torch.list<int> -> !torch.vtensor<[4,576,5120],f16>
%4829 = torch.aten.div.Scalar %4828, %int128 : !torch.vtensor<[4,576,5120],f16>, !torch.int -> !torch.vtensor<[4,576,5120],f16>
%4830 = torch.aten.detach %936 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%4831 = torch.aten.detach %4830 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%4832 = torch.aten.div.Tensor %4822, %4829 : !torch.vtensor<[4,576,5120],f16>, !torch.vtensor<[4,576,5120],f16> -> !torch.vtensor<[4,576,5120],f16>
%4833 = torch.aten.add.Tensor %4832, %4831, %int1 : !torch.vtensor<[4,576,5120],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,576,5120],f16>
%4834 = torch.aten.round %4833 : !torch.vtensor<[4,576,5120],f16> -> !torch.vtensor<[4,576,5120],f16>
%4835 = torch.aten.clamp %4834, %int-128, %int127 : !torch.vtensor<[4,576,5120],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,576,5120],f16>
%4836 = torch.aten.sub.Tensor %4835, %4831, %int1 : !torch.vtensor<[4,576,5120],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,576,5120],f16>
%4837 = torch.aten.mul.Tensor %4836, %4829 : !torch.vtensor<[4,576,5120],f16>, !torch.vtensor<[4,576,5120],f16> -> !torch.vtensor<[4,576,5120],f16>
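    // Feed-forward output projection back to 1280 channels plus the residual (%4777);
    // this closes the transformer block.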
%4838 = torch.aten.broadcast_to %629, %4219 : !torch.vtensor<[1280,320,1],f16>, !torch.list<int> -> !torch.vtensor<[1280,320,16],f16>
%4839 = torch.aten.clone %4838, %int0 : !torch.vtensor<[1280,320,16],f16>, !torch.int -> !torch.vtensor<[1280,320,16],f16>
%4840 = torch.aten.view %4839, %4222 : !torch.vtensor<[1280,320,16],f16>, !torch.list<int> -> !torch.vtensor<[1280,5120],f16>
%4841 = torch.aten.mul.Tensor %628, %4840 : !torch.vtensor<[1280,5120],si8>, !torch.vtensor<[1280,5120],f16> -> !torch.vtensor<[1280,5120],f16>
%4842 = torch.aten.transpose.int %4841, %int0, %int1 : !torch.vtensor<[1280,5120],f16>, !torch.int, !torch.int -> !torch.vtensor<[5120,1280],f16>
%4843 = torch.aten.view %4837, %4226 : !torch.vtensor<[4,576,5120],f16>, !torch.list<int> -> !torch.vtensor<[2304,5120],f16>
%4844 = torch.aten.mm %4843, %4842 : !torch.vtensor<[2304,5120],f16>, !torch.vtensor<[5120,1280],f16> -> !torch.vtensor<[2304,1280],f16>
%4845 = torch.aten.mul.Scalar %627, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16>
%4846 = torch.aten.add.Tensor %4845, %4844, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[2304,1280],f16>, !torch.int -> !torch.vtensor<[2304,1280],f16>
%4847 = torch.aten.view %4846, %3792 : !torch.vtensor<[2304,1280],f16>, !torch.list<int> -> !torch.vtensor<[4,576,1280],f16>
%4848 = torch.aten.add.Tensor %4847, %4777, %int1 : !torch.vtensor<[4,576,1280],f16>, !torch.vtensor<[4,576,1280],f16>, !torch.int -> !torch.vtensor<[4,576,1280],f16>
%4849 = torch.aten.view %4848, %3795 : !torch.vtensor<[4,576,1280],f16>, !torch.list<int> -> !torch.vtensor<[4,576,80,16],f16>
%4850 = torch.aten.abs %4849 : !torch.vtensor<[4,576,80,16],f16> -> !torch.vtensor<[4,576,80,16],f16>
%values_196, %indices_197 = torch.aten.max.dim %4850, %int3, %true : !torch.vtensor<[4,576,80,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[4,576,80,1],f16>, !torch.vtensor<[4,576,80,1],si64>
%4851 = torch.aten.view %values_196, %3798 : !torch.vtensor<[4,576,80,1],f16>, !torch.list<int> -> !torch.vtensor<[4,576,80,1],f16>
%4852 = torch.aten.broadcast_to %4851, %3795 : !torch.vtensor<[4,576,80,1],f16>, !torch.list<int> -> !torch.vtensor<[4,576,80,16],f16>
%4853 = torch.aten.clone %4852, %int0 : !torch.vtensor<[4,576,80,16],f16>, !torch.int -> !torch.vtensor<[4,576,80,16],f16>
%4854 = torch.aten.view %4853, %3792 : !torch.vtensor<[4,576,80,16],f16>, !torch.list<int> -> !torch.vtensor<[4,576,1280],f16>
%4855 = torch.aten.div.Scalar %4854, %int128 : !torch.vtensor<[4,576,1280],f16>, !torch.int -> !torch.vtensor<[4,576,1280],f16>
%4856 = torch.aten.detach %936 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%4857 = torch.aten.detach %4856 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%4858 = torch.aten.div.Tensor %4848, %4855 : !torch.vtensor<[4,576,1280],f16>, !torch.vtensor<[4,576,1280],f16> -> !torch.vtensor<[4,576,1280],f16>
%4859 = torch.aten.add.Tensor %4858, %4857, %int1 : !torch.vtensor<[4,576,1280],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,576,1280],f16>
%4860 = torch.aten.round %4859 : !torch.vtensor<[4,576,1280],f16> -> !torch.vtensor<[4,576,1280],f16>
%4861 = torch.aten.clamp %4860, %int-128, %int127 : !torch.vtensor<[4,576,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,576,1280],f16>
%4862 = torch.aten.sub.Tensor %4861, %4857, %int1 : !torch.vtensor<[4,576,1280],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,576,1280],f16>
%4863 = torch.aten.mul.Tensor %4862, %4855 : !torch.vtensor<[4,576,1280],f16>, !torch.vtensor<[4,576,1280],f16> -> !torch.vtensor<[4,576,1280],f16>
%4864 = torch.aten.broadcast_to %626, %3812 : !torch.vtensor<[1280,80,1],f16>, !torch.list<int> -> !torch.vtensor<[1280,80,16],f16>
%4865 = torch.aten.clone %4864, %int0 : !torch.vtensor<[1280,80,16],f16>, !torch.int -> !torch.vtensor<[1280,80,16],f16>
%4866 = torch.aten.view %4865, %3815 : !torch.vtensor<[1280,80,16],f16>, !torch.list<int> -> !torch.vtensor<[1280,1280],f16>
%4867 = torch.aten.mul.Tensor %625, %4866 : !torch.vtensor<[1280,1280],si8>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%4868 = torch.aten.transpose.int %4867, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%4869 = torch.aten.view %4863, %3819 : !torch.vtensor<[4,576,1280],f16>, !torch.list<int> -> !torch.vtensor<[2304,1280],f16>
%4870 = torch.aten.mm %4869, %4868 : !torch.vtensor<[2304,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[2304,1280],f16>
%4871 = torch.aten.mul.Scalar %624, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16>
%4872 = torch.aten.add.Tensor %4871, %4870, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[2304,1280],f16>, !torch.int -> !torch.vtensor<[2304,1280],f16>
%4873 = torch.aten.view %4872, %3792 : !torch.vtensor<[2304,1280],f16>, !torch.list<int> -> !torch.vtensor<[4,576,1280],f16>
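    // Reshape the token sequence back to NCHW ([4,576,1280] -> [4,1280,24,24]) and add %4393,
    // presumably the skip connection saved before entering the transformer block.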
%4874 = torch.aten.view %4873, %4258 : !torch.vtensor<[4,576,1280],f16>, !torch.list<int> -> !torch.vtensor<[4,24,24,1280],f16>
%4875 = torch.aten.permute %4874, %1638 : !torch.vtensor<[4,24,24,1280],f16>, !torch.list<int> -> !torch.vtensor<[4,1280,24,24],f16>
%4876 = torch.aten.clone %4875, %int0 : !torch.vtensor<[4,1280,24,24],f16>, !torch.int -> !torch.vtensor<[4,1280,24,24],f16>
%4877 = torch.aten.add.Tensor %4876, %4393, %int1 : !torch.vtensor<[4,1280,24,24],f16>, !torch.vtensor<[4,1280,24,24],f16>, !torch.int -> !torch.vtensor<[4,1280,24,24],f16>
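    // Fake int8 quantization of the feature map, with absmax taken over groups of 16 channels
    // (the [4,80,16,24,24] view exposes the 16-channel subgroups as dim 2).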
%4878 = torch.aten.view %4877, %3705 : !torch.vtensor<[4,1280,24,24],f16>, !torch.list<int> -> !torch.vtensor<[4,80,16,24,24],f16>
%4879 = torch.aten.abs %4878 : !torch.vtensor<[4,80,16,24,24],f16> -> !torch.vtensor<[4,80,16,24,24],f16>
%values_198, %indices_199 = torch.aten.max.dim %4879, %int2, %true : !torch.vtensor<[4,80,16,24,24],f16>, !torch.int, !torch.bool -> !torch.vtensor<[4,80,1,24,24],f16>, !torch.vtensor<[4,80,1,24,24],si64>
%4880 = torch.aten.view %values_198, %3708 : !torch.vtensor<[4,80,1,24,24],f16>, !torch.list<int> -> !torch.vtensor<[4,80,1,24,24],f16>
%4881 = torch.aten.broadcast_to %4880, %3705 : !torch.vtensor<[4,80,1,24,24],f16>, !torch.list<int> -> !torch.vtensor<[4,80,16,24,24],f16>
%4882 = torch.aten.clone %4881, %int0 : !torch.vtensor<[4,80,16,24,24],f16>, !torch.int -> !torch.vtensor<[4,80,16,24,24],f16>
%4883 = torch.aten.view %4882, %3686 : !torch.vtensor<[4,80,16,24,24],f16>, !torch.list<int> -> !torch.vtensor<[4,1280,24,24],f16>
%4884 = torch.aten.div.Scalar %4883, %int128 : !torch.vtensor<[4,1280,24,24],f16>, !torch.int -> !torch.vtensor<[4,1280,24,24],f16>
%4885 = torch.aten.detach %936 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%4886 = torch.aten.detach %4885 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%4887 = torch.aten.div.Tensor %4877, %4884 : !torch.vtensor<[4,1280,24,24],f16>, !torch.vtensor<[4,1280,24,24],f16> -> !torch.vtensor<[4,1280,24,24],f16>
%4888 = torch.aten.add.Tensor %4887, %4886, %int1 : !torch.vtensor<[4,1280,24,24],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,1280,24,24],f16>
%4889 = torch.aten.round %4888 : !torch.vtensor<[4,1280,24,24],f16> -> !torch.vtensor<[4,1280,24,24],f16>
%4890 = torch.aten.clamp %4889, %int-128, %int127 : !torch.vtensor<[4,1280,24,24],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,1280,24,24],f16>
%4891 = torch.aten.sub.Tensor %4890, %4886, %int1 : !torch.vtensor<[4,1280,24,24],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,1280,24,24],f16>
%4892 = torch.aten.mul.Tensor %4891, %4884 : !torch.vtensor<[4,1280,24,24],f16>, !torch.vtensor<[4,1280,24,24],f16> -> !torch.vtensor<[4,1280,24,24],f16>
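    // Downsampling convolution: 3x3, stride 2 (strides %2276), padding 1, with a dequantized
    // int8 weight (%622 x scales %623, bias %621): [4,1280,24,24] -> [4,1280,12,12].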
%4893 = torch.aten.broadcast_to %623, %3722 : !torch.vtensor<[1280,80,1,3,3],f16>, !torch.list<int> -> !torch.vtensor<[1280,80,16,3,3],f16>
%4894 = torch.aten.clone %4893, %int0 : !torch.vtensor<[1280,80,16,3,3],f16>, !torch.int -> !torch.vtensor<[1280,80,16,3,3],f16>
%4895 = torch.aten.view %4894, %3725 : !torch.vtensor<[1280,80,16,3,3],f16>, !torch.list<int> -> !torch.vtensor<[1280,1280,3,3],f16>
%4896 = torch.aten.mul.Tensor %622, %4895 : !torch.vtensor<[1280,1280,3,3],si8>, !torch.vtensor<[1280,1280,3,3],f16> -> !torch.vtensor<[1280,1280,3,3],f16>
%4897 = torch.aten.convolution %4892, %4896, %621, %2276, %984, %984, %false, %985, %int1 : !torch.vtensor<[4,1280,24,24],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[4,1280,12,12],f16>
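    // GroupNorm with 32 groups (40 channels x 144 spatial = 5760 elements per group):
    // statistics accumulated in f64, eps is %939, followed by the per-channel affine
    // transform (scale %619, shift %620).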
%4898 = torch.prim.ListConstruct %int4, %int32, %int40, %int144 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4899 = torch.aten.view %4897, %4898 : !torch.vtensor<[4,1280,12,12],f16>, !torch.list<int> -> !torch.vtensor<[4,32,40,144],f16>
%4900 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%4901 = torch.aten.to.dtype %4900, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%4902 = torch.prim.ListConstruct %int4, %int32, %int40, %int144 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4903 = torch.aten.broadcast_to %4901, %4902 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[4,32,40,144],f32>
%4904 = torch.aten.to.dtype %4899, %int6, %false, %false, %none : !torch.vtensor<[4,32,40,144],f16>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,32,40,144],f32>
%4905 = torch.prim.ListConstruct %int4, %int32, %int40, %int144 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4906 = torch.aten.broadcast_to %4904, %4905 : !torch.vtensor<[4,32,40,144],f32>, !torch.list<int> -> !torch.vtensor<[4,32,40,144],f32>
%4907 = torch.aten.to.dtype %4906, %int7, %false, %false, %none : !torch.vtensor<[4,32,40,144],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,32,40,144],f64>
%4908 = torch.aten.sum.dim_IntList %4907, %996, %true, %none : !torch.vtensor<[4,32,40,144],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f64>
%4909 = torch.aten.div.Scalar %4908, %int5760 : !torch.vtensor<[4,32,1,1],f64>, !torch.int -> !torch.vtensor<[4,32,1,1],f64>
%4910 = torch.aten.sub.Tensor %4907, %4909, %float1.000000e00 : !torch.vtensor<[4,32,40,144],f64>, !torch.vtensor<[4,32,1,1],f64>, !torch.float -> !torch.vtensor<[4,32,40,144],f64>
%4911 = torch.aten.mul.Tensor %4910, %4910 : !torch.vtensor<[4,32,40,144],f64>, !torch.vtensor<[4,32,40,144],f64> -> !torch.vtensor<[4,32,40,144],f64>
%4912 = torch.aten.sum.dim_IntList %4911, %996, %true, %none : !torch.vtensor<[4,32,40,144],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f64>
%4913 = torch.aten.div.Scalar %4912, %int5760 : !torch.vtensor<[4,32,1,1],f64>, !torch.int -> !torch.vtensor<[4,32,1,1],f64>
%4914 = torch.aten.to.dtype %4913, %int6, %false, %false, %none : !torch.vtensor<[4,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f32>
%4915 = torch.aten.sum.dim_IntList %4906, %996, %true, %none : !torch.vtensor<[4,32,40,144],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f32>
%4916 = torch.aten.div.Scalar %4915, %int5760 : !torch.vtensor<[4,32,1,1],f32>, !torch.int -> !torch.vtensor<[4,32,1,1],f32>
%4917 = torch.aten.add.Tensor %4914, %939, %int1 : !torch.vtensor<[4,32,1,1],f32>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,32,1,1],f32>
%4918 = torch.aten.rsqrt %4917 : !torch.vtensor<[4,32,1,1],f32> -> !torch.vtensor<[4,32,1,1],f32>
%4919 = torch.aten.sub.Tensor %4899, %4916, %int1 : !torch.vtensor<[4,32,40,144],f16>, !torch.vtensor<[4,32,1,1],f32>, !torch.int -> !torch.vtensor<[4,32,40,144],f32>
%4920 = torch.aten.mul.Tensor %4919, %4918 : !torch.vtensor<[4,32,40,144],f32>, !torch.vtensor<[4,32,1,1],f32> -> !torch.vtensor<[4,32,40,144],f32>
%4921 = torch.prim.ListConstruct %int4, %int1280, %int12, %int12 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4922 = torch.aten.view %4920, %4921 : !torch.vtensor<[4,32,40,144],f32>, !torch.list<int> -> !torch.vtensor<[4,1280,12,12],f32>
%4923 = torch.aten.unsqueeze %620, %int0 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1,1280],f16>
%4924 = torch.aten.unsqueeze %4923, %int2 : !torch.vtensor<[1,1280],f16>, !torch.int -> !torch.vtensor<[1,1280,1],f16>
%4925 = torch.aten.unsqueeze %4924, %int3 : !torch.vtensor<[1,1280,1],f16>, !torch.int -> !torch.vtensor<[1,1280,1,1],f16>
%4926 = torch.aten.unsqueeze %619, %int0 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1,1280],f16>
%4927 = torch.aten.unsqueeze %4926, %int2 : !torch.vtensor<[1,1280],f16>, !torch.int -> !torch.vtensor<[1,1280,1],f16>
%4928 = torch.aten.unsqueeze %4927, %int3 : !torch.vtensor<[1,1280,1],f16>, !torch.int -> !torch.vtensor<[1,1280,1,1],f16>
%4929 = torch.aten.mul.Tensor %4922, %4928 : !torch.vtensor<[4,1280,12,12],f32>, !torch.vtensor<[1,1280,1,1],f16> -> !torch.vtensor<[4,1280,12,12],f32>
%4930 = torch.aten.add.Tensor %4929, %4925, %int1 : !torch.vtensor<[4,1280,12,12],f32>, !torch.vtensor<[1,1280,1,1],f16>, !torch.int -> !torch.vtensor<[4,1280,12,12],f32>
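    // Cast back to f16 and apply SiLU (x * sigmoid(x)); the usual fake int8 quantization
    // follows before the block's first convolution.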
%4931 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%4932 = torch.aten.to.dtype %4931, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%4933 = torch.prim.ListConstruct %int4, %int1280, %int12, %int12 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4934 = torch.aten.broadcast_to %4932, %4933 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[4,1280,12,12],f16>
%4935 = torch.aten.to.dtype %4930, %int5, %false, %false, %none : !torch.vtensor<[4,1280,12,12],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,1280,12,12],f16>
%4936 = torch.prim.ListConstruct %int4, %int1280, %int12, %int12 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4937 = torch.aten.broadcast_to %4935, %4936 : !torch.vtensor<[4,1280,12,12],f16>, !torch.list<int> -> !torch.vtensor<[4,1280,12,12],f16>
%4938 = torch.aten.sigmoid %4937 : !torch.vtensor<[4,1280,12,12],f16> -> !torch.vtensor<[4,1280,12,12],f16>
%4939 = torch.aten.mul.Tensor %4938, %4937 : !torch.vtensor<[4,1280,12,12],f16>, !torch.vtensor<[4,1280,12,12],f16> -> !torch.vtensor<[4,1280,12,12],f16>
%4940 = torch.prim.ListConstruct %int4, %int80, %int16, %int12, %int12 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4941 = torch.aten.view %4939, %4940 : !torch.vtensor<[4,1280,12,12],f16>, !torch.list<int> -> !torch.vtensor<[4,80,16,12,12],f16>
%4942 = torch.aten.abs %4941 : !torch.vtensor<[4,80,16,12,12],f16> -> !torch.vtensor<[4,80,16,12,12],f16>
%values_200, %indices_201 = torch.aten.max.dim %4942, %int2, %true : !torch.vtensor<[4,80,16,12,12],f16>, !torch.int, !torch.bool -> !torch.vtensor<[4,80,1,12,12],f16>, !torch.vtensor<[4,80,1,12,12],si64>
%4943 = torch.prim.ListConstruct %int4, %int80, %int1, %int12, %int12 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4944 = torch.aten.view %values_200, %4943 : !torch.vtensor<[4,80,1,12,12],f16>, !torch.list<int> -> !torch.vtensor<[4,80,1,12,12],f16>
%4945 = torch.aten.broadcast_to %4944, %4940 : !torch.vtensor<[4,80,1,12,12],f16>, !torch.list<int> -> !torch.vtensor<[4,80,16,12,12],f16>
%4946 = torch.aten.clone %4945, %int0 : !torch.vtensor<[4,80,16,12,12],f16>, !torch.int -> !torch.vtensor<[4,80,16,12,12],f16>
%4947 = torch.aten.view %4946, %4921 : !torch.vtensor<[4,80,16,12,12],f16>, !torch.list<int> -> !torch.vtensor<[4,1280,12,12],f16>
%4948 = torch.aten.div.Scalar %4947, %int128 : !torch.vtensor<[4,1280,12,12],f16>, !torch.int -> !torch.vtensor<[4,1280,12,12],f16>
%4949 = torch.aten.detach %936 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%4950 = torch.aten.detach %4949 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%4951 = torch.aten.div.Tensor %4939, %4948 : !torch.vtensor<[4,1280,12,12],f16>, !torch.vtensor<[4,1280,12,12],f16> -> !torch.vtensor<[4,1280,12,12],f16>
%4952 = torch.aten.add.Tensor %4951, %4950, %int1 : !torch.vtensor<[4,1280,12,12],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,1280,12,12],f16>
%4953 = torch.aten.round %4952 : !torch.vtensor<[4,1280,12,12],f16> -> !torch.vtensor<[4,1280,12,12],f16>
%4954 = torch.aten.clamp %4953, %int-128, %int127 : !torch.vtensor<[4,1280,12,12],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,1280,12,12],f16>
%4955 = torch.aten.sub.Tensor %4954, %4950, %int1 : !torch.vtensor<[4,1280,12,12],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,1280,12,12],f16>
%4956 = torch.aten.mul.Tensor %4955, %4948 : !torch.vtensor<[4,1280,12,12],f16>, !torch.vtensor<[4,1280,12,12],f16> -> !torch.vtensor<[4,1280,12,12],f16>
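    // conv1 of what appears to be a ResNet block: 3x3, stride 1, dequantized int8 weight
    // (%617 x scales %618), bias %616.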
%4957 = torch.aten.broadcast_to %618, %3722 : !torch.vtensor<[1280,80,1,3,3],f16>, !torch.list<int> -> !torch.vtensor<[1280,80,16,3,3],f16>
%4958 = torch.aten.clone %4957, %int0 : !torch.vtensor<[1280,80,16,3,3],f16>, !torch.int -> !torch.vtensor<[1280,80,16,3,3],f16>
%4959 = torch.aten.view %4958, %3725 : !torch.vtensor<[1280,80,16,3,3],f16>, !torch.list<int> -> !torch.vtensor<[1280,1280,3,3],f16>
%4960 = torch.aten.mul.Tensor %617, %4959 : !torch.vtensor<[1280,1280,3,3],si8>, !torch.vtensor<[1280,1280,3,3],f16> -> !torch.vtensor<[1280,1280,3,3],f16>
%4961 = torch.aten.convolution %4956, %4960, %616, %984, %984, %984, %false, %985, %int1 : !torch.vtensor<[4,1280,12,12],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[4,1280,12,12],f16>
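    // Time-embedding path: SiLU on the embedding %983 ([4,1280]), a linear projection
    // (%615, bias %614), broadcast-added to the conv output as a per-channel bias [4,1280,1,1].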
%4962 = torch.aten.sigmoid %983 : !torch.vtensor<[4,1280],f16> -> !torch.vtensor<[4,1280],f16>
%4963 = torch.aten.mul.Tensor %4962, %983 : !torch.vtensor<[4,1280],f16>, !torch.vtensor<[4,1280],f16> -> !torch.vtensor<[4,1280],f16>
%4964 = torch.aten.transpose.int %615, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%4965 = torch.aten.mm %4963, %4964 : !torch.vtensor<[4,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[4,1280],f16>
%4966 = torch.aten.mul.Scalar %614, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16>
%4967 = torch.aten.add.Tensor %4966, %4965, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[4,1280],f16>, !torch.int -> !torch.vtensor<[4,1280],f16>
%4968 = torch.aten.unsqueeze %4967, %int2 : !torch.vtensor<[4,1280],f16>, !torch.int -> !torch.vtensor<[4,1280,1],f16>
%4969 = torch.aten.unsqueeze %4968, %int3 : !torch.vtensor<[4,1280,1],f16>, !torch.int -> !torch.vtensor<[4,1280,1,1],f16>
%4970 = torch.aten.add.Tensor %4961, %4969, %int1 : !torch.vtensor<[4,1280,12,12],f16>, !torch.vtensor<[4,1280,1,1],f16>, !torch.int -> !torch.vtensor<[4,1280,12,12],f16>
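    // Second GroupNorm of the block (32 groups, f64 statistics, eps %939), with affine
    // scale %612 and shift %613.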
%4971 = torch.aten.view %4970, %4898 : !torch.vtensor<[4,1280,12,12],f16>, !torch.list<int> -> !torch.vtensor<[4,32,40,144],f16>
%4972 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%4973 = torch.aten.to.dtype %4972, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%4974 = torch.prim.ListConstruct %int4, %int32, %int40, %int144 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4975 = torch.aten.broadcast_to %4973, %4974 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[4,32,40,144],f32>
%4976 = torch.aten.to.dtype %4971, %int6, %false, %false, %none : !torch.vtensor<[4,32,40,144],f16>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,32,40,144],f32>
%4977 = torch.prim.ListConstruct %int4, %int32, %int40, %int144 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4978 = torch.aten.broadcast_to %4976, %4977 : !torch.vtensor<[4,32,40,144],f32>, !torch.list<int> -> !torch.vtensor<[4,32,40,144],f32>
%4979 = torch.aten.to.dtype %4978, %int7, %false, %false, %none : !torch.vtensor<[4,32,40,144],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,32,40,144],f64>
%4980 = torch.aten.sum.dim_IntList %4979, %996, %true, %none : !torch.vtensor<[4,32,40,144],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f64>
%4981 = torch.aten.div.Scalar %4980, %int5760 : !torch.vtensor<[4,32,1,1],f64>, !torch.int -> !torch.vtensor<[4,32,1,1],f64>
%4982 = torch.aten.sub.Tensor %4979, %4981, %float1.000000e00 : !torch.vtensor<[4,32,40,144],f64>, !torch.vtensor<[4,32,1,1],f64>, !torch.float -> !torch.vtensor<[4,32,40,144],f64>
%4983 = torch.aten.mul.Tensor %4982, %4982 : !torch.vtensor<[4,32,40,144],f64>, !torch.vtensor<[4,32,40,144],f64> -> !torch.vtensor<[4,32,40,144],f64>
%4984 = torch.aten.sum.dim_IntList %4983, %996, %true, %none : !torch.vtensor<[4,32,40,144],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f64>
%4985 = torch.aten.div.Scalar %4984, %int5760 : !torch.vtensor<[4,32,1,1],f64>, !torch.int -> !torch.vtensor<[4,32,1,1],f64>
%4986 = torch.aten.to.dtype %4985, %int6, %false, %false, %none : !torch.vtensor<[4,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f32>
%4987 = torch.aten.sum.dim_IntList %4978, %996, %true, %none : !torch.vtensor<[4,32,40,144],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f32>
%4988 = torch.aten.div.Scalar %4987, %int5760 : !torch.vtensor<[4,32,1,1],f32>, !torch.int -> !torch.vtensor<[4,32,1,1],f32>
%4989 = torch.aten.add.Tensor %4986, %939, %int1 : !torch.vtensor<[4,32,1,1],f32>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,32,1,1],f32>
%4990 = torch.aten.rsqrt %4989 : !torch.vtensor<[4,32,1,1],f32> -> !torch.vtensor<[4,32,1,1],f32>
%4991 = torch.aten.sub.Tensor %4971, %4988, %int1 : !torch.vtensor<[4,32,40,144],f16>, !torch.vtensor<[4,32,1,1],f32>, !torch.int -> !torch.vtensor<[4,32,40,144],f32>
%4992 = torch.aten.mul.Tensor %4991, %4990 : !torch.vtensor<[4,32,40,144],f32>, !torch.vtensor<[4,32,1,1],f32> -> !torch.vtensor<[4,32,40,144],f32>
%4993 = torch.aten.view %4992, %4921 : !torch.vtensor<[4,32,40,144],f32>, !torch.list<int> -> !torch.vtensor<[4,1280,12,12],f32>
%4994 = torch.aten.unsqueeze %613, %int0 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1,1280],f16>
%4995 = torch.aten.unsqueeze %4994, %int2 : !torch.vtensor<[1,1280],f16>, !torch.int -> !torch.vtensor<[1,1280,1],f16>
%4996 = torch.aten.unsqueeze %4995, %int3 : !torch.vtensor<[1,1280,1],f16>, !torch.int -> !torch.vtensor<[1,1280,1,1],f16>
%4997 = torch.aten.unsqueeze %612, %int0 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1,1280],f16>
%4998 = torch.aten.unsqueeze %4997, %int2 : !torch.vtensor<[1,1280],f16>, !torch.int -> !torch.vtensor<[1,1280,1],f16>
%4999 = torch.aten.unsqueeze %4998, %int3 : !torch.vtensor<[1,1280,1],f16>, !torch.int -> !torch.vtensor<[1,1280,1,1],f16>
%5000 = torch.aten.mul.Tensor %4993, %4999 : !torch.vtensor<[4,1280,12,12],f32>, !torch.vtensor<[1,1280,1,1],f16> -> !torch.vtensor<[4,1280,12,12],f32>
%5001 = torch.aten.add.Tensor %5000, %4996, %int1 : !torch.vtensor<[4,1280,12,12],f32>, !torch.vtensor<[1,1280,1,1],f16>, !torch.int -> !torch.vtensor<[4,1280,12,12],f32>
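    // SiLU in f16, then fake int8 quantization before conv2.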
%5002 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%5003 = torch.aten.to.dtype %5002, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%5004 = torch.prim.ListConstruct %int4, %int1280, %int12, %int12 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5005 = torch.aten.broadcast_to %5003, %5004 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[4,1280,12,12],f16>
%5006 = torch.aten.to.dtype %5001, %int5, %false, %false, %none : !torch.vtensor<[4,1280,12,12],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,1280,12,12],f16>
%5007 = torch.prim.ListConstruct %int4, %int1280, %int12, %int12 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5008 = torch.aten.broadcast_to %5006, %5007 : !torch.vtensor<[4,1280,12,12],f16>, !torch.list<int> -> !torch.vtensor<[4,1280,12,12],f16>
%5009 = torch.aten.sigmoid %5008 : !torch.vtensor<[4,1280,12,12],f16> -> !torch.vtensor<[4,1280,12,12],f16>
%5010 = torch.aten.mul.Tensor %5009, %5008 : !torch.vtensor<[4,1280,12,12],f16>, !torch.vtensor<[4,1280,12,12],f16> -> !torch.vtensor<[4,1280,12,12],f16>
%5011 = torch.aten.view %5010, %4940 : !torch.vtensor<[4,1280,12,12],f16>, !torch.list<int> -> !torch.vtensor<[4,80,16,12,12],f16>
%5012 = torch.aten.abs %5011 : !torch.vtensor<[4,80,16,12,12],f16> -> !torch.vtensor<[4,80,16,12,12],f16>
%values_202, %indices_203 = torch.aten.max.dim %5012, %int2, %true : !torch.vtensor<[4,80,16,12,12],f16>, !torch.int, !torch.bool -> !torch.vtensor<[4,80,1,12,12],f16>, !torch.vtensor<[4,80,1,12,12],si64>
%5013 = torch.aten.view %values_202, %4943 : !torch.vtensor<[4,80,1,12,12],f16>, !torch.list<int> -> !torch.vtensor<[4,80,1,12,12],f16>
%5014 = torch.aten.broadcast_to %5013, %4940 : !torch.vtensor<[4,80,1,12,12],f16>, !torch.list<int> -> !torch.vtensor<[4,80,16,12,12],f16>
%5015 = torch.aten.clone %5014, %int0 : !torch.vtensor<[4,80,16,12,12],f16>, !torch.int -> !torch.vtensor<[4,80,16,12,12],f16>
%5016 = torch.aten.view %5015, %4921 : !torch.vtensor<[4,80,16,12,12],f16>, !torch.list<int> -> !torch.vtensor<[4,1280,12,12],f16>
%5017 = torch.aten.div.Scalar %5016, %int128 : !torch.vtensor<[4,1280,12,12],f16>, !torch.int -> !torch.vtensor<[4,1280,12,12],f16>
%5018 = torch.aten.detach %936 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%5019 = torch.aten.detach %5018 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%5020 = torch.aten.div.Tensor %5010, %5017 : !torch.vtensor<[4,1280,12,12],f16>, !torch.vtensor<[4,1280,12,12],f16> -> !torch.vtensor<[4,1280,12,12],f16>
%5021 = torch.aten.add.Tensor %5020, %5019, %int1 : !torch.vtensor<[4,1280,12,12],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,1280,12,12],f16>
%5022 = torch.aten.round %5021 : !torch.vtensor<[4,1280,12,12],f16> -> !torch.vtensor<[4,1280,12,12],f16>
%5023 = torch.aten.clamp %5022, %int-128, %int127 : !torch.vtensor<[4,1280,12,12],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,1280,12,12],f16>
%5024 = torch.aten.sub.Tensor %5023, %5019, %int1 : !torch.vtensor<[4,1280,12,12],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,1280,12,12],f16>
%5025 = torch.aten.mul.Tensor %5024, %5017 : !torch.vtensor<[4,1280,12,12],f16>, !torch.vtensor<[4,1280,12,12],f16> -> !torch.vtensor<[4,1280,12,12],f16>
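    // conv2: 3x3, stride 1, dequantized int8 weight (%610 x scales %611), bias %609.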
%5026 = torch.aten.broadcast_to %611, %3722 : !torch.vtensor<[1280,80,1,3,3],f16>, !torch.list<int> -> !torch.vtensor<[1280,80,16,3,3],f16>
%5027 = torch.aten.clone %5026, %int0 : !torch.vtensor<[1280,80,16,3,3],f16>, !torch.int -> !torch.vtensor<[1280,80,16,3,3],f16>
%5028 = torch.aten.view %5027, %3725 : !torch.vtensor<[1280,80,16,3,3],f16>, !torch.list<int> -> !torch.vtensor<[1280,1280,3,3],f16>
%5029 = torch.aten.mul.Tensor %610, %5028 : !torch.vtensor<[1280,1280,3,3],si8>, !torch.vtensor<[1280,1280,3,3],f16> -> !torch.vtensor<[1280,1280,3,3],f16>
%5030 = torch.aten.convolution %5025, %5029, %609, %984, %984, %984, %false, %985, %int1 : !torch.vtensor<[4,1280,12,12],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[4,1280,12,12],f16>
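    // Residual add with the downsampler output %4897, divided by the scalar %925
    // (presumably the block's output scale factor); the next block's GroupNorm starts below.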
%5031 = torch.aten.add.Tensor %4897, %5030, %int1 : !torch.vtensor<[4,1280,12,12],f16>, !torch.vtensor<[4,1280,12,12],f16>, !torch.int -> !torch.vtensor<[4,1280,12,12],f16>
%5032 = torch.aten.div.Tensor %5031, %925 : !torch.vtensor<[4,1280,12,12],f16>, !torch.vtensor<[],f16> -> !torch.vtensor<[4,1280,12,12],f16>
%5033 = torch.aten.view %5032, %4898 : !torch.vtensor<[4,1280,12,12],f16>, !torch.list<int> -> !torch.vtensor<[4,32,40,144],f16>
%5034 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%5035 = torch.aten.to.dtype %5034, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%5036 = torch.prim.ListConstruct %int4, %int32, %int40, %int144 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5037 = torch.aten.broadcast_to %5035, %5036 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[4,32,40,144],f32>
%5038 = torch.aten.to.dtype %5033, %int6, %false, %false, %none : !torch.vtensor<[4,32,40,144],f16>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,32,40,144],f32>
%5039 = torch.prim.ListConstruct %int4, %int32, %int40, %int144 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5040 = torch.aten.broadcast_to %5038, %5039 : !torch.vtensor<[4,32,40,144],f32>, !torch.list<int> -> !torch.vtensor<[4,32,40,144],f32>
%5041 = torch.aten.to.dtype %5040, %int7, %false, %false, %none : !torch.vtensor<[4,32,40,144],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,32,40,144],f64>
%5042 = torch.aten.sum.dim_IntList %5041, %996, %true, %none : !torch.vtensor<[4,32,40,144],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f64>
%5043 = torch.aten.div.Scalar %5042, %int5760 : !torch.vtensor<[4,32,1,1],f64>, !torch.int -> !torch.vtensor<[4,32,1,1],f64>
%5044 = torch.aten.sub.Tensor %5041, %5043, %float1.000000e00 : !torch.vtensor<[4,32,40,144],f64>, !torch.vtensor<[4,32,1,1],f64>, !torch.float -> !torch.vtensor<[4,32,40,144],f64>
%5045 = torch.aten.mul.Tensor %5044, %5044 : !torch.vtensor<[4,32,40,144],f64>, !torch.vtensor<[4,32,40,144],f64> -> !torch.vtensor<[4,32,40,144],f64>
%5046 = torch.aten.sum.dim_IntList %5045, %996, %true, %none : !torch.vtensor<[4,32,40,144],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f64>
%5047 = torch.aten.div.Scalar %5046, %int5760 : !torch.vtensor<[4,32,1,1],f64>, !torch.int -> !torch.vtensor<[4,32,1,1],f64>
%5048 = torch.aten.to.dtype %5047, %int6, %false, %false, %none : !torch.vtensor<[4,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f32>
%5049 = torch.aten.sum.dim_IntList %5040, %996, %true, %none : !torch.vtensor<[4,32,40,144],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f32>
%5050 = torch.aten.div.Scalar %5049, %int5760 : !torch.vtensor<[4,32,1,1],f32>, !torch.int -> !torch.vtensor<[4,32,1,1],f32>
%5051 = torch.aten.add.Tensor %5048, %939, %int1 : !torch.vtensor<[4,32,1,1],f32>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,32,1,1],f32>
%5052 = torch.aten.rsqrt %5051 : !torch.vtensor<[4,32,1,1],f32> -> !torch.vtensor<[4,32,1,1],f32>
%5053 = torch.aten.sub.Tensor %5033, %5050, %int1 : !torch.vtensor<[4,32,40,144],f16>, !torch.vtensor<[4,32,1,1],f32>, !torch.int -> !torch.vtensor<[4,32,40,144],f32>
%5054 = torch.aten.mul.Tensor %5053, %5052 : !torch.vtensor<[4,32,40,144],f32>, !torch.vtensor<[4,32,1,1],f32> -> !torch.vtensor<[4,32,40,144],f32>
%5055 = torch.aten.view %5054, %4921 : !torch.vtensor<[4,32,40,144],f32>, !torch.list<int> -> !torch.vtensor<[4,1280,12,12],f32>
%5056 = torch.aten.unsqueeze %608, %int0 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1,1280],f16>
%5057 = torch.aten.unsqueeze %5056, %int2 : !torch.vtensor<[1,1280],f16>, !torch.int -> !torch.vtensor<[1,1280,1],f16>
%5058 = torch.aten.unsqueeze %5057, %int3 : !torch.vtensor<[1,1280,1],f16>, !torch.int -> !torch.vtensor<[1,1280,1,1],f16>
%5059 = torch.aten.unsqueeze %607, %int0 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1,1280],f16>
%5060 = torch.aten.unsqueeze %5059, %int2 : !torch.vtensor<[1,1280],f16>, !torch.int -> !torch.vtensor<[1,1280,1],f16>
%5061 = torch.aten.unsqueeze %5060, %int3 : !torch.vtensor<[1,1280,1],f16>, !torch.int -> !torch.vtensor<[1,1280,1,1],f16>
%5062 = torch.aten.mul.Tensor %5055, %5061 : !torch.vtensor<[4,1280,12,12],f32>, !torch.vtensor<[1,1280,1,1],f16> -> !torch.vtensor<[4,1280,12,12],f32>
%5063 = torch.aten.add.Tensor %5062, %5058, %int1 : !torch.vtensor<[4,1280,12,12],f32>, !torch.vtensor<[1,1280,1,1],f16>, !torch.int -> !torch.vtensor<[4,1280,12,12],f32>
%5064 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%5065 = torch.aten.to.dtype %5064, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%5066 = torch.prim.ListConstruct %int4, %int1280, %int12, %int12 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5067 = torch.aten.broadcast_to %5065, %5066 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[4,1280,12,12],f16>
%5068 = torch.aten.to.dtype %5063, %int5, %false, %false, %none : !torch.vtensor<[4,1280,12,12],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,1280,12,12],f16>
%5069 = torch.prim.ListConstruct %int4, %int1280, %int12, %int12 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5070 = torch.aten.broadcast_to %5068, %5069 : !torch.vtensor<[4,1280,12,12],f16>, !torch.list<int> -> !torch.vtensor<[4,1280,12,12],f16>
%5071 = torch.aten.sigmoid %5070 : !torch.vtensor<[4,1280,12,12],f16> -> !torch.vtensor<[4,1280,12,12],f16>
%5072 = torch.aten.mul.Tensor %5071, %5070 : !torch.vtensor<[4,1280,12,12],f16>, !torch.vtensor<[4,1280,12,12],f16> -> !torch.vtensor<[4,1280,12,12],f16>
%5073 = torch.aten.view %5072, %4940 : !torch.vtensor<[4,1280,12,12],f16>, !torch.list<int> -> !torch.vtensor<[4,80,16,12,12],f16>
%5074 = torch.aten.abs %5073 : !torch.vtensor<[4,80,16,12,12],f16> -> !torch.vtensor<[4,80,16,12,12],f16>
%values_204, %indices_205 = torch.aten.max.dim %5074, %int2, %true : !torch.vtensor<[4,80,16,12,12],f16>, !torch.int, !torch.bool -> !torch.vtensor<[4,80,1,12,12],f16>, !torch.vtensor<[4,80,1,12,12],si64>
%5075 = torch.aten.view %values_204, %4943 : !torch.vtensor<[4,80,1,12,12],f16>, !torch.list<int> -> !torch.vtensor<[4,80,1,12,12],f16>
%5076 = torch.aten.broadcast_to %5075, %4940 : !torch.vtensor<[4,80,1,12,12],f16>, !torch.list<int> -> !torch.vtensor<[4,80,16,12,12],f16>
%5077 = torch.aten.clone %5076, %int0 : !torch.vtensor<[4,80,16,12,12],f16>, !torch.int -> !torch.vtensor<[4,80,16,12,12],f16>
%5078 = torch.aten.view %5077, %4921 : !torch.vtensor<[4,80,16,12,12],f16>, !torch.list<int> -> !torch.vtensor<[4,1280,12,12],f16>
%5079 = torch.aten.div.Scalar %5078, %int128 : !torch.vtensor<[4,1280,12,12],f16>, !torch.int -> !torch.vtensor<[4,1280,12,12],f16>
%5080 = torch.aten.detach %936 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%5081 = torch.aten.detach %5080 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%5082 = torch.aten.div.Tensor %5072, %5079 : !torch.vtensor<[4,1280,12,12],f16>, !torch.vtensor<[4,1280,12,12],f16> -> !torch.vtensor<[4,1280,12,12],f16>
%5083 = torch.aten.add.Tensor %5082, %5081, %int1 : !torch.vtensor<[4,1280,12,12],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,1280,12,12],f16>
%5084 = torch.aten.round %5083 : !torch.vtensor<[4,1280,12,12],f16> -> !torch.vtensor<[4,1280,12,12],f16>
%5085 = torch.aten.clamp %5084, %int-128, %int127 : !torch.vtensor<[4,1280,12,12],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,1280,12,12],f16>
%5086 = torch.aten.sub.Tensor %5085, %5081, %int1 : !torch.vtensor<[4,1280,12,12],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,1280,12,12],f16>
%5087 = torch.aten.mul.Tensor %5086, %5079 : !torch.vtensor<[4,1280,12,12],f16>, !torch.vtensor<[4,1280,12,12],f16> -> !torch.vtensor<[4,1280,12,12],f16>
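// Matching weight dequantization: broadcast the per-group f16 scales
// [1280,80,1,3,3] to [1280,80,16,3,3], flatten to [1280,1280,3,3], and
// multiply into the si8 weights before the 3x3 convolution (the 12x12
// output shape implies stride 1, padding 1).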
%5088 = torch.aten.broadcast_to %606, %3722 : !torch.vtensor<[1280,80,1,3,3],f16>, !torch.list<int> -> !torch.vtensor<[1280,80,16,3,3],f16>
%5089 = torch.aten.clone %5088, %int0 : !torch.vtensor<[1280,80,16,3,3],f16>, !torch.int -> !torch.vtensor<[1280,80,16,3,3],f16>
%5090 = torch.aten.view %5089, %3725 : !torch.vtensor<[1280,80,16,3,3],f16>, !torch.list<int> -> !torch.vtensor<[1280,1280,3,3],f16>
%5091 = torch.aten.mul.Tensor %605, %5090 : !torch.vtensor<[1280,1280,3,3],si8>, !torch.vtensor<[1280,1280,3,3],f16> -> !torch.vtensor<[1280,1280,3,3],f16>
%5092 = torch.aten.convolution %5087, %5091, %604, %984, %984, %984, %false, %985, %int1 : !torch.vtensor<[4,1280,12,12],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[4,1280,12,12],f16>
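// Timestep conditioning: SiLU on the [4,1280] embedding (%983), a linear
// projection (%603 weight, %602 bias), unsqueeze to [4,1280,1,1], and add
// as a per-channel bias to the conv output, consistent with the
// time-embedding injection of a diffusion ResNet block.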
%5093 = torch.aten.sigmoid %983 : !torch.vtensor<[4,1280],f16> -> !torch.vtensor<[4,1280],f16>
%5094 = torch.aten.mul.Tensor %5093, %983 : !torch.vtensor<[4,1280],f16>, !torch.vtensor<[4,1280],f16> -> !torch.vtensor<[4,1280],f16>
%5095 = torch.aten.transpose.int %603, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%5096 = torch.aten.mm %5094, %5095 : !torch.vtensor<[4,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[4,1280],f16>
%5097 = torch.aten.mul.Scalar %602, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16>
%5098 = torch.aten.add.Tensor %5097, %5096, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[4,1280],f16>, !torch.int -> !torch.vtensor<[4,1280],f16>
%5099 = torch.aten.unsqueeze %5098, %int2 : !torch.vtensor<[4,1280],f16>, !torch.int -> !torch.vtensor<[4,1280,1],f16>
%5100 = torch.aten.unsqueeze %5099, %int3 : !torch.vtensor<[4,1280,1],f16>, !torch.int -> !torch.vtensor<[4,1280,1,1],f16>
%5101 = torch.aten.add.Tensor %5092, %5100, %int1 : !torch.vtensor<[4,1280,12,12],f16>, !torch.vtensor<[4,1280,1,1],f16>, !torch.int -> !torch.vtensor<[4,1280,12,12],f16>
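// GroupNorm over 32 groups: view [4,1280,12,12] as [4,32,40,144], compute
// the variance in f64 and the mean in f32 (5760 = 40 * 144 elements per
// group), normalize with rsqrt(var + eps) (eps in %939), then apply the
// per-channel scale (%600) and shift (%601).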
%5102 = torch.aten.view %5101, %4898 : !torch.vtensor<[4,1280,12,12],f16>, !torch.list<int> -> !torch.vtensor<[4,32,40,144],f16>
%5103 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%5104 = torch.aten.to.dtype %5103, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%5105 = torch.prim.ListConstruct %int4, %int32, %int40, %int144 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5106 = torch.aten.broadcast_to %5104, %5105 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[4,32,40,144],f32>
%5107 = torch.aten.to.dtype %5102, %int6, %false, %false, %none : !torch.vtensor<[4,32,40,144],f16>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,32,40,144],f32>
%5108 = torch.prim.ListConstruct %int4, %int32, %int40, %int144 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5109 = torch.aten.broadcast_to %5107, %5108 : !torch.vtensor<[4,32,40,144],f32>, !torch.list<int> -> !torch.vtensor<[4,32,40,144],f32>
%5110 = torch.aten.to.dtype %5109, %int7, %false, %false, %none : !torch.vtensor<[4,32,40,144],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,32,40,144],f64>
%5111 = torch.aten.sum.dim_IntList %5110, %996, %true, %none : !torch.vtensor<[4,32,40,144],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f64>
%5112 = torch.aten.div.Scalar %5111, %int5760 : !torch.vtensor<[4,32,1,1],f64>, !torch.int -> !torch.vtensor<[4,32,1,1],f64>
%5113 = torch.aten.sub.Tensor %5110, %5112, %float1.000000e00 : !torch.vtensor<[4,32,40,144],f64>, !torch.vtensor<[4,32,1,1],f64>, !torch.float -> !torch.vtensor<[4,32,40,144],f64>
%5114 = torch.aten.mul.Tensor %5113, %5113 : !torch.vtensor<[4,32,40,144],f64>, !torch.vtensor<[4,32,40,144],f64> -> !torch.vtensor<[4,32,40,144],f64>
%5115 = torch.aten.sum.dim_IntList %5114, %996, %true, %none : !torch.vtensor<[4,32,40,144],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f64>
%5116 = torch.aten.div.Scalar %5115, %int5760 : !torch.vtensor<[4,32,1,1],f64>, !torch.int -> !torch.vtensor<[4,32,1,1],f64>
%5117 = torch.aten.to.dtype %5116, %int6, %false, %false, %none : !torch.vtensor<[4,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f32>
%5118 = torch.aten.sum.dim_IntList %5109, %996, %true, %none : !torch.vtensor<[4,32,40,144],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f32>
%5119 = torch.aten.div.Scalar %5118, %int5760 : !torch.vtensor<[4,32,1,1],f32>, !torch.int -> !torch.vtensor<[4,32,1,1],f32>
%5120 = torch.aten.add.Tensor %5117, %939, %int1 : !torch.vtensor<[4,32,1,1],f32>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,32,1,1],f32>
%5121 = torch.aten.rsqrt %5120 : !torch.vtensor<[4,32,1,1],f32> -> !torch.vtensor<[4,32,1,1],f32>
%5122 = torch.aten.sub.Tensor %5102, %5119, %int1 : !torch.vtensor<[4,32,40,144],f16>, !torch.vtensor<[4,32,1,1],f32>, !torch.int -> !torch.vtensor<[4,32,40,144],f32>
%5123 = torch.aten.mul.Tensor %5122, %5121 : !torch.vtensor<[4,32,40,144],f32>, !torch.vtensor<[4,32,1,1],f32> -> !torch.vtensor<[4,32,40,144],f32>
%5124 = torch.aten.view %5123, %4921 : !torch.vtensor<[4,32,40,144],f32>, !torch.list<int> -> !torch.vtensor<[4,1280,12,12],f32>
%5125 = torch.aten.unsqueeze %601, %int0 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1,1280],f16>
%5126 = torch.aten.unsqueeze %5125, %int2 : !torch.vtensor<[1,1280],f16>, !torch.int -> !torch.vtensor<[1,1280,1],f16>
%5127 = torch.aten.unsqueeze %5126, %int3 : !torch.vtensor<[1,1280,1],f16>, !torch.int -> !torch.vtensor<[1,1280,1,1],f16>
%5128 = torch.aten.unsqueeze %600, %int0 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1,1280],f16>
%5129 = torch.aten.unsqueeze %5128, %int2 : !torch.vtensor<[1,1280],f16>, !torch.int -> !torch.vtensor<[1,1280,1],f16>
%5130 = torch.aten.unsqueeze %5129, %int3 : !torch.vtensor<[1,1280,1],f16>, !torch.int -> !torch.vtensor<[1,1280,1,1],f16>
%5131 = torch.aten.mul.Tensor %5124, %5130 : !torch.vtensor<[4,1280,12,12],f32>, !torch.vtensor<[1,1280,1,1],f16> -> !torch.vtensor<[4,1280,12,12],f32>
%5132 = torch.aten.add.Tensor %5131, %5127, %int1 : !torch.vtensor<[4,1280,12,12],f32>, !torch.vtensor<[1,1280,1,1],f16>, !torch.int -> !torch.vtensor<[4,1280,12,12],f32>
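// Second half of the ResNet block: the same cast-to-f16 SiLU, activation
// fake-quant, and dequantized 3x3 convolution pattern repeats for conv2.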
%5133 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%5134 = torch.aten.to.dtype %5133, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%5135 = torch.prim.ListConstruct %int4, %int1280, %int12, %int12 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5136 = torch.aten.broadcast_to %5134, %5135 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[4,1280,12,12],f16>
%5137 = torch.aten.to.dtype %5132, %int5, %false, %false, %none : !torch.vtensor<[4,1280,12,12],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,1280,12,12],f16>
%5138 = torch.prim.ListConstruct %int4, %int1280, %int12, %int12 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5139 = torch.aten.broadcast_to %5137, %5138 : !torch.vtensor<[4,1280,12,12],f16>, !torch.list<int> -> !torch.vtensor<[4,1280,12,12],f16>
%5140 = torch.aten.sigmoid %5139 : !torch.vtensor<[4,1280,12,12],f16> -> !torch.vtensor<[4,1280,12,12],f16>
%5141 = torch.aten.mul.Tensor %5140, %5139 : !torch.vtensor<[4,1280,12,12],f16>, !torch.vtensor<[4,1280,12,12],f16> -> !torch.vtensor<[4,1280,12,12],f16>
%5142 = torch.aten.view %5141, %4940 : !torch.vtensor<[4,1280,12,12],f16>, !torch.list<int> -> !torch.vtensor<[4,80,16,12,12],f16>
%5143 = torch.aten.abs %5142 : !torch.vtensor<[4,80,16,12,12],f16> -> !torch.vtensor<[4,80,16,12,12],f16>
%values_206, %indices_207 = torch.aten.max.dim %5143, %int2, %true : !torch.vtensor<[4,80,16,12,12],f16>, !torch.int, !torch.bool -> !torch.vtensor<[4,80,1,12,12],f16>, !torch.vtensor<[4,80,1,12,12],si64>
%5144 = torch.aten.view %values_206, %4943 : !torch.vtensor<[4,80,1,12,12],f16>, !torch.list<int> -> !torch.vtensor<[4,80,1,12,12],f16>
%5145 = torch.aten.broadcast_to %5144, %4940 : !torch.vtensor<[4,80,1,12,12],f16>, !torch.list<int> -> !torch.vtensor<[4,80,16,12,12],f16>
%5146 = torch.aten.clone %5145, %int0 : !torch.vtensor<[4,80,16,12,12],f16>, !torch.int -> !torch.vtensor<[4,80,16,12,12],f16>
%5147 = torch.aten.view %5146, %4921 : !torch.vtensor<[4,80,16,12,12],f16>, !torch.list<int> -> !torch.vtensor<[4,1280,12,12],f16>
%5148 = torch.aten.div.Scalar %5147, %int128 : !torch.vtensor<[4,1280,12,12],f16>, !torch.int -> !torch.vtensor<[4,1280,12,12],f16>
%5149 = torch.aten.detach %936 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%5150 = torch.aten.detach %5149 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%5151 = torch.aten.div.Tensor %5141, %5148 : !torch.vtensor<[4,1280,12,12],f16>, !torch.vtensor<[4,1280,12,12],f16> -> !torch.vtensor<[4,1280,12,12],f16>
%5152 = torch.aten.add.Tensor %5151, %5150, %int1 : !torch.vtensor<[4,1280,12,12],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,1280,12,12],f16>
%5153 = torch.aten.round %5152 : !torch.vtensor<[4,1280,12,12],f16> -> !torch.vtensor<[4,1280,12,12],f16>
%5154 = torch.aten.clamp %5153, %int-128, %int127 : !torch.vtensor<[4,1280,12,12],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,1280,12,12],f16>
%5155 = torch.aten.sub.Tensor %5154, %5150, %int1 : !torch.vtensor<[4,1280,12,12],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,1280,12,12],f16>
%5156 = torch.aten.mul.Tensor %5155, %5148 : !torch.vtensor<[4,1280,12,12],f16>, !torch.vtensor<[4,1280,12,12],f16> -> !torch.vtensor<[4,1280,12,12],f16>
%5157 = torch.aten.broadcast_to %599, %3722 : !torch.vtensor<[1280,80,1,3,3],f16>, !torch.list<int> -> !torch.vtensor<[1280,80,16,3,3],f16>
%5158 = torch.aten.clone %5157, %int0 : !torch.vtensor<[1280,80,16,3,3],f16>, !torch.int -> !torch.vtensor<[1280,80,16,3,3],f16>
%5159 = torch.aten.view %5158, %3725 : !torch.vtensor<[1280,80,16,3,3],f16>, !torch.list<int> -> !torch.vtensor<[1280,1280,3,3],f16>
%5160 = torch.aten.mul.Tensor %598, %5159 : !torch.vtensor<[1280,1280,3,3],si8>, !torch.vtensor<[1280,1280,3,3],f16> -> !torch.vtensor<[1280,1280,3,3],f16>
%5161 = torch.aten.convolution %5156, %5160, %597, %984, %984, %984, %false, %985, %int1 : !torch.vtensor<[4,1280,12,12],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[4,1280,12,12],f16>
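// Residual connection: add the block input (%5032) to the conv2 output,
// then divide by a scalar (%925), likely the block's output scale factor.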
%5162 = torch.aten.add.Tensor %5032, %5161, %int1 : !torch.vtensor<[4,1280,12,12],f16>, !torch.vtensor<[4,1280,12,12],f16>, !torch.int -> !torch.vtensor<[4,1280,12,12],f16>
%5163 = torch.aten.div.Tensor %5162, %925 : !torch.vtensor<[4,1280,12,12],f16>, !torch.vtensor<[],f16> -> !torch.vtensor<[4,1280,12,12],f16>
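// A second ResNet block with the same structure follows: GroupNorm, SiLU,
// activation fake-quant, dequantized conv1, timestep bias, GroupNorm, SiLU,
// fake-quant, dequantized conv2, and the skip add (here the output divisor
// is 1, a no-op).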
%5164 = torch.aten.view %5163, %4898 : !torch.vtensor<[4,1280,12,12],f16>, !torch.list<int> -> !torch.vtensor<[4,32,40,144],f16>
%5165 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%5166 = torch.aten.to.dtype %5165, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%5167 = torch.prim.ListConstruct %int4, %int32, %int40, %int144 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5168 = torch.aten.broadcast_to %5166, %5167 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[4,32,40,144],f32>
%5169 = torch.aten.to.dtype %5164, %int6, %false, %false, %none : !torch.vtensor<[4,32,40,144],f16>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,32,40,144],f32>
%5170 = torch.prim.ListConstruct %int4, %int32, %int40, %int144 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5171 = torch.aten.broadcast_to %5169, %5170 : !torch.vtensor<[4,32,40,144],f32>, !torch.list<int> -> !torch.vtensor<[4,32,40,144],f32>
%5172 = torch.aten.to.dtype %5171, %int7, %false, %false, %none : !torch.vtensor<[4,32,40,144],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,32,40,144],f64>
%5173 = torch.aten.sum.dim_IntList %5172, %996, %true, %none : !torch.vtensor<[4,32,40,144],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f64>
%5174 = torch.aten.div.Scalar %5173, %int5760 : !torch.vtensor<[4,32,1,1],f64>, !torch.int -> !torch.vtensor<[4,32,1,1],f64>
%5175 = torch.aten.sub.Tensor %5172, %5174, %float1.000000e00 : !torch.vtensor<[4,32,40,144],f64>, !torch.vtensor<[4,32,1,1],f64>, !torch.float -> !torch.vtensor<[4,32,40,144],f64>
%5176 = torch.aten.mul.Tensor %5175, %5175 : !torch.vtensor<[4,32,40,144],f64>, !torch.vtensor<[4,32,40,144],f64> -> !torch.vtensor<[4,32,40,144],f64>
%5177 = torch.aten.sum.dim_IntList %5176, %996, %true, %none : !torch.vtensor<[4,32,40,144],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f64>
%5178 = torch.aten.div.Scalar %5177, %int5760 : !torch.vtensor<[4,32,1,1],f64>, !torch.int -> !torch.vtensor<[4,32,1,1],f64>
%5179 = torch.aten.to.dtype %5178, %int6, %false, %false, %none : !torch.vtensor<[4,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f32>
%5180 = torch.aten.sum.dim_IntList %5171, %996, %true, %none : !torch.vtensor<[4,32,40,144],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f32>
%5181 = torch.aten.div.Scalar %5180, %int5760 : !torch.vtensor<[4,32,1,1],f32>, !torch.int -> !torch.vtensor<[4,32,1,1],f32>
%5182 = torch.aten.add.Tensor %5179, %939, %int1 : !torch.vtensor<[4,32,1,1],f32>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,32,1,1],f32>
%5183 = torch.aten.rsqrt %5182 : !torch.vtensor<[4,32,1,1],f32> -> !torch.vtensor<[4,32,1,1],f32>
%5184 = torch.aten.sub.Tensor %5164, %5181, %int1 : !torch.vtensor<[4,32,40,144],f16>, !torch.vtensor<[4,32,1,1],f32>, !torch.int -> !torch.vtensor<[4,32,40,144],f32>
%5185 = torch.aten.mul.Tensor %5184, %5183 : !torch.vtensor<[4,32,40,144],f32>, !torch.vtensor<[4,32,1,1],f32> -> !torch.vtensor<[4,32,40,144],f32>
%5186 = torch.aten.view %5185, %4921 : !torch.vtensor<[4,32,40,144],f32>, !torch.list<int> -> !torch.vtensor<[4,1280,12,12],f32>
%5187 = torch.aten.unsqueeze %596, %int0 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1,1280],f16>
%5188 = torch.aten.unsqueeze %5187, %int2 : !torch.vtensor<[1,1280],f16>, !torch.int -> !torch.vtensor<[1,1280,1],f16>
%5189 = torch.aten.unsqueeze %5188, %int3 : !torch.vtensor<[1,1280,1],f16>, !torch.int -> !torch.vtensor<[1,1280,1,1],f16>
%5190 = torch.aten.unsqueeze %595, %int0 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1,1280],f16>
%5191 = torch.aten.unsqueeze %5190, %int2 : !torch.vtensor<[1,1280],f16>, !torch.int -> !torch.vtensor<[1,1280,1],f16>
%5192 = torch.aten.unsqueeze %5191, %int3 : !torch.vtensor<[1,1280,1],f16>, !torch.int -> !torch.vtensor<[1,1280,1,1],f16>
%5193 = torch.aten.mul.Tensor %5186, %5192 : !torch.vtensor<[4,1280,12,12],f32>, !torch.vtensor<[1,1280,1,1],f16> -> !torch.vtensor<[4,1280,12,12],f32>
%5194 = torch.aten.add.Tensor %5193, %5189, %int1 : !torch.vtensor<[4,1280,12,12],f32>, !torch.vtensor<[1,1280,1,1],f16>, !torch.int -> !torch.vtensor<[4,1280,12,12],f32>
%5195 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%5196 = torch.aten.to.dtype %5195, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%5197 = torch.prim.ListConstruct %int4, %int1280, %int12, %int12 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5198 = torch.aten.broadcast_to %5196, %5197 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[4,1280,12,12],f16>
%5199 = torch.aten.to.dtype %5194, %int5, %false, %false, %none : !torch.vtensor<[4,1280,12,12],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,1280,12,12],f16>
%5200 = torch.prim.ListConstruct %int4, %int1280, %int12, %int12 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5201 = torch.aten.broadcast_to %5199, %5200 : !torch.vtensor<[4,1280,12,12],f16>, !torch.list<int> -> !torch.vtensor<[4,1280,12,12],f16>
%5202 = torch.aten.sigmoid %5201 : !torch.vtensor<[4,1280,12,12],f16> -> !torch.vtensor<[4,1280,12,12],f16>
%5203 = torch.aten.mul.Tensor %5202, %5201 : !torch.vtensor<[4,1280,12,12],f16>, !torch.vtensor<[4,1280,12,12],f16> -> !torch.vtensor<[4,1280,12,12],f16>
%5204 = torch.aten.view %5203, %4940 : !torch.vtensor<[4,1280,12,12],f16>, !torch.list<int> -> !torch.vtensor<[4,80,16,12,12],f16>
%5205 = torch.aten.abs %5204 : !torch.vtensor<[4,80,16,12,12],f16> -> !torch.vtensor<[4,80,16,12,12],f16>
%values_208, %indices_209 = torch.aten.max.dim %5205, %int2, %true : !torch.vtensor<[4,80,16,12,12],f16>, !torch.int, !torch.bool -> !torch.vtensor<[4,80,1,12,12],f16>, !torch.vtensor<[4,80,1,12,12],si64>
%5206 = torch.aten.view %values_208, %4943 : !torch.vtensor<[4,80,1,12,12],f16>, !torch.list<int> -> !torch.vtensor<[4,80,1,12,12],f16>
%5207 = torch.aten.broadcast_to %5206, %4940 : !torch.vtensor<[4,80,1,12,12],f16>, !torch.list<int> -> !torch.vtensor<[4,80,16,12,12],f16>
%5208 = torch.aten.clone %5207, %int0 : !torch.vtensor<[4,80,16,12,12],f16>, !torch.int -> !torch.vtensor<[4,80,16,12,12],f16>
%5209 = torch.aten.view %5208, %4921 : !torch.vtensor<[4,80,16,12,12],f16>, !torch.list<int> -> !torch.vtensor<[4,1280,12,12],f16>
%5210 = torch.aten.div.Scalar %5209, %int128 : !torch.vtensor<[4,1280,12,12],f16>, !torch.int -> !torch.vtensor<[4,1280,12,12],f16>
%5211 = torch.aten.detach %936 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%5212 = torch.aten.detach %5211 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%5213 = torch.aten.div.Tensor %5203, %5210 : !torch.vtensor<[4,1280,12,12],f16>, !torch.vtensor<[4,1280,12,12],f16> -> !torch.vtensor<[4,1280,12,12],f16>
%5214 = torch.aten.add.Tensor %5213, %5212, %int1 : !torch.vtensor<[4,1280,12,12],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,1280,12,12],f16>
%5215 = torch.aten.round %5214 : !torch.vtensor<[4,1280,12,12],f16> -> !torch.vtensor<[4,1280,12,12],f16>
%5216 = torch.aten.clamp %5215, %int-128, %int127 : !torch.vtensor<[4,1280,12,12],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,1280,12,12],f16>
%5217 = torch.aten.sub.Tensor %5216, %5212, %int1 : !torch.vtensor<[4,1280,12,12],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,1280,12,12],f16>
%5218 = torch.aten.mul.Tensor %5217, %5210 : !torch.vtensor<[4,1280,12,12],f16>, !torch.vtensor<[4,1280,12,12],f16> -> !torch.vtensor<[4,1280,12,12],f16>
%5219 = torch.aten.broadcast_to %594, %3722 : !torch.vtensor<[1280,80,1,3,3],f16>, !torch.list<int> -> !torch.vtensor<[1280,80,16,3,3],f16>
%5220 = torch.aten.clone %5219, %int0 : !torch.vtensor<[1280,80,16,3,3],f16>, !torch.int -> !torch.vtensor<[1280,80,16,3,3],f16>
%5221 = torch.aten.view %5220, %3725 : !torch.vtensor<[1280,80,16,3,3],f16>, !torch.list<int> -> !torch.vtensor<[1280,1280,3,3],f16>
%5222 = torch.aten.mul.Tensor %593, %5221 : !torch.vtensor<[1280,1280,3,3],si8>, !torch.vtensor<[1280,1280,3,3],f16> -> !torch.vtensor<[1280,1280,3,3],f16>
%5223 = torch.aten.convolution %5218, %5222, %592, %984, %984, %984, %false, %985, %int1 : !torch.vtensor<[4,1280,12,12],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[4,1280,12,12],f16>
%5224 = torch.aten.sigmoid %983 : !torch.vtensor<[4,1280],f16> -> !torch.vtensor<[4,1280],f16>
%5225 = torch.aten.mul.Tensor %5224, %983 : !torch.vtensor<[4,1280],f16>, !torch.vtensor<[4,1280],f16> -> !torch.vtensor<[4,1280],f16>
%5226 = torch.aten.transpose.int %591, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%5227 = torch.aten.mm %5225, %5226 : !torch.vtensor<[4,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[4,1280],f16>
%5228 = torch.aten.mul.Scalar %590, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16>
%5229 = torch.aten.add.Tensor %5228, %5227, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[4,1280],f16>, !torch.int -> !torch.vtensor<[4,1280],f16>
%5230 = torch.aten.unsqueeze %5229, %int2 : !torch.vtensor<[4,1280],f16>, !torch.int -> !torch.vtensor<[4,1280,1],f16>
%5231 = torch.aten.unsqueeze %5230, %int3 : !torch.vtensor<[4,1280,1],f16>, !torch.int -> !torch.vtensor<[4,1280,1,1],f16>
%5232 = torch.aten.add.Tensor %5223, %5231, %int1 : !torch.vtensor<[4,1280,12,12],f16>, !torch.vtensor<[4,1280,1,1],f16>, !torch.int -> !torch.vtensor<[4,1280,12,12],f16>
%5233 = torch.aten.view %5232, %4898 : !torch.vtensor<[4,1280,12,12],f16>, !torch.list<int> -> !torch.vtensor<[4,32,40,144],f16>
%5234 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%5235 = torch.aten.to.dtype %5234, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%5236 = torch.prim.ListConstruct %int4, %int32, %int40, %int144 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5237 = torch.aten.broadcast_to %5235, %5236 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[4,32,40,144],f32>
%5238 = torch.aten.to.dtype %5233, %int6, %false, %false, %none : !torch.vtensor<[4,32,40,144],f16>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,32,40,144],f32>
%5239 = torch.prim.ListConstruct %int4, %int32, %int40, %int144 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5240 = torch.aten.broadcast_to %5238, %5239 : !torch.vtensor<[4,32,40,144],f32>, !torch.list<int> -> !torch.vtensor<[4,32,40,144],f32>
%5241 = torch.aten.to.dtype %5240, %int7, %false, %false, %none : !torch.vtensor<[4,32,40,144],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,32,40,144],f64>
%5242 = torch.aten.sum.dim_IntList %5241, %996, %true, %none : !torch.vtensor<[4,32,40,144],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f64>
%5243 = torch.aten.div.Scalar %5242, %int5760 : !torch.vtensor<[4,32,1,1],f64>, !torch.int -> !torch.vtensor<[4,32,1,1],f64>
%5244 = torch.aten.sub.Tensor %5241, %5243, %float1.000000e00 : !torch.vtensor<[4,32,40,144],f64>, !torch.vtensor<[4,32,1,1],f64>, !torch.float -> !torch.vtensor<[4,32,40,144],f64>
%5245 = torch.aten.mul.Tensor %5244, %5244 : !torch.vtensor<[4,32,40,144],f64>, !torch.vtensor<[4,32,40,144],f64> -> !torch.vtensor<[4,32,40,144],f64>
%5246 = torch.aten.sum.dim_IntList %5245, %996, %true, %none : !torch.vtensor<[4,32,40,144],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f64>
%5247 = torch.aten.div.Scalar %5246, %int5760 : !torch.vtensor<[4,32,1,1],f64>, !torch.int -> !torch.vtensor<[4,32,1,1],f64>
%5248 = torch.aten.to.dtype %5247, %int6, %false, %false, %none : !torch.vtensor<[4,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f32>
%5249 = torch.aten.sum.dim_IntList %5240, %996, %true, %none : !torch.vtensor<[4,32,40,144],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f32>
%5250 = torch.aten.div.Scalar %5249, %int5760 : !torch.vtensor<[4,32,1,1],f32>, !torch.int -> !torch.vtensor<[4,32,1,1],f32>
%5251 = torch.aten.add.Tensor %5248, %939, %int1 : !torch.vtensor<[4,32,1,1],f32>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,32,1,1],f32>
%5252 = torch.aten.rsqrt %5251 : !torch.vtensor<[4,32,1,1],f32> -> !torch.vtensor<[4,32,1,1],f32>
%5253 = torch.aten.sub.Tensor %5233, %5250, %int1 : !torch.vtensor<[4,32,40,144],f16>, !torch.vtensor<[4,32,1,1],f32>, !torch.int -> !torch.vtensor<[4,32,40,144],f32>
%5254 = torch.aten.mul.Tensor %5253, %5252 : !torch.vtensor<[4,32,40,144],f32>, !torch.vtensor<[4,32,1,1],f32> -> !torch.vtensor<[4,32,40,144],f32>
%5255 = torch.aten.view %5254, %4921 : !torch.vtensor<[4,32,40,144],f32>, !torch.list<int> -> !torch.vtensor<[4,1280,12,12],f32>
%5256 = torch.aten.unsqueeze %589, %int0 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1,1280],f16>
%5257 = torch.aten.unsqueeze %5256, %int2 : !torch.vtensor<[1,1280],f16>, !torch.int -> !torch.vtensor<[1,1280,1],f16>
%5258 = torch.aten.unsqueeze %5257, %int3 : !torch.vtensor<[1,1280,1],f16>, !torch.int -> !torch.vtensor<[1,1280,1,1],f16>
%5259 = torch.aten.unsqueeze %588, %int0 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1,1280],f16>
%5260 = torch.aten.unsqueeze %5259, %int2 : !torch.vtensor<[1,1280],f16>, !torch.int -> !torch.vtensor<[1,1280,1],f16>
%5261 = torch.aten.unsqueeze %5260, %int3 : !torch.vtensor<[1,1280,1],f16>, !torch.int -> !torch.vtensor<[1,1280,1,1],f16>
%5262 = torch.aten.mul.Tensor %5255, %5261 : !torch.vtensor<[4,1280,12,12],f32>, !torch.vtensor<[1,1280,1,1],f16> -> !torch.vtensor<[4,1280,12,12],f32>
%5263 = torch.aten.add.Tensor %5262, %5258, %int1 : !torch.vtensor<[4,1280,12,12],f32>, !torch.vtensor<[1,1280,1,1],f16>, !torch.int -> !torch.vtensor<[4,1280,12,12],f32>
%5264 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%5265 = torch.aten.to.dtype %5264, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%5266 = torch.prim.ListConstruct %int4, %int1280, %int12, %int12 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5267 = torch.aten.broadcast_to %5265, %5266 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[4,1280,12,12],f16>
%5268 = torch.aten.to.dtype %5263, %int5, %false, %false, %none : !torch.vtensor<[4,1280,12,12],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,1280,12,12],f16>
%5269 = torch.prim.ListConstruct %int4, %int1280, %int12, %int12 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5270 = torch.aten.broadcast_to %5268, %5269 : !torch.vtensor<[4,1280,12,12],f16>, !torch.list<int> -> !torch.vtensor<[4,1280,12,12],f16>
%5271 = torch.aten.sigmoid %5270 : !torch.vtensor<[4,1280,12,12],f16> -> !torch.vtensor<[4,1280,12,12],f16>
%5272 = torch.aten.mul.Tensor %5271, %5270 : !torch.vtensor<[4,1280,12,12],f16>, !torch.vtensor<[4,1280,12,12],f16> -> !torch.vtensor<[4,1280,12,12],f16>
%5273 = torch.aten.view %5272, %4940 : !torch.vtensor<[4,1280,12,12],f16>, !torch.list<int> -> !torch.vtensor<[4,80,16,12,12],f16>
%5274 = torch.aten.abs %5273 : !torch.vtensor<[4,80,16,12,12],f16> -> !torch.vtensor<[4,80,16,12,12],f16>
%values_210, %indices_211 = torch.aten.max.dim %5274, %int2, %true : !torch.vtensor<[4,80,16,12,12],f16>, !torch.int, !torch.bool -> !torch.vtensor<[4,80,1,12,12],f16>, !torch.vtensor<[4,80,1,12,12],si64>
%5275 = torch.aten.view %values_210, %4943 : !torch.vtensor<[4,80,1,12,12],f16>, !torch.list<int> -> !torch.vtensor<[4,80,1,12,12],f16>
%5276 = torch.aten.broadcast_to %5275, %4940 : !torch.vtensor<[4,80,1,12,12],f16>, !torch.list<int> -> !torch.vtensor<[4,80,16,12,12],f16>
%5277 = torch.aten.clone %5276, %int0 : !torch.vtensor<[4,80,16,12,12],f16>, !torch.int -> !torch.vtensor<[4,80,16,12,12],f16>
%5278 = torch.aten.view %5277, %4921 : !torch.vtensor<[4,80,16,12,12],f16>, !torch.list<int> -> !torch.vtensor<[4,1280,12,12],f16>
%5279 = torch.aten.div.Scalar %5278, %int128 : !torch.vtensor<[4,1280,12,12],f16>, !torch.int -> !torch.vtensor<[4,1280,12,12],f16>
%5280 = torch.aten.detach %936 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%5281 = torch.aten.detach %5280 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%5282 = torch.aten.div.Tensor %5272, %5279 : !torch.vtensor<[4,1280,12,12],f16>, !torch.vtensor<[4,1280,12,12],f16> -> !torch.vtensor<[4,1280,12,12],f16>
%5283 = torch.aten.add.Tensor %5282, %5281, %int1 : !torch.vtensor<[4,1280,12,12],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,1280,12,12],f16>
%5284 = torch.aten.round %5283 : !torch.vtensor<[4,1280,12,12],f16> -> !torch.vtensor<[4,1280,12,12],f16>
%5285 = torch.aten.clamp %5284, %int-128, %int127 : !torch.vtensor<[4,1280,12,12],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,1280,12,12],f16>
%5286 = torch.aten.sub.Tensor %5285, %5281, %int1 : !torch.vtensor<[4,1280,12,12],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,1280,12,12],f16>
%5287 = torch.aten.mul.Tensor %5286, %5279 : !torch.vtensor<[4,1280,12,12],f16>, !torch.vtensor<[4,1280,12,12],f16> -> !torch.vtensor<[4,1280,12,12],f16>
%5288 = torch.aten.broadcast_to %587, %3722 : !torch.vtensor<[1280,80,1,3,3],f16>, !torch.list<int> -> !torch.vtensor<[1280,80,16,3,3],f16>
%5289 = torch.aten.clone %5288, %int0 : !torch.vtensor<[1280,80,16,3,3],f16>, !torch.int -> !torch.vtensor<[1280,80,16,3,3],f16>
%5290 = torch.aten.view %5289, %3725 : !torch.vtensor<[1280,80,16,3,3],f16>, !torch.list<int> -> !torch.vtensor<[1280,1280,3,3],f16>
%5291 = torch.aten.mul.Tensor %586, %5290 : !torch.vtensor<[1280,1280,3,3],si8>, !torch.vtensor<[1280,1280,3,3],f16> -> !torch.vtensor<[1280,1280,3,3],f16>
%5292 = torch.aten.convolution %5287, %5291, %585, %984, %984, %984, %false, %985, %int1 : !torch.vtensor<[4,1280,12,12],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[4,1280,12,12],f16>
%5293 = torch.aten.add.Tensor %5163, %5292, %int1 : !torch.vtensor<[4,1280,12,12],f16>, !torch.vtensor<[4,1280,12,12],f16>, !torch.int -> !torch.vtensor<[4,1280,12,12],f16>
%5294 = torch.aten.div.Scalar %5293, %int1 : !torch.vtensor<[4,1280,12,12],f16>, !torch.int -> !torch.vtensor<[4,1280,12,12],f16>
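// What follows is consistent with a spatial transformer block: GroupNorm
// (eps in %924) over the [4,1280,12,12] feature map ahead of the
// projection into a token sequence.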
%5295 = torch.aten.view %5294, %4898 : !torch.vtensor<[4,1280,12,12],f16>, !torch.list<int> -> !torch.vtensor<[4,32,40,144],f16>
%5296 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%5297 = torch.aten.to.dtype %5296, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%5298 = torch.prim.ListConstruct %int4, %int32, %int40, %int144 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5299 = torch.aten.broadcast_to %5297, %5298 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[4,32,40,144],f32>
%5300 = torch.aten.to.dtype %5295, %int6, %false, %false, %none : !torch.vtensor<[4,32,40,144],f16>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,32,40,144],f32>
%5301 = torch.prim.ListConstruct %int4, %int32, %int40, %int144 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5302 = torch.aten.broadcast_to %5300, %5301 : !torch.vtensor<[4,32,40,144],f32>, !torch.list<int> -> !torch.vtensor<[4,32,40,144],f32>
%5303 = torch.aten.to.dtype %5302, %int7, %false, %false, %none : !torch.vtensor<[4,32,40,144],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,32,40,144],f64>
%5304 = torch.aten.sum.dim_IntList %5303, %996, %true, %none : !torch.vtensor<[4,32,40,144],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f64>
%5305 = torch.aten.div.Scalar %5304, %int5760 : !torch.vtensor<[4,32,1,1],f64>, !torch.int -> !torch.vtensor<[4,32,1,1],f64>
%5306 = torch.aten.sub.Tensor %5303, %5305, %float1.000000e00 : !torch.vtensor<[4,32,40,144],f64>, !torch.vtensor<[4,32,1,1],f64>, !torch.float -> !torch.vtensor<[4,32,40,144],f64>
%5307 = torch.aten.mul.Tensor %5306, %5306 : !torch.vtensor<[4,32,40,144],f64>, !torch.vtensor<[4,32,40,144],f64> -> !torch.vtensor<[4,32,40,144],f64>
%5308 = torch.aten.sum.dim_IntList %5307, %996, %true, %none : !torch.vtensor<[4,32,40,144],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f64>
%5309 = torch.aten.div.Scalar %5308, %int5760 : !torch.vtensor<[4,32,1,1],f64>, !torch.int -> !torch.vtensor<[4,32,1,1],f64>
%5310 = torch.aten.to.dtype %5309, %int6, %false, %false, %none : !torch.vtensor<[4,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f32>
%5311 = torch.aten.sum.dim_IntList %5302, %996, %true, %none : !torch.vtensor<[4,32,40,144],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f32>
%5312 = torch.aten.div.Scalar %5311, %int5760 : !torch.vtensor<[4,32,1,1],f32>, !torch.int -> !torch.vtensor<[4,32,1,1],f32>
%5313 = torch.aten.add.Tensor %5310, %924, %int1 : !torch.vtensor<[4,32,1,1],f32>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,32,1,1],f32>
%5314 = torch.aten.rsqrt %5313 : !torch.vtensor<[4,32,1,1],f32> -> !torch.vtensor<[4,32,1,1],f32>
%5315 = torch.aten.sub.Tensor %5295, %5312, %int1 : !torch.vtensor<[4,32,40,144],f16>, !torch.vtensor<[4,32,1,1],f32>, !torch.int -> !torch.vtensor<[4,32,40,144],f32>
%5316 = torch.aten.mul.Tensor %5315, %5314 : !torch.vtensor<[4,32,40,144],f32>, !torch.vtensor<[4,32,1,1],f32> -> !torch.vtensor<[4,32,40,144],f32>
%5317 = torch.aten.view %5316, %4921 : !torch.vtensor<[4,32,40,144],f32>, !torch.list<int> -> !torch.vtensor<[4,1280,12,12],f32>
%5318 = torch.aten.unsqueeze %584, %int0 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1,1280],f16>
%5319 = torch.aten.unsqueeze %5318, %int2 : !torch.vtensor<[1,1280],f16>, !torch.int -> !torch.vtensor<[1,1280,1],f16>
%5320 = torch.aten.unsqueeze %5319, %int3 : !torch.vtensor<[1,1280,1],f16>, !torch.int -> !torch.vtensor<[1,1280,1,1],f16>
%5321 = torch.aten.unsqueeze %583, %int0 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1,1280],f16>
%5322 = torch.aten.unsqueeze %5321, %int2 : !torch.vtensor<[1,1280],f16>, !torch.int -> !torch.vtensor<[1,1280,1],f16>
%5323 = torch.aten.unsqueeze %5322, %int3 : !torch.vtensor<[1,1280,1],f16>, !torch.int -> !torch.vtensor<[1,1280,1,1],f16>
%5324 = torch.aten.mul.Tensor %5317, %5323 : !torch.vtensor<[4,1280,12,12],f32>, !torch.vtensor<[1,1280,1,1],f16> -> !torch.vtensor<[4,1280,12,12],f32>
%5325 = torch.aten.add.Tensor %5324, %5320, %int1 : !torch.vtensor<[4,1280,12,12],f32>, !torch.vtensor<[1,1280,1,1],f16>, !torch.int -> !torch.vtensor<[4,1280,12,12],f32>
%5326 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%5327 = torch.aten.to.dtype %5326, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%5328 = torch.prim.ListConstruct %int4, %int1280, %int12, %int12 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5329 = torch.aten.broadcast_to %5327, %5328 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[4,1280,12,12],f16>
%5330 = torch.aten.to.dtype %5325, %int5, %false, %false, %none : !torch.vtensor<[4,1280,12,12],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,1280,12,12],f16>
%5331 = torch.prim.ListConstruct %int4, %int1280, %int12, %int12 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5332 = torch.aten.broadcast_to %5330, %5331 : !torch.vtensor<[4,1280,12,12],f16>, !torch.list<int> -> !torch.vtensor<[4,1280,12,12],f16>
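// Flatten spatial dims into a sequence: permute NCHW -> NHWC, then view
// [4,12,12,1280] as [4,144,1280] (144 tokens, width 1280).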
%5333 = torch.aten.permute %5332, %1163 : !torch.vtensor<[4,1280,12,12],f16>, !torch.list<int> -> !torch.vtensor<[4,12,12,1280],f16>
%5334 = torch.prim.ListConstruct %int4, %int144, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5335 = torch.aten.view %5333, %5334 : !torch.vtensor<[4,12,12,1280],f16>, !torch.list<int> -> !torch.vtensor<[4,144,1280],f16>
%5336 = torch.aten.clone %5335, %int0 : !torch.vtensor<[4,144,1280],f16>, !torch.int -> !torch.vtensor<[4,144,1280],f16>
%5337 = torch.prim.ListConstruct %int4, %int144, %int80, %int16 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5338 = torch.aten.view %5336, %5337 : !torch.vtensor<[4,144,1280],f16>, !torch.list<int> -> !torch.vtensor<[4,144,80,16],f16>
%5339 = torch.aten.abs %5338 : !torch.vtensor<[4,144,80,16],f16> -> !torch.vtensor<[4,144,80,16],f16>
%values_212, %indices_213 = torch.aten.max.dim %5339, %int3, %true : !torch.vtensor<[4,144,80,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[4,144,80,1],f16>, !torch.vtensor<[4,144,80,1],si64>
%5340 = torch.prim.ListConstruct %int4, %int144, %int80, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5341 = torch.aten.view %values_212, %5340 : !torch.vtensor<[4,144,80,1],f16>, !torch.list<int> -> !torch.vtensor<[4,144,80,1],f16>
%5342 = torch.aten.broadcast_to %5341, %5337 : !torch.vtensor<[4,144,80,1],f16>, !torch.list<int> -> !torch.vtensor<[4,144,80,16],f16>
%5343 = torch.aten.clone %5342, %int0 : !torch.vtensor<[4,144,80,16],f16>, !torch.int -> !torch.vtensor<[4,144,80,16],f16>
%5344 = torch.aten.view %5343, %5334 : !torch.vtensor<[4,144,80,16],f16>, !torch.list<int> -> !torch.vtensor<[4,144,1280],f16>
%5345 = torch.aten.div.Scalar %5344, %int128 : !torch.vtensor<[4,144,1280],f16>, !torch.int -> !torch.vtensor<[4,144,1280],f16>
%5346 = torch.aten.detach %936 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%5347 = torch.aten.detach %5346 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%5348 = torch.aten.div.Tensor %5336, %5345 : !torch.vtensor<[4,144,1280],f16>, !torch.vtensor<[4,144,1280],f16> -> !torch.vtensor<[4,144,1280],f16>
%5349 = torch.aten.add.Tensor %5348, %5347, %int1 : !torch.vtensor<[4,144,1280],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,144,1280],f16>
%5350 = torch.aten.round %5349 : !torch.vtensor<[4,144,1280],f16> -> !torch.vtensor<[4,144,1280],f16>
%5351 = torch.aten.clamp %5350, %int-128, %int127 : !torch.vtensor<[4,144,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,144,1280],f16>
%5352 = torch.aten.sub.Tensor %5351, %5347, %int1 : !torch.vtensor<[4,144,1280],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,144,1280],f16>
%5353 = torch.aten.mul.Tensor %5352, %5345 : !torch.vtensor<[4,144,1280],f16>, !torch.vtensor<[4,144,1280],f16> -> !torch.vtensor<[4,144,1280],f16>
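// Input projection: dequantize the si8 [1280,1280] weight with per-group
// scales (%582), flatten the tokens to [576,1280] (576 = 4 * 144), and
// apply mm plus bias (%580).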
%5354 = torch.aten.broadcast_to %582, %3812 : !torch.vtensor<[1280,80,1],f16>, !torch.list<int> -> !torch.vtensor<[1280,80,16],f16>
%5355 = torch.aten.clone %5354, %int0 : !torch.vtensor<[1280,80,16],f16>, !torch.int -> !torch.vtensor<[1280,80,16],f16>
%5356 = torch.aten.view %5355, %3815 : !torch.vtensor<[1280,80,16],f16>, !torch.list<int> -> !torch.vtensor<[1280,1280],f16>
%5357 = torch.aten.mul.Tensor %581, %5356 : !torch.vtensor<[1280,1280],si8>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%5358 = torch.aten.transpose.int %5357, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%5359 = torch.prim.ListConstruct %int576, %int1280 : (!torch.int, !torch.int) -> !torch.list<int>
%5360 = torch.aten.view %5353, %5359 : !torch.vtensor<[4,144,1280],f16>, !torch.list<int> -> !torch.vtensor<[576,1280],f16>
%5361 = torch.aten.mm %5360, %5358 : !torch.vtensor<[576,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[576,1280],f16>
%5362 = torch.aten.mul.Scalar %580, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16>
%5363 = torch.aten.add.Tensor %5362, %5361, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[576,1280],f16>, !torch.int -> !torch.vtensor<[576,1280],f16>
%5364 = torch.aten.view %5363, %5334 : !torch.vtensor<[576,1280],f16>, !torch.list<int> -> !torch.vtensor<[4,144,1280],f16>
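// LayerNorm written out long-hand: mean and variance over the channel dim
// (size 1280), rsqrt(var + 1e-05), then scale (%579) and shift (%578).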
%5365 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%5366 = torch.aten.sum.dim_IntList %5364, %5365, %true, %none : !torch.vtensor<[4,144,1280],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,144,1],f16>
%5367 = torch.aten.div.Scalar %5366, %int1280 : !torch.vtensor<[4,144,1],f16>, !torch.int -> !torch.vtensor<[4,144,1],f16>
%5368 = torch.prim.ListConstruct %int4, %int144, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5369 = torch.aten.broadcast_to %5367, %5368 : !torch.vtensor<[4,144,1],f16>, !torch.list<int> -> !torch.vtensor<[4,144,1280],f16>
%5370 = torch.aten.sub.Tensor %5364, %5369, %int1 : !torch.vtensor<[4,144,1280],f16>, !torch.vtensor<[4,144,1280],f16>, !torch.int -> !torch.vtensor<[4,144,1280],f16>
%5371 = torch.aten.mul.Tensor %5370, %5370 : !torch.vtensor<[4,144,1280],f16>, !torch.vtensor<[4,144,1280],f16> -> !torch.vtensor<[4,144,1280],f16>
%5372 = torch.aten.sum.dim_IntList %5371, %5365, %true, %none : !torch.vtensor<[4,144,1280],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,144,1],f16>
%5373 = torch.aten.div.Scalar %5372, %int1280 : !torch.vtensor<[4,144,1],f16>, !torch.int -> !torch.vtensor<[4,144,1],f16>
%5374 = torch.aten.add.Scalar %5373, %float1.000000e-05, %int1 : !torch.vtensor<[4,144,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[4,144,1],f16>
%5375 = torch.aten.rsqrt %5374 : !torch.vtensor<[4,144,1],f16> -> !torch.vtensor<[4,144,1],f16>
%5376 = torch.prim.ListConstruct %int4, %int144, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5377 = torch.aten.broadcast_to %5375, %5376 : !torch.vtensor<[4,144,1],f16>, !torch.list<int> -> !torch.vtensor<[4,144,1280],f16>
%5378 = torch.aten.mul.Tensor %5370, %5377 : !torch.vtensor<[4,144,1280],f16>, !torch.vtensor<[4,144,1280],f16> -> !torch.vtensor<[4,144,1280],f16>
%5379 = torch.aten.mul.Tensor %5378, %579 : !torch.vtensor<[4,144,1280],f16>, !torch.vtensor<[1280],f16> -> !torch.vtensor<[4,144,1280],f16>
%5380 = torch.aten.add.Tensor %5379, %578, %int1 : !torch.vtensor<[4,144,1280],f16>, !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[4,144,1280],f16>
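// Self-attention projections: Q, K, and V all read the same normalized
// tokens (%5380). Each path fake-quantizes the activations, dequantizes an
// si8 [1280,1280] weight (Q: %577/%576, K: %575/%574, V: %573/%572), and
// applies a bias-free mm.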
%5381 = torch.aten.view %5380, %5337 : !torch.vtensor<[4,144,1280],f16>, !torch.list<int> -> !torch.vtensor<[4,144,80,16],f16>
%5382 = torch.aten.abs %5381 : !torch.vtensor<[4,144,80,16],f16> -> !torch.vtensor<[4,144,80,16],f16>
%values_214, %indices_215 = torch.aten.max.dim %5382, %int3, %true : !torch.vtensor<[4,144,80,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[4,144,80,1],f16>, !torch.vtensor<[4,144,80,1],si64>
%5383 = torch.aten.view %values_214, %5340 : !torch.vtensor<[4,144,80,1],f16>, !torch.list<int> -> !torch.vtensor<[4,144,80,1],f16>
%5384 = torch.aten.broadcast_to %5383, %5337 : !torch.vtensor<[4,144,80,1],f16>, !torch.list<int> -> !torch.vtensor<[4,144,80,16],f16>
%5385 = torch.aten.clone %5384, %int0 : !torch.vtensor<[4,144,80,16],f16>, !torch.int -> !torch.vtensor<[4,144,80,16],f16>
%5386 = torch.aten.view %5385, %5334 : !torch.vtensor<[4,144,80,16],f16>, !torch.list<int> -> !torch.vtensor<[4,144,1280],f16>
%5387 = torch.aten.div.Scalar %5386, %int128 : !torch.vtensor<[4,144,1280],f16>, !torch.int -> !torch.vtensor<[4,144,1280],f16>
%5388 = torch.aten.detach %936 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%5389 = torch.aten.detach %5388 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%5390 = torch.aten.div.Tensor %5380, %5387 : !torch.vtensor<[4,144,1280],f16>, !torch.vtensor<[4,144,1280],f16> -> !torch.vtensor<[4,144,1280],f16>
%5391 = torch.aten.add.Tensor %5390, %5389, %int1 : !torch.vtensor<[4,144,1280],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,144,1280],f16>
%5392 = torch.aten.round %5391 : !torch.vtensor<[4,144,1280],f16> -> !torch.vtensor<[4,144,1280],f16>
%5393 = torch.aten.clamp %5392, %int-128, %int127 : !torch.vtensor<[4,144,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,144,1280],f16>
%5394 = torch.aten.sub.Tensor %5393, %5389, %int1 : !torch.vtensor<[4,144,1280],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,144,1280],f16>
%5395 = torch.aten.mul.Tensor %5394, %5387 : !torch.vtensor<[4,144,1280],f16>, !torch.vtensor<[4,144,1280],f16> -> !torch.vtensor<[4,144,1280],f16>
%5396 = torch.aten.broadcast_to %577, %3812 : !torch.vtensor<[1280,80,1],f16>, !torch.list<int> -> !torch.vtensor<[1280,80,16],f16>
%5397 = torch.aten.clone %5396, %int0 : !torch.vtensor<[1280,80,16],f16>, !torch.int -> !torch.vtensor<[1280,80,16],f16>
%5398 = torch.aten.view %5397, %3815 : !torch.vtensor<[1280,80,16],f16>, !torch.list<int> -> !torch.vtensor<[1280,1280],f16>
%5399 = torch.aten.mul.Tensor %576, %5398 : !torch.vtensor<[1280,1280],si8>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%5400 = torch.aten.transpose.int %5399, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%5401 = torch.aten.view %5395, %5359 : !torch.vtensor<[4,144,1280],f16>, !torch.list<int> -> !torch.vtensor<[576,1280],f16>
%5402 = torch.aten.mm %5401, %5400 : !torch.vtensor<[576,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[576,1280],f16>
%5403 = torch.aten.view %5402, %5334 : !torch.vtensor<[576,1280],f16>, !torch.list<int> -> !torch.vtensor<[4,144,1280],f16>
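// Split into attention heads: [4,144,1280] -> [4,144,20,64], permute to
// [4,20,144,64], and fold batch and heads into [80,144,64] (20 heads,
// head dim 64).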
%5404 = torch.prim.ListConstruct %int4, %int144, %int20, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5405 = torch.aten.view %5403, %5404 : !torch.vtensor<[4,144,1280],f16>, !torch.list<int> -> !torch.vtensor<[4,144,20,64],f16>
%5406 = torch.aten.permute %5405, %1239 : !torch.vtensor<[4,144,20,64],f16>, !torch.list<int> -> !torch.vtensor<[4,20,144,64],f16>
%5407 = torch.aten.clone %5406, %int0 : !torch.vtensor<[4,20,144,64],f16>, !torch.int -> !torch.vtensor<[4,20,144,64],f16>
%5408 = torch.prim.ListConstruct %int80, %int144, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5409 = torch.aten.view %5407, %5408 : !torch.vtensor<[4,20,144,64],f16>, !torch.list<int> -> !torch.vtensor<[80,144,64],f16>
%5410 = torch.aten.view %5380, %5337 : !torch.vtensor<[4,144,1280],f16>, !torch.list<int> -> !torch.vtensor<[4,144,80,16],f16>
%5411 = torch.aten.abs %5410 : !torch.vtensor<[4,144,80,16],f16> -> !torch.vtensor<[4,144,80,16],f16>
%values_216, %indices_217 = torch.aten.max.dim %5411, %int3, %true : !torch.vtensor<[4,144,80,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[4,144,80,1],f16>, !torch.vtensor<[4,144,80,1],si64>
%5412 = torch.aten.view %values_216, %5340 : !torch.vtensor<[4,144,80,1],f16>, !torch.list<int> -> !torch.vtensor<[4,144,80,1],f16>
%5413 = torch.aten.broadcast_to %5412, %5337 : !torch.vtensor<[4,144,80,1],f16>, !torch.list<int> -> !torch.vtensor<[4,144,80,16],f16>
%5414 = torch.aten.clone %5413, %int0 : !torch.vtensor<[4,144,80,16],f16>, !torch.int -> !torch.vtensor<[4,144,80,16],f16>
%5415 = torch.aten.view %5414, %5334 : !torch.vtensor<[4,144,80,16],f16>, !torch.list<int> -> !torch.vtensor<[4,144,1280],f16>
%5416 = torch.aten.div.Scalar %5415, %int128 : !torch.vtensor<[4,144,1280],f16>, !torch.int -> !torch.vtensor<[4,144,1280],f16>
%5417 = torch.aten.detach %936 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%5418 = torch.aten.detach %5417 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%5419 = torch.aten.div.Tensor %5380, %5416 : !torch.vtensor<[4,144,1280],f16>, !torch.vtensor<[4,144,1280],f16> -> !torch.vtensor<[4,144,1280],f16>
%5420 = torch.aten.add.Tensor %5419, %5418, %int1 : !torch.vtensor<[4,144,1280],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,144,1280],f16>
%5421 = torch.aten.round %5420 : !torch.vtensor<[4,144,1280],f16> -> !torch.vtensor<[4,144,1280],f16>
%5422 = torch.aten.clamp %5421, %int-128, %int127 : !torch.vtensor<[4,144,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,144,1280],f16>
%5423 = torch.aten.sub.Tensor %5422, %5418, %int1 : !torch.vtensor<[4,144,1280],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,144,1280],f16>
%5424 = torch.aten.mul.Tensor %5423, %5416 : !torch.vtensor<[4,144,1280],f16>, !torch.vtensor<[4,144,1280],f16> -> !torch.vtensor<[4,144,1280],f16>
%5425 = torch.aten.broadcast_to %575, %3812 : !torch.vtensor<[1280,80,1],f16>, !torch.list<int> -> !torch.vtensor<[1280,80,16],f16>
%5426 = torch.aten.clone %5425, %int0 : !torch.vtensor<[1280,80,16],f16>, !torch.int -> !torch.vtensor<[1280,80,16],f16>
%5427 = torch.aten.view %5426, %3815 : !torch.vtensor<[1280,80,16],f16>, !torch.list<int> -> !torch.vtensor<[1280,1280],f16>
%5428 = torch.aten.mul.Tensor %574, %5427 : !torch.vtensor<[1280,1280],si8>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%5429 = torch.aten.transpose.int %5428, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%5430 = torch.aten.view %5424, %5359 : !torch.vtensor<[4,144,1280],f16>, !torch.list<int> -> !torch.vtensor<[576,1280],f16>
%5431 = torch.aten.mm %5430, %5429 : !torch.vtensor<[576,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[576,1280],f16>
%5432 = torch.aten.view %5431, %5334 : !torch.vtensor<[576,1280],f16>, !torch.list<int> -> !torch.vtensor<[4,144,1280],f16>
%5433 = torch.aten.view %5380, %5337 : !torch.vtensor<[4,144,1280],f16>, !torch.list<int> -> !torch.vtensor<[4,144,80,16],f16>
%5434 = torch.aten.abs %5433 : !torch.vtensor<[4,144,80,16],f16> -> !torch.vtensor<[4,144,80,16],f16>
%values_218, %indices_219 = torch.aten.max.dim %5434, %int3, %true : !torch.vtensor<[4,144,80,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[4,144,80,1],f16>, !torch.vtensor<[4,144,80,1],si64>
%5435 = torch.aten.view %values_218, %5340 : !torch.vtensor<[4,144,80,1],f16>, !torch.list<int> -> !torch.vtensor<[4,144,80,1],f16>
%5436 = torch.aten.broadcast_to %5435, %5337 : !torch.vtensor<[4,144,80,1],f16>, !torch.list<int> -> !torch.vtensor<[4,144,80,16],f16>
%5437 = torch.aten.clone %5436, %int0 : !torch.vtensor<[4,144,80,16],f16>, !torch.int -> !torch.vtensor<[4,144,80,16],f16>
%5438 = torch.aten.view %5437, %5334 : !torch.vtensor<[4,144,80,16],f16>, !torch.list<int> -> !torch.vtensor<[4,144,1280],f16>
%5439 = torch.aten.div.Scalar %5438, %int128 : !torch.vtensor<[4,144,1280],f16>, !torch.int -> !torch.vtensor<[4,144,1280],f16>
%5440 = torch.aten.detach %936 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%5441 = torch.aten.detach %5440 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%5442 = torch.aten.div.Tensor %5380, %5439 : !torch.vtensor<[4,144,1280],f16>, !torch.vtensor<[4,144,1280],f16> -> !torch.vtensor<[4,144,1280],f16>
%5443 = torch.aten.add.Tensor %5442, %5441, %int1 : !torch.vtensor<[4,144,1280],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,144,1280],f16>
%5444 = torch.aten.round %5443 : !torch.vtensor<[4,144,1280],f16> -> !torch.vtensor<[4,144,1280],f16>
%5445 = torch.aten.clamp %5444, %int-128, %int127 : !torch.vtensor<[4,144,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,144,1280],f16>
%5446 = torch.aten.sub.Tensor %5445, %5441, %int1 : !torch.vtensor<[4,144,1280],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,144,1280],f16>
%5447 = torch.aten.mul.Tensor %5446, %5439 : !torch.vtensor<[4,144,1280],f16>, !torch.vtensor<[4,144,1280],f16> -> !torch.vtensor<[4,144,1280],f16>
%5448 = torch.aten.broadcast_to %573, %3812 : !torch.vtensor<[1280,80,1],f16>, !torch.list<int> -> !torch.vtensor<[1280,80,16],f16>
%5449 = torch.aten.clone %5448, %int0 : !torch.vtensor<[1280,80,16],f16>, !torch.int -> !torch.vtensor<[1280,80,16],f16>
%5450 = torch.aten.view %5449, %3815 : !torch.vtensor<[1280,80,16],f16>, !torch.list<int> -> !torch.vtensor<[1280,1280],f16>
%5451 = torch.aten.mul.Tensor %572, %5450 : !torch.vtensor<[1280,1280],si8>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%5452 = torch.aten.transpose.int %5451, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%5453 = torch.aten.view %5447, %5359 : !torch.vtensor<[4,144,1280],f16>, !torch.list<int> -> !torch.vtensor<[576,1280],f16>
%5454 = torch.aten.mm %5453, %5452 : !torch.vtensor<[576,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[576,1280],f16>
%5455 = torch.aten.view %5454, %5334 : !torch.vtensor<[576,1280],f16>, !torch.list<int> -> !torch.vtensor<[4,144,1280],f16>
%5456 = torch.aten.view %5432, %5404 : !torch.vtensor<[4,144,1280],f16>, !torch.list<int> -> !torch.vtensor<[4,144,20,64],f16>
%5457 = torch.aten.permute %5456, %1239 : !torch.vtensor<[4,144,20,64],f16>, !torch.list<int> -> !torch.vtensor<[4,20,144,64],f16>
%5458 = torch.aten.clone %5457, %int0 : !torch.vtensor<[4,20,144,64],f16>, !torch.int -> !torch.vtensor<[4,20,144,64],f16>
%5459 = torch.aten.view %5458, %5408 : !torch.vtensor<[4,20,144,64],f16>, !torch.list<int> -> !torch.vtensor<[80,144,64],f16>
%5460 = torch.aten.view %5455, %5404 : !torch.vtensor<[4,144,1280],f16>, !torch.list<int> -> !torch.vtensor<[4,144,20,64],f16>
%5461 = torch.aten.permute %5460, %1239 : !torch.vtensor<[4,144,20,64],f16>, !torch.list<int> -> !torch.vtensor<[4,20,144,64],f16>
%5462 = torch.aten.clone %5461, %int0 : !torch.vtensor<[4,20,144,64],f16>, !torch.int -> !torch.vtensor<[4,20,144,64],f16>
%5463 = torch.aten.view %5462, %5408 : !torch.vtensor<[4,20,144,64],f16>, !torch.list<int> -> !torch.vtensor<[80,144,64],f16>
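// Attention scores in f32: upcast Q and K, then Q @ K^T scaled by
// 0.125 = 1/sqrt(64). Adding the uninitialized empty [80,144,144] tensor
// with alpha 0 is a no-op; it looks like the beta = 0 operand of a
// decomposed baddbmm.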
%5464 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%5465 = torch.aten.to.dtype %5464, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%5466 = torch.prim.ListConstruct %int80, %int144, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5467 = torch.aten.broadcast_to %5465, %5466 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[80,144,64],f32>
%5468 = torch.aten.to.dtype %5409, %int6, %false, %false, %none : !torch.vtensor<[80,144,64],f16>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[80,144,64],f32>
%5469 = torch.prim.ListConstruct %int80, %int144, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5470 = torch.aten.broadcast_to %5468, %5469 : !torch.vtensor<[80,144,64],f32>, !torch.list<int> -> !torch.vtensor<[80,144,64],f32>
%5471 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%5472 = torch.aten.to.dtype %5471, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%5473 = torch.prim.ListConstruct %int80, %int144, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5474 = torch.aten.broadcast_to %5472, %5473 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[80,144,64],f32>
%5475 = torch.aten.to.dtype %5459, %int6, %false, %false, %none : !torch.vtensor<[80,144,64],f16>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[80,144,64],f32>
%5476 = torch.prim.ListConstruct %int80, %int144, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5477 = torch.aten.broadcast_to %5475, %5476 : !torch.vtensor<[80,144,64],f32>, !torch.list<int> -> !torch.vtensor<[80,144,64],f32>
%5478 = torch.prim.ListConstruct %int80, %int144, %int144 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5479 = torch.aten.empty.memory_format %5478, %int6, %none, %cuda3A0, %false, %none : !torch.list<int>, !torch.int, !torch.none, !torch.Device, !torch.bool, !torch.none -> !torch.vtensor<[80,144,144],f32>
%5480 = torch.aten.transpose.int %5477, %int-1, %int-2 : !torch.vtensor<[80,144,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[80,64,144],f32>
%5481 = torch.aten.bmm %5470, %5480 : !torch.vtensor<[80,144,64],f32>, !torch.vtensor<[80,64,144],f32> -> !torch.vtensor<[80,144,144],f32>
%5482 = torch.aten.mul.Scalar %5481, %float1.250000e-01 : !torch.vtensor<[80,144,144],f32>, !torch.float -> !torch.vtensor<[80,144,144],f32>
%5483 = torch.aten.add.Tensor %5482, %5479, %int0 : !torch.vtensor<[80,144,144],f32>, !torch.vtensor<[80,144,144],f32>, !torch.int -> !torch.vtensor<[80,144,144],f32>
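// Numerically stable softmax over the last dim: subtract the row max,
// exponentiate, and divide by the row sum.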
%values_220, %indices_221 = torch.aten.max.dim %5483, %int-1, %true : !torch.vtensor<[80,144,144],f32>, !torch.int, !torch.bool -> !torch.vtensor<[80,144,1],f32>, !torch.vtensor<[80,144,1],si64>
%5484 = torch.aten.sub.Tensor %5483, %values_220, %float1.000000e00 : !torch.vtensor<[80,144,144],f32>, !torch.vtensor<[80,144,1],f32>, !torch.float -> !torch.vtensor<[80,144,144],f32>
%5485 = torch.aten.exp %5484 : !torch.vtensor<[80,144,144],f32> -> !torch.vtensor<[80,144,144],f32>
%5486 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%5487 = torch.aten.sum.dim_IntList %5485, %5486, %true, %none : !torch.vtensor<[80,144,144],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[80,144,1],f32>
%5488 = torch.aten.div.Tensor %5485, %5487 : !torch.vtensor<[80,144,144],f32>, !torch.vtensor<[80,144,1],f32> -> !torch.vtensor<[80,144,144],f32>
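// Cast the attention probabilities back to f16, apply them to V, and merge
// the heads back to [4,144,1280]; the attention output is then
// fake-quantized again ahead of the next projection.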
%5489 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%5490 = torch.aten.to.dtype %5489, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%5491 = torch.prim.ListConstruct %int80, %int144, %int144 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5492 = torch.aten.broadcast_to %5490, %5491 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[80,144,144],f16>
%5493 = torch.aten.to.dtype %5488, %int5, %false, %false, %none : !torch.vtensor<[80,144,144],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[80,144,144],f16>
%5494 = torch.prim.ListConstruct %int80, %int144, %int144 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5495 = torch.aten.broadcast_to %5493, %5494 : !torch.vtensor<[80,144,144],f16>, !torch.list<int> -> !torch.vtensor<[80,144,144],f16>
%5496 = torch.aten.bmm %5495, %5463 : !torch.vtensor<[80,144,144],f16>, !torch.vtensor<[80,144,64],f16> -> !torch.vtensor<[80,144,64],f16>
%5497 = torch.prim.ListConstruct %int4, %int20, %int144, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5498 = torch.aten.view %5496, %5497 : !torch.vtensor<[80,144,64],f16>, !torch.list<int> -> !torch.vtensor<[4,20,144,64],f16>
%5499 = torch.aten.permute %5498, %1239 : !torch.vtensor<[4,20,144,64],f16>, !torch.list<int> -> !torch.vtensor<[4,144,20,64],f16>
%5500 = torch.aten.clone %5499, %int0 : !torch.vtensor<[4,144,20,64],f16>, !torch.int -> !torch.vtensor<[4,144,20,64],f16>
%5501 = torch.aten.view %5500, %5334 : !torch.vtensor<[4,144,20,64],f16>, !torch.list<int> -> !torch.vtensor<[4,144,1280],f16>
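    // Recurring fake-quantization (QDQ) pattern, repeated throughout this file:
    // activations are viewed as groups of 16 channels, scale = absmax(group)/128,
    // then q = clamp(round(x/scale + zp), -128, 127) and dq = (q - zp) * scale.
    // %936 appears to be the (detached) zero-point scalar. This simulates
    // symmetric int8 quantization in f16 arithmetic rather than emitting
    // integer math.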
%5502 = torch.aten.view %5501, %5337 : !torch.vtensor<[4,144,1280],f16>, !torch.list<int> -> !torch.vtensor<[4,144,80,16],f16>
%5503 = torch.aten.abs %5502 : !torch.vtensor<[4,144,80,16],f16> -> !torch.vtensor<[4,144,80,16],f16>
%values_222, %indices_223 = torch.aten.max.dim %5503, %int3, %true : !torch.vtensor<[4,144,80,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[4,144,80,1],f16>, !torch.vtensor<[4,144,80,1],si64>
%5504 = torch.aten.view %values_222, %5340 : !torch.vtensor<[4,144,80,1],f16>, !torch.list<int> -> !torch.vtensor<[4,144,80,1],f16>
%5505 = torch.aten.broadcast_to %5504, %5337 : !torch.vtensor<[4,144,80,1],f16>, !torch.list<int> -> !torch.vtensor<[4,144,80,16],f16>
%5506 = torch.aten.clone %5505, %int0 : !torch.vtensor<[4,144,80,16],f16>, !torch.int -> !torch.vtensor<[4,144,80,16],f16>
%5507 = torch.aten.view %5506, %5334 : !torch.vtensor<[4,144,80,16],f16>, !torch.list<int> -> !torch.vtensor<[4,144,1280],f16>
%5508 = torch.aten.div.Scalar %5507, %int128 : !torch.vtensor<[4,144,1280],f16>, !torch.int -> !torch.vtensor<[4,144,1280],f16>
%5509 = torch.aten.detach %936 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%5510 = torch.aten.detach %5509 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%5511 = torch.aten.div.Tensor %5501, %5508 : !torch.vtensor<[4,144,1280],f16>, !torch.vtensor<[4,144,1280],f16> -> !torch.vtensor<[4,144,1280],f16>
%5512 = torch.aten.add.Tensor %5511, %5510, %int1 : !torch.vtensor<[4,144,1280],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,144,1280],f16>
%5513 = torch.aten.round %5512 : !torch.vtensor<[4,144,1280],f16> -> !torch.vtensor<[4,144,1280],f16>
%5514 = torch.aten.clamp %5513, %int-128, %int127 : !torch.vtensor<[4,144,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,144,1280],f16>
%5515 = torch.aten.sub.Tensor %5514, %5510, %int1 : !torch.vtensor<[4,144,1280],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,144,1280],f16>
%5516 = torch.aten.mul.Tensor %5515, %5508 : !torch.vtensor<[4,144,1280],f16>, !torch.vtensor<[4,144,1280],f16> -> !torch.vtensor<[4,144,1280],f16>
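    // Weight dequantization for the attention output projection: the si8
    // weight (%570) is multiplied elementwise by a per-group f16 scale
    // (%571, one scale per 16-column group), then used in an ordinary f16
    // matmul plus bias.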
%5517 = torch.aten.broadcast_to %571, %3812 : !torch.vtensor<[1280,80,1],f16>, !torch.list<int> -> !torch.vtensor<[1280,80,16],f16>
%5518 = torch.aten.clone %5517, %int0 : !torch.vtensor<[1280,80,16],f16>, !torch.int -> !torch.vtensor<[1280,80,16],f16>
%5519 = torch.aten.view %5518, %3815 : !torch.vtensor<[1280,80,16],f16>, !torch.list<int> -> !torch.vtensor<[1280,1280],f16>
%5520 = torch.aten.mul.Tensor %570, %5519 : !torch.vtensor<[1280,1280],si8>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%5521 = torch.aten.transpose.int %5520, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%5522 = torch.aten.view %5516, %5359 : !torch.vtensor<[4,144,1280],f16>, !torch.list<int> -> !torch.vtensor<[576,1280],f16>
%5523 = torch.aten.mm %5522, %5521 : !torch.vtensor<[576,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[576,1280],f16>
%5524 = torch.aten.mul.Scalar %569, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16>
%5525 = torch.aten.add.Tensor %5524, %5523, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[576,1280],f16>, !torch.int -> !torch.vtensor<[576,1280],f16>
%5526 = torch.aten.view %5525, %5334 : !torch.vtensor<[576,1280],f16>, !torch.list<int> -> !torch.vtensor<[4,144,1280],f16>
%5527 = torch.aten.add.Tensor %5526, %5364, %int1 : !torch.vtensor<[4,144,1280],f16>, !torch.vtensor<[4,144,1280],f16>, !torch.int -> !torch.vtensor<[4,144,1280],f16>
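    // LayerNorm decomposed into primitives over the 1280-dim channel axis:
    // mean, centered square for variance, rsqrt(var + 1e-05), then the usual
    // affine scale (%568) and shift (%567).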
%5528 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%5529 = torch.aten.sum.dim_IntList %5527, %5528, %true, %none : !torch.vtensor<[4,144,1280],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,144,1],f16>
%5530 = torch.aten.div.Scalar %5529, %int1280 : !torch.vtensor<[4,144,1],f16>, !torch.int -> !torch.vtensor<[4,144,1],f16>
%5531 = torch.prim.ListConstruct %int4, %int144, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5532 = torch.aten.broadcast_to %5530, %5531 : !torch.vtensor<[4,144,1],f16>, !torch.list<int> -> !torch.vtensor<[4,144,1280],f16>
%5533 = torch.aten.sub.Tensor %5527, %5532, %int1 : !torch.vtensor<[4,144,1280],f16>, !torch.vtensor<[4,144,1280],f16>, !torch.int -> !torch.vtensor<[4,144,1280],f16>
%5534 = torch.aten.mul.Tensor %5533, %5533 : !torch.vtensor<[4,144,1280],f16>, !torch.vtensor<[4,144,1280],f16> -> !torch.vtensor<[4,144,1280],f16>
%5535 = torch.aten.sum.dim_IntList %5534, %5528, %true, %none : !torch.vtensor<[4,144,1280],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,144,1],f16>
%5536 = torch.aten.div.Scalar %5535, %int1280 : !torch.vtensor<[4,144,1],f16>, !torch.int -> !torch.vtensor<[4,144,1],f16>
%5537 = torch.aten.add.Scalar %5536, %float1.000000e-05, %int1 : !torch.vtensor<[4,144,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[4,144,1],f16>
%5538 = torch.aten.rsqrt %5537 : !torch.vtensor<[4,144,1],f16> -> !torch.vtensor<[4,144,1],f16>
%5539 = torch.prim.ListConstruct %int4, %int144, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5540 = torch.aten.broadcast_to %5538, %5539 : !torch.vtensor<[4,144,1],f16>, !torch.list<int> -> !torch.vtensor<[4,144,1280],f16>
%5541 = torch.aten.mul.Tensor %5533, %5540 : !torch.vtensor<[4,144,1280],f16>, !torch.vtensor<[4,144,1280],f16> -> !torch.vtensor<[4,144,1280],f16>
%5542 = torch.aten.mul.Tensor %5541, %568 : !torch.vtensor<[4,144,1280],f16>, !torch.vtensor<[1280],f16> -> !torch.vtensor<[4,144,1280],f16>
%5543 = torch.aten.add.Tensor %5542, %567, %int1 : !torch.vtensor<[4,144,1280],f16>, !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[4,144,1280],f16>
%5544 = torch.aten.view %5543, %5337 : !torch.vtensor<[4,144,1280],f16>, !torch.list<int> -> !torch.vtensor<[4,144,80,16],f16>
%5545 = torch.aten.abs %5544 : !torch.vtensor<[4,144,80,16],f16> -> !torch.vtensor<[4,144,80,16],f16>
%values_224, %indices_225 = torch.aten.max.dim %5545, %int3, %true : !torch.vtensor<[4,144,80,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[4,144,80,1],f16>, !torch.vtensor<[4,144,80,1],si64>
%5546 = torch.aten.view %values_224, %5340 : !torch.vtensor<[4,144,80,1],f16>, !torch.list<int> -> !torch.vtensor<[4,144,80,1],f16>
%5547 = torch.aten.broadcast_to %5546, %5337 : !torch.vtensor<[4,144,80,1],f16>, !torch.list<int> -> !torch.vtensor<[4,144,80,16],f16>
%5548 = torch.aten.clone %5547, %int0 : !torch.vtensor<[4,144,80,16],f16>, !torch.int -> !torch.vtensor<[4,144,80,16],f16>
%5549 = torch.aten.view %5548, %5334 : !torch.vtensor<[4,144,80,16],f16>, !torch.list<int> -> !torch.vtensor<[4,144,1280],f16>
%5550 = torch.aten.div.Scalar %5549, %int128 : !torch.vtensor<[4,144,1280],f16>, !torch.int -> !torch.vtensor<[4,144,1280],f16>
%5551 = torch.aten.detach %936 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%5552 = torch.aten.detach %5551 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%5553 = torch.aten.div.Tensor %5543, %5550 : !torch.vtensor<[4,144,1280],f16>, !torch.vtensor<[4,144,1280],f16> -> !torch.vtensor<[4,144,1280],f16>
%5554 = torch.aten.add.Tensor %5553, %5552, %int1 : !torch.vtensor<[4,144,1280],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,144,1280],f16>
%5555 = torch.aten.round %5554 : !torch.vtensor<[4,144,1280],f16> -> !torch.vtensor<[4,144,1280],f16>
%5556 = torch.aten.clamp %5555, %int-128, %int127 : !torch.vtensor<[4,144,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,144,1280],f16>
%5557 = torch.aten.sub.Tensor %5556, %5552, %int1 : !torch.vtensor<[4,144,1280],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,144,1280],f16>
%5558 = torch.aten.mul.Tensor %5557, %5550 : !torch.vtensor<[4,144,1280],f16>, !torch.vtensor<[4,144,1280],f16> -> !torch.vtensor<[4,144,1280],f16>
%5559 = torch.aten.broadcast_to %566, %3812 : !torch.vtensor<[1280,80,1],f16>, !torch.list<int> -> !torch.vtensor<[1280,80,16],f16>
%5560 = torch.aten.clone %5559, %int0 : !torch.vtensor<[1280,80,16],f16>, !torch.int -> !torch.vtensor<[1280,80,16],f16>
%5561 = torch.aten.view %5560, %3815 : !torch.vtensor<[1280,80,16],f16>, !torch.list<int> -> !torch.vtensor<[1280,1280],f16>
%5562 = torch.aten.mul.Tensor %565, %5561 : !torch.vtensor<[1280,1280],si8>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%5563 = torch.aten.transpose.int %5562, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%5564 = torch.aten.view %5558, %5359 : !torch.vtensor<[4,144,1280],f16>, !torch.list<int> -> !torch.vtensor<[576,1280],f16>
%5565 = torch.aten.mm %5564, %5563 : !torch.vtensor<[576,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[576,1280],f16>
%5566 = torch.aten.view %5565, %5334 : !torch.vtensor<[576,1280],f16>, !torch.list<int> -> !torch.vtensor<[4,144,1280],f16>
%5567 = torch.aten.view %5566, %5404 : !torch.vtensor<[4,144,1280],f16>, !torch.list<int> -> !torch.vtensor<[4,144,20,64],f16>
%5568 = torch.aten.permute %5567, %1239 : !torch.vtensor<[4,144,20,64],f16>, !torch.list<int> -> !torch.vtensor<[4,20,144,64],f16>
%5569 = torch.aten.clone %5568, %int0 : !torch.vtensor<[4,20,144,64],f16>, !torch.int -> !torch.vtensor<[4,20,144,64],f16>
%5570 = torch.aten.view %5569, %5408 : !torch.vtensor<[4,20,144,64],f16>, !torch.list<int> -> !torch.vtensor<[80,144,64],f16>
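    // Cross-attention begins here: K and V are projected from %arg2, which
    // appears to be the [4,77,1024] text-encoder hidden states, through
    // quantized 1024->1280 linears (same QDQ pattern as above), while Q
    // comes from the image tokens.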
%5571 = torch.aten.view %arg2, %1405 : !torch.vtensor<[4,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,77,64,16],f16>
%5572 = torch.aten.abs %5571 : !torch.vtensor<[4,77,64,16],f16> -> !torch.vtensor<[4,77,64,16],f16>
%values_226, %indices_227 = torch.aten.max.dim %5572, %int3, %true : !torch.vtensor<[4,77,64,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[4,77,64,1],f16>, !torch.vtensor<[4,77,64,1],si64>
%5573 = torch.aten.view %values_226, %1408 : !torch.vtensor<[4,77,64,1],f16>, !torch.list<int> -> !torch.vtensor<[4,77,64,1],f16>
%5574 = torch.aten.broadcast_to %5573, %1405 : !torch.vtensor<[4,77,64,1],f16>, !torch.list<int> -> !torch.vtensor<[4,77,64,16],f16>
%5575 = torch.aten.clone %5574, %int0 : !torch.vtensor<[4,77,64,16],f16>, !torch.int -> !torch.vtensor<[4,77,64,16],f16>
%5576 = torch.aten.view %5575, %1412 : !torch.vtensor<[4,77,64,16],f16>, !torch.list<int> -> !torch.vtensor<[4,77,1024],f16>
%5577 = torch.aten.div.Scalar %5576, %int128 : !torch.vtensor<[4,77,1024],f16>, !torch.int -> !torch.vtensor<[4,77,1024],f16>
%5578 = torch.aten.detach %936 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%5579 = torch.aten.detach %5578 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%5580 = torch.aten.div.Tensor %arg2, %5577 : !torch.vtensor<[4,77,1024],f16>, !torch.vtensor<[4,77,1024],f16> -> !torch.vtensor<[4,77,1024],f16>
%5581 = torch.aten.add.Tensor %5580, %5579, %int1 : !torch.vtensor<[4,77,1024],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,77,1024],f16>
%5582 = torch.aten.round %5581 : !torch.vtensor<[4,77,1024],f16> -> !torch.vtensor<[4,77,1024],f16>
%5583 = torch.aten.clamp %5582, %int-128, %int127 : !torch.vtensor<[4,77,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,77,1024],f16>
%5584 = torch.aten.sub.Tensor %5583, %5579, %int1 : !torch.vtensor<[4,77,1024],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,77,1024],f16>
%5585 = torch.aten.mul.Tensor %5584, %5577 : !torch.vtensor<[4,77,1024],f16>, !torch.vtensor<[4,77,1024],f16> -> !torch.vtensor<[4,77,1024],f16>
%5586 = torch.aten.broadcast_to %564, %4046 : !torch.vtensor<[1280,64,1],f16>, !torch.list<int> -> !torch.vtensor<[1280,64,16],f16>
%5587 = torch.aten.clone %5586, %int0 : !torch.vtensor<[1280,64,16],f16>, !torch.int -> !torch.vtensor<[1280,64,16],f16>
%5588 = torch.aten.view %5587, %4049 : !torch.vtensor<[1280,64,16],f16>, !torch.list<int> -> !torch.vtensor<[1280,1024],f16>
%5589 = torch.aten.mul.Tensor %563, %5588 : !torch.vtensor<[1280,1024],si8>, !torch.vtensor<[1280,1024],f16> -> !torch.vtensor<[1280,1024],f16>
%5590 = torch.aten.transpose.int %5589, %int0, %int1 : !torch.vtensor<[1280,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[1024,1280],f16>
%5591 = torch.aten.view %5585, %1430 : !torch.vtensor<[4,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[308,1024],f16>
%5592 = torch.aten.mm %5591, %5590 : !torch.vtensor<[308,1024],f16>, !torch.vtensor<[1024,1280],f16> -> !torch.vtensor<[308,1280],f16>
%5593 = torch.aten.view %5592, %4055 : !torch.vtensor<[308,1280],f16>, !torch.list<int> -> !torch.vtensor<[4,77,1280],f16>
%5594 = torch.aten.view %arg2, %1405 : !torch.vtensor<[4,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[4,77,64,16],f16>
%5595 = torch.aten.abs %5594 : !torch.vtensor<[4,77,64,16],f16> -> !torch.vtensor<[4,77,64,16],f16>
%values_228, %indices_229 = torch.aten.max.dim %5595, %int3, %true : !torch.vtensor<[4,77,64,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[4,77,64,1],f16>, !torch.vtensor<[4,77,64,1],si64>
%5596 = torch.aten.view %values_228, %1408 : !torch.vtensor<[4,77,64,1],f16>, !torch.list<int> -> !torch.vtensor<[4,77,64,1],f16>
%5597 = torch.aten.broadcast_to %5596, %1405 : !torch.vtensor<[4,77,64,1],f16>, !torch.list<int> -> !torch.vtensor<[4,77,64,16],f16>
%5598 = torch.aten.clone %5597, %int0 : !torch.vtensor<[4,77,64,16],f16>, !torch.int -> !torch.vtensor<[4,77,64,16],f16>
%5599 = torch.aten.view %5598, %1412 : !torch.vtensor<[4,77,64,16],f16>, !torch.list<int> -> !torch.vtensor<[4,77,1024],f16>
%5600 = torch.aten.div.Scalar %5599, %int128 : !torch.vtensor<[4,77,1024],f16>, !torch.int -> !torch.vtensor<[4,77,1024],f16>
%5601 = torch.aten.detach %936 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%5602 = torch.aten.detach %5601 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%5603 = torch.aten.div.Tensor %arg2, %5600 : !torch.vtensor<[4,77,1024],f16>, !torch.vtensor<[4,77,1024],f16> -> !torch.vtensor<[4,77,1024],f16>
%5604 = torch.aten.add.Tensor %5603, %5602, %int1 : !torch.vtensor<[4,77,1024],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,77,1024],f16>
%5605 = torch.aten.round %5604 : !torch.vtensor<[4,77,1024],f16> -> !torch.vtensor<[4,77,1024],f16>
%5606 = torch.aten.clamp %5605, %int-128, %int127 : !torch.vtensor<[4,77,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,77,1024],f16>
%5607 = torch.aten.sub.Tensor %5606, %5602, %int1 : !torch.vtensor<[4,77,1024],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,77,1024],f16>
%5608 = torch.aten.mul.Tensor %5607, %5600 : !torch.vtensor<[4,77,1024],f16>, !torch.vtensor<[4,77,1024],f16> -> !torch.vtensor<[4,77,1024],f16>
%5609 = torch.aten.broadcast_to %562, %4046 : !torch.vtensor<[1280,64,1],f16>, !torch.list<int> -> !torch.vtensor<[1280,64,16],f16>
%5610 = torch.aten.clone %5609, %int0 : !torch.vtensor<[1280,64,16],f16>, !torch.int -> !torch.vtensor<[1280,64,16],f16>
%5611 = torch.aten.view %5610, %4049 : !torch.vtensor<[1280,64,16],f16>, !torch.list<int> -> !torch.vtensor<[1280,1024],f16>
%5612 = torch.aten.mul.Tensor %561, %5611 : !torch.vtensor<[1280,1024],si8>, !torch.vtensor<[1280,1024],f16> -> !torch.vtensor<[1280,1024],f16>
%5613 = torch.aten.transpose.int %5612, %int0, %int1 : !torch.vtensor<[1280,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[1024,1280],f16>
%5614 = torch.aten.view %5608, %1430 : !torch.vtensor<[4,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[308,1024],f16>
%5615 = torch.aten.mm %5614, %5613 : !torch.vtensor<[308,1024],f16>, !torch.vtensor<[1024,1280],f16> -> !torch.vtensor<[308,1280],f16>
%5616 = torch.aten.view %5615, %4055 : !torch.vtensor<[308,1280],f16>, !torch.list<int> -> !torch.vtensor<[4,77,1280],f16>
%5617 = torch.aten.view %5593, %4080 : !torch.vtensor<[4,77,1280],f16>, !torch.list<int> -> !torch.vtensor<[4,77,20,64],f16>
%5618 = torch.aten.permute %5617, %1239 : !torch.vtensor<[4,77,20,64],f16>, !torch.list<int> -> !torch.vtensor<[4,20,77,64],f16>
%5619 = torch.aten.clone %5618, %int0 : !torch.vtensor<[4,20,77,64],f16>, !torch.int -> !torch.vtensor<[4,20,77,64],f16>
%5620 = torch.aten.view %5619, %4084 : !torch.vtensor<[4,20,77,64],f16>, !torch.list<int> -> !torch.vtensor<[80,77,64],f16>
%5621 = torch.aten.view %5616, %4080 : !torch.vtensor<[4,77,1280],f16>, !torch.list<int> -> !torch.vtensor<[4,77,20,64],f16>
%5622 = torch.aten.permute %5621, %1239 : !torch.vtensor<[4,77,20,64],f16>, !torch.list<int> -> !torch.vtensor<[4,20,77,64],f16>
%5623 = torch.aten.clone %5622, %int0 : !torch.vtensor<[4,20,77,64],f16>, !torch.int -> !torch.vtensor<[4,20,77,64],f16>
%5624 = torch.aten.view %5623, %4084 : !torch.vtensor<[4,20,77,64],f16>, !torch.list<int> -> !torch.vtensor<[80,77,64],f16>
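    // Cross-attention scores: Q is [80,144,64] (image tokens), K/V are
    // [80,77,64] (text tokens), giving a [80,144,77] score matrix that runs
    // through the same scale-by-0.125 and stable-softmax sequence as above.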
%5625 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%5626 = torch.aten.to.dtype %5625, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%5627 = torch.prim.ListConstruct %int80, %int144, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5628 = torch.aten.broadcast_to %5626, %5627 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[80,144,64],f32>
%5629 = torch.aten.to.dtype %5570, %int6, %false, %false, %none : !torch.vtensor<[80,144,64],f16>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[80,144,64],f32>
%5630 = torch.prim.ListConstruct %int80, %int144, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5631 = torch.aten.broadcast_to %5629, %5630 : !torch.vtensor<[80,144,64],f32>, !torch.list<int> -> !torch.vtensor<[80,144,64],f32>
%5632 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%5633 = torch.aten.to.dtype %5632, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%5634 = torch.prim.ListConstruct %int80, %int77, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5635 = torch.aten.broadcast_to %5633, %5634 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[80,77,64],f32>
%5636 = torch.aten.to.dtype %5620, %int6, %false, %false, %none : !torch.vtensor<[80,77,64],f16>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[80,77,64],f32>
%5637 = torch.prim.ListConstruct %int80, %int77, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5638 = torch.aten.broadcast_to %5636, %5637 : !torch.vtensor<[80,77,64],f32>, !torch.list<int> -> !torch.vtensor<[80,77,64],f32>
%5639 = torch.prim.ListConstruct %int80, %int144, %int77 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5640 = torch.aten.empty.memory_format %5639, %int6, %none, %cuda3A0, %false, %none : !torch.list<int>, !torch.int, !torch.none, !torch.Device, !torch.bool, !torch.none -> !torch.vtensor<[80,144,77],f32>
%5641 = torch.aten.transpose.int %5638, %int-1, %int-2 : !torch.vtensor<[80,77,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[80,64,77],f32>
%5642 = torch.aten.bmm %5631, %5641 : !torch.vtensor<[80,144,64],f32>, !torch.vtensor<[80,64,77],f32> -> !torch.vtensor<[80,144,77],f32>
%5643 = torch.aten.mul.Scalar %5642, %float1.250000e-01 : !torch.vtensor<[80,144,77],f32>, !torch.float -> !torch.vtensor<[80,144,77],f32>
%5644 = torch.aten.add.Tensor %5643, %5640, %int0 : !torch.vtensor<[80,144,77],f32>, !torch.vtensor<[80,144,77],f32>, !torch.int -> !torch.vtensor<[80,144,77],f32>
%values_230, %indices_231 = torch.aten.max.dim %5644, %int-1, %true : !torch.vtensor<[80,144,77],f32>, !torch.int, !torch.bool -> !torch.vtensor<[80,144,1],f32>, !torch.vtensor<[80,144,1],si64>
%5645 = torch.aten.sub.Tensor %5644, %values_230, %float1.000000e00 : !torch.vtensor<[80,144,77],f32>, !torch.vtensor<[80,144,1],f32>, !torch.float -> !torch.vtensor<[80,144,77],f32>
%5646 = torch.aten.exp %5645 : !torch.vtensor<[80,144,77],f32> -> !torch.vtensor<[80,144,77],f32>
%5647 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%5648 = torch.aten.sum.dim_IntList %5646, %5647, %true, %none : !torch.vtensor<[80,144,77],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[80,144,1],f32>
%5649 = torch.aten.div.Tensor %5646, %5648 : !torch.vtensor<[80,144,77],f32>, !torch.vtensor<[80,144,1],f32> -> !torch.vtensor<[80,144,77],f32>
%5650 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%5651 = torch.aten.to.dtype %5650, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%5652 = torch.prim.ListConstruct %int80, %int144, %int77 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5653 = torch.aten.broadcast_to %5651, %5652 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[80,144,77],f16>
%5654 = torch.aten.to.dtype %5649, %int5, %false, %false, %none : !torch.vtensor<[80,144,77],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[80,144,77],f16>
%5655 = torch.prim.ListConstruct %int80, %int144, %int77 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5656 = torch.aten.broadcast_to %5654, %5655 : !torch.vtensor<[80,144,77],f16>, !torch.list<int> -> !torch.vtensor<[80,144,77],f16>
%5657 = torch.aten.bmm %5656, %5624 : !torch.vtensor<[80,144,77],f16>, !torch.vtensor<[80,77,64],f16> -> !torch.vtensor<[80,144,64],f16>
%5658 = torch.aten.view %5657, %5497 : !torch.vtensor<[80,144,64],f16>, !torch.list<int> -> !torch.vtensor<[4,20,144,64],f16>
%5659 = torch.aten.permute %5658, %1239 : !torch.vtensor<[4,20,144,64],f16>, !torch.list<int> -> !torch.vtensor<[4,144,20,64],f16>
%5660 = torch.aten.clone %5659, %int0 : !torch.vtensor<[4,144,20,64],f16>, !torch.int -> !torch.vtensor<[4,144,20,64],f16>
%5661 = torch.aten.view %5660, %5334 : !torch.vtensor<[4,144,20,64],f16>, !torch.list<int> -> !torch.vtensor<[4,144,1280],f16>
%5662 = torch.aten.view %5661, %5337 : !torch.vtensor<[4,144,1280],f16>, !torch.list<int> -> !torch.vtensor<[4,144,80,16],f16>
%5663 = torch.aten.abs %5662 : !torch.vtensor<[4,144,80,16],f16> -> !torch.vtensor<[4,144,80,16],f16>
%values_232, %indices_233 = torch.aten.max.dim %5663, %int3, %true : !torch.vtensor<[4,144,80,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[4,144,80,1],f16>, !torch.vtensor<[4,144,80,1],si64>
%5664 = torch.aten.view %values_232, %5340 : !torch.vtensor<[4,144,80,1],f16>, !torch.list<int> -> !torch.vtensor<[4,144,80,1],f16>
%5665 = torch.aten.broadcast_to %5664, %5337 : !torch.vtensor<[4,144,80,1],f16>, !torch.list<int> -> !torch.vtensor<[4,144,80,16],f16>
%5666 = torch.aten.clone %5665, %int0 : !torch.vtensor<[4,144,80,16],f16>, !torch.int -> !torch.vtensor<[4,144,80,16],f16>
%5667 = torch.aten.view %5666, %5334 : !torch.vtensor<[4,144,80,16],f16>, !torch.list<int> -> !torch.vtensor<[4,144,1280],f16>
%5668 = torch.aten.div.Scalar %5667, %int128 : !torch.vtensor<[4,144,1280],f16>, !torch.int -> !torch.vtensor<[4,144,1280],f16>
%5669 = torch.aten.detach %936 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%5670 = torch.aten.detach %5669 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%5671 = torch.aten.div.Tensor %5661, %5668 : !torch.vtensor<[4,144,1280],f16>, !torch.vtensor<[4,144,1280],f16> -> !torch.vtensor<[4,144,1280],f16>
%5672 = torch.aten.add.Tensor %5671, %5670, %int1 : !torch.vtensor<[4,144,1280],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,144,1280],f16>
%5673 = torch.aten.round %5672 : !torch.vtensor<[4,144,1280],f16> -> !torch.vtensor<[4,144,1280],f16>
%5674 = torch.aten.clamp %5673, %int-128, %int127 : !torch.vtensor<[4,144,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,144,1280],f16>
%5675 = torch.aten.sub.Tensor %5674, %5670, %int1 : !torch.vtensor<[4,144,1280],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,144,1280],f16>
%5676 = torch.aten.mul.Tensor %5675, %5668 : !torch.vtensor<[4,144,1280],f16>, !torch.vtensor<[4,144,1280],f16> -> !torch.vtensor<[4,144,1280],f16>
%5677 = torch.aten.broadcast_to %560, %3812 : !torch.vtensor<[1280,80,1],f16>, !torch.list<int> -> !torch.vtensor<[1280,80,16],f16>
%5678 = torch.aten.clone %5677, %int0 : !torch.vtensor<[1280,80,16],f16>, !torch.int -> !torch.vtensor<[1280,80,16],f16>
%5679 = torch.aten.view %5678, %3815 : !torch.vtensor<[1280,80,16],f16>, !torch.list<int> -> !torch.vtensor<[1280,1280],f16>
%5680 = torch.aten.mul.Tensor %559, %5679 : !torch.vtensor<[1280,1280],si8>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%5681 = torch.aten.transpose.int %5680, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%5682 = torch.aten.view %5676, %5359 : !torch.vtensor<[4,144,1280],f16>, !torch.list<int> -> !torch.vtensor<[576,1280],f16>
%5683 = torch.aten.mm %5682, %5681 : !torch.vtensor<[576,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[576,1280],f16>
%5684 = torch.aten.mul.Scalar %558, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16>
%5685 = torch.aten.add.Tensor %5684, %5683, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[576,1280],f16>, !torch.int -> !torch.vtensor<[576,1280],f16>
%5686 = torch.aten.view %5685, %5334 : !torch.vtensor<[576,1280],f16>, !torch.list<int> -> !torch.vtensor<[4,144,1280],f16>
%5687 = torch.aten.add.Tensor %5686, %5527, %int1 : !torch.vtensor<[4,144,1280],f16>, !torch.vtensor<[4,144,1280],f16>, !torch.int -> !torch.vtensor<[4,144,1280],f16>
%5688 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%5689 = torch.aten.sum.dim_IntList %5687, %5688, %true, %none : !torch.vtensor<[4,144,1280],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,144,1],f16>
%5690 = torch.aten.div.Scalar %5689, %int1280 : !torch.vtensor<[4,144,1],f16>, !torch.int -> !torch.vtensor<[4,144,1],f16>
%5691 = torch.prim.ListConstruct %int4, %int144, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5692 = torch.aten.broadcast_to %5690, %5691 : !torch.vtensor<[4,144,1],f16>, !torch.list<int> -> !torch.vtensor<[4,144,1280],f16>
%5693 = torch.aten.sub.Tensor %5687, %5692, %int1 : !torch.vtensor<[4,144,1280],f16>, !torch.vtensor<[4,144,1280],f16>, !torch.int -> !torch.vtensor<[4,144,1280],f16>
%5694 = torch.aten.mul.Tensor %5693, %5693 : !torch.vtensor<[4,144,1280],f16>, !torch.vtensor<[4,144,1280],f16> -> !torch.vtensor<[4,144,1280],f16>
%5695 = torch.aten.sum.dim_IntList %5694, %5688, %true, %none : !torch.vtensor<[4,144,1280],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,144,1],f16>
%5696 = torch.aten.div.Scalar %5695, %int1280 : !torch.vtensor<[4,144,1],f16>, !torch.int -> !torch.vtensor<[4,144,1],f16>
%5697 = torch.aten.add.Scalar %5696, %float1.000000e-05, %int1 : !torch.vtensor<[4,144,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[4,144,1],f16>
%5698 = torch.aten.rsqrt %5697 : !torch.vtensor<[4,144,1],f16> -> !torch.vtensor<[4,144,1],f16>
%5699 = torch.prim.ListConstruct %int4, %int144, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5700 = torch.aten.broadcast_to %5698, %5699 : !torch.vtensor<[4,144,1],f16>, !torch.list<int> -> !torch.vtensor<[4,144,1280],f16>
%5701 = torch.aten.mul.Tensor %5693, %5700 : !torch.vtensor<[4,144,1280],f16>, !torch.vtensor<[4,144,1280],f16> -> !torch.vtensor<[4,144,1280],f16>
%5702 = torch.aten.mul.Tensor %5701, %557 : !torch.vtensor<[4,144,1280],f16>, !torch.vtensor<[1280],f16> -> !torch.vtensor<[4,144,1280],f16>
%5703 = torch.aten.add.Tensor %5702, %556, %int1 : !torch.vtensor<[4,144,1280],f16>, !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[4,144,1280],f16>
%5704 = torch.aten.view %5703, %5337 : !torch.vtensor<[4,144,1280],f16>, !torch.list<int> -> !torch.vtensor<[4,144,80,16],f16>
%5705 = torch.aten.abs %5704 : !torch.vtensor<[4,144,80,16],f16> -> !torch.vtensor<[4,144,80,16],f16>
%values_234, %indices_235 = torch.aten.max.dim %5705, %int3, %true : !torch.vtensor<[4,144,80,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[4,144,80,1],f16>, !torch.vtensor<[4,144,80,1],si64>
%5706 = torch.aten.view %values_234, %5340 : !torch.vtensor<[4,144,80,1],f16>, !torch.list<int> -> !torch.vtensor<[4,144,80,1],f16>
%5707 = torch.aten.broadcast_to %5706, %5337 : !torch.vtensor<[4,144,80,1],f16>, !torch.list<int> -> !torch.vtensor<[4,144,80,16],f16>
%5708 = torch.aten.clone %5707, %int0 : !torch.vtensor<[4,144,80,16],f16>, !torch.int -> !torch.vtensor<[4,144,80,16],f16>
%5709 = torch.aten.view %5708, %5334 : !torch.vtensor<[4,144,80,16],f16>, !torch.list<int> -> !torch.vtensor<[4,144,1280],f16>
%5710 = torch.aten.div.Scalar %5709, %int128 : !torch.vtensor<[4,144,1280],f16>, !torch.int -> !torch.vtensor<[4,144,1280],f16>
%5711 = torch.aten.detach %936 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%5712 = torch.aten.detach %5711 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%5713 = torch.aten.div.Tensor %5703, %5710 : !torch.vtensor<[4,144,1280],f16>, !torch.vtensor<[4,144,1280],f16> -> !torch.vtensor<[4,144,1280],f16>
%5714 = torch.aten.add.Tensor %5713, %5712, %int1 : !torch.vtensor<[4,144,1280],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,144,1280],f16>
%5715 = torch.aten.round %5714 : !torch.vtensor<[4,144,1280],f16> -> !torch.vtensor<[4,144,1280],f16>
%5716 = torch.aten.clamp %5715, %int-128, %int127 : !torch.vtensor<[4,144,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,144,1280],f16>
%5717 = torch.aten.sub.Tensor %5716, %5712, %int1 : !torch.vtensor<[4,144,1280],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,144,1280],f16>
%5718 = torch.aten.mul.Tensor %5717, %5710 : !torch.vtensor<[4,144,1280],f16>, !torch.vtensor<[4,144,1280],f16> -> !torch.vtensor<[4,144,1280],f16>
%5719 = torch.aten.broadcast_to %555, %4184 : !torch.vtensor<[10240,80,1],f16>, !torch.list<int> -> !torch.vtensor<[10240,80,16],f16>
%5720 = torch.aten.clone %5719, %int0 : !torch.vtensor<[10240,80,16],f16>, !torch.int -> !torch.vtensor<[10240,80,16],f16>
%5721 = torch.aten.view %5720, %4187 : !torch.vtensor<[10240,80,16],f16>, !torch.list<int> -> !torch.vtensor<[10240,1280],f16>
%5722 = torch.aten.mul.Tensor %554, %5721 : !torch.vtensor<[10240,1280],si8>, !torch.vtensor<[10240,1280],f16> -> !torch.vtensor<[10240,1280],f16>
%5723 = torch.aten.transpose.int %5722, %int0, %int1 : !torch.vtensor<[10240,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,10240],f16>
%5724 = torch.aten.view %5718, %5359 : !torch.vtensor<[4,144,1280],f16>, !torch.list<int> -> !torch.vtensor<[576,1280],f16>
%5725 = torch.aten.mm %5724, %5723 : !torch.vtensor<[576,1280],f16>, !torch.vtensor<[1280,10240],f16> -> !torch.vtensor<[576,10240],f16>
%5726 = torch.aten.mul.Scalar %553, %int1 : !torch.vtensor<[10240],f16>, !torch.int -> !torch.vtensor<[10240],f16>
%5727 = torch.aten.add.Tensor %5726, %5725, %int1 : !torch.vtensor<[10240],f16>, !torch.vtensor<[576,10240],f16>, !torch.int -> !torch.vtensor<[576,10240],f16>
%5728 = torch.prim.ListConstruct %int4, %int144, %int10240 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5729 = torch.aten.view %5727, %5728 : !torch.vtensor<[576,10240],f16>, !torch.list<int> -> !torch.vtensor<[4,144,10240],f16>
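    // GEGLU feed-forward gate: the 10240-wide projection is split into two
    // 5120-wide halves, GELU is applied to the second half, and the halves
    // are multiplied: geglu(x) = a * gelu(b) where [a, b] = split(proj(x)).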
%5730 = torch.aten.slice.Tensor %5729, %int-1, %int0, %int5120, %int1 : !torch.vtensor<[4,144,10240],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[4,144,5120],f16>
%5731 = torch.aten.slice.Tensor %5729, %int-1, %int5120, %int10240, %int1 : !torch.vtensor<[4,144,10240],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[4,144,5120],f16>
%5732 = torch.aten.gelu %5731, %str : !torch.vtensor<[4,144,5120],f16>, !torch.str -> !torch.vtensor<[4,144,5120],f16>
%5733 = torch.aten.mul.Tensor %5730, %5732 : !torch.vtensor<[4,144,5120],f16>, !torch.vtensor<[4,144,5120],f16> -> !torch.vtensor<[4,144,5120],f16>
%5734 = torch.prim.ListConstruct %int4, %int144, %int320, %int16 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5735 = torch.aten.view %5733, %5734 : !torch.vtensor<[4,144,5120],f16>, !torch.list<int> -> !torch.vtensor<[4,144,320,16],f16>
%5736 = torch.aten.abs %5735 : !torch.vtensor<[4,144,320,16],f16> -> !torch.vtensor<[4,144,320,16],f16>
%values_236, %indices_237 = torch.aten.max.dim %5736, %int3, %true : !torch.vtensor<[4,144,320,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[4,144,320,1],f16>, !torch.vtensor<[4,144,320,1],si64>
%5737 = torch.prim.ListConstruct %int4, %int144, %int320, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5738 = torch.aten.view %values_236, %5737 : !torch.vtensor<[4,144,320,1],f16>, !torch.list<int> -> !torch.vtensor<[4,144,320,1],f16>
%5739 = torch.aten.broadcast_to %5738, %5734 : !torch.vtensor<[4,144,320,1],f16>, !torch.list<int> -> !torch.vtensor<[4,144,320,16],f16>
%5740 = torch.aten.clone %5739, %int0 : !torch.vtensor<[4,144,320,16],f16>, !torch.int -> !torch.vtensor<[4,144,320,16],f16>
%5741 = torch.prim.ListConstruct %int4, %int144, %int5120 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5742 = torch.aten.view %5740, %5741 : !torch.vtensor<[4,144,320,16],f16>, !torch.list<int> -> !torch.vtensor<[4,144,5120],f16>
%5743 = torch.aten.div.Scalar %5742, %int128 : !torch.vtensor<[4,144,5120],f16>, !torch.int -> !torch.vtensor<[4,144,5120],f16>
%5744 = torch.aten.detach %936 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%5745 = torch.aten.detach %5744 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%5746 = torch.aten.div.Tensor %5733, %5743 : !torch.vtensor<[4,144,5120],f16>, !torch.vtensor<[4,144,5120],f16> -> !torch.vtensor<[4,144,5120],f16>
%5747 = torch.aten.add.Tensor %5746, %5745, %int1 : !torch.vtensor<[4,144,5120],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,144,5120],f16>
%5748 = torch.aten.round %5747 : !torch.vtensor<[4,144,5120],f16> -> !torch.vtensor<[4,144,5120],f16>
%5749 = torch.aten.clamp %5748, %int-128, %int127 : !torch.vtensor<[4,144,5120],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,144,5120],f16>
%5750 = torch.aten.sub.Tensor %5749, %5745, %int1 : !torch.vtensor<[4,144,5120],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,144,5120],f16>
%5751 = torch.aten.mul.Tensor %5750, %5743 : !torch.vtensor<[4,144,5120],f16>, !torch.vtensor<[4,144,5120],f16> -> !torch.vtensor<[4,144,5120],f16>
%5752 = torch.aten.broadcast_to %552, %4219 : !torch.vtensor<[1280,320,1],f16>, !torch.list<int> -> !torch.vtensor<[1280,320,16],f16>
%5753 = torch.aten.clone %5752, %int0 : !torch.vtensor<[1280,320,16],f16>, !torch.int -> !torch.vtensor<[1280,320,16],f16>
%5754 = torch.aten.view %5753, %4222 : !torch.vtensor<[1280,320,16],f16>, !torch.list<int> -> !torch.vtensor<[1280,5120],f16>
%5755 = torch.aten.mul.Tensor %551, %5754 : !torch.vtensor<[1280,5120],si8>, !torch.vtensor<[1280,5120],f16> -> !torch.vtensor<[1280,5120],f16>
%5756 = torch.aten.transpose.int %5755, %int0, %int1 : !torch.vtensor<[1280,5120],f16>, !torch.int, !torch.int -> !torch.vtensor<[5120,1280],f16>
%5757 = torch.prim.ListConstruct %int576, %int5120 : (!torch.int, !torch.int) -> !torch.list<int>
%5758 = torch.aten.view %5751, %5757 : !torch.vtensor<[4,144,5120],f16>, !torch.list<int> -> !torch.vtensor<[576,5120],f16>
%5759 = torch.aten.mm %5758, %5756 : !torch.vtensor<[576,5120],f16>, !torch.vtensor<[5120,1280],f16> -> !torch.vtensor<[576,1280],f16>
%5760 = torch.aten.mul.Scalar %550, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16>
%5761 = torch.aten.add.Tensor %5760, %5759, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[576,1280],f16>, !torch.int -> !torch.vtensor<[576,1280],f16>
%5762 = torch.aten.view %5761, %5334 : !torch.vtensor<[576,1280],f16>, !torch.list<int> -> !torch.vtensor<[4,144,1280],f16>
%5763 = torch.aten.add.Tensor %5762, %5687, %int1 : !torch.vtensor<[4,144,1280],f16>, !torch.vtensor<[4,144,1280],f16>, !torch.int -> !torch.vtensor<[4,144,1280],f16>
%5764 = torch.aten.view %5763, %5337 : !torch.vtensor<[4,144,1280],f16>, !torch.list<int> -> !torch.vtensor<[4,144,80,16],f16>
%5765 = torch.aten.abs %5764 : !torch.vtensor<[4,144,80,16],f16> -> !torch.vtensor<[4,144,80,16],f16>
%values_238, %indices_239 = torch.aten.max.dim %5765, %int3, %true : !torch.vtensor<[4,144,80,16],f16>, !torch.int, !torch.bool -> !torch.vtensor<[4,144,80,1],f16>, !torch.vtensor<[4,144,80,1],si64>
%5766 = torch.aten.view %values_238, %5340 : !torch.vtensor<[4,144,80,1],f16>, !torch.list<int> -> !torch.vtensor<[4,144,80,1],f16>
%5767 = torch.aten.broadcast_to %5766, %5337 : !torch.vtensor<[4,144,80,1],f16>, !torch.list<int> -> !torch.vtensor<[4,144,80,16],f16>
%5768 = torch.aten.clone %5767, %int0 : !torch.vtensor<[4,144,80,16],f16>, !torch.int -> !torch.vtensor<[4,144,80,16],f16>
%5769 = torch.aten.view %5768, %5334 : !torch.vtensor<[4,144,80,16],f16>, !torch.list<int> -> !torch.vtensor<[4,144,1280],f16>
%5770 = torch.aten.div.Scalar %5769, %int128 : !torch.vtensor<[4,144,1280],f16>, !torch.int -> !torch.vtensor<[4,144,1280],f16>
%5771 = torch.aten.detach %936 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%5772 = torch.aten.detach %5771 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%5773 = torch.aten.div.Tensor %5763, %5770 : !torch.vtensor<[4,144,1280],f16>, !torch.vtensor<[4,144,1280],f16> -> !torch.vtensor<[4,144,1280],f16>
%5774 = torch.aten.add.Tensor %5773, %5772, %int1 : !torch.vtensor<[4,144,1280],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,144,1280],f16>
%5775 = torch.aten.round %5774 : !torch.vtensor<[4,144,1280],f16> -> !torch.vtensor<[4,144,1280],f16>
%5776 = torch.aten.clamp %5775, %int-128, %int127 : !torch.vtensor<[4,144,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,144,1280],f16>
%5777 = torch.aten.sub.Tensor %5776, %5772, %int1 : !torch.vtensor<[4,144,1280],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,144,1280],f16>
%5778 = torch.aten.mul.Tensor %5777, %5770 : !torch.vtensor<[4,144,1280],f16>, !torch.vtensor<[4,144,1280],f16> -> !torch.vtensor<[4,144,1280],f16>
%5779 = torch.aten.broadcast_to %549, %3812 : !torch.vtensor<[1280,80,1],f16>, !torch.list<int> -> !torch.vtensor<[1280,80,16],f16>
%5780 = torch.aten.clone %5779, %int0 : !torch.vtensor<[1280,80,16],f16>, !torch.int -> !torch.vtensor<[1280,80,16],f16>
%5781 = torch.aten.view %5780, %3815 : !torch.vtensor<[1280,80,16],f16>, !torch.list<int> -> !torch.vtensor<[1280,1280],f16>
%5782 = torch.aten.mul.Tensor %548, %5781 : !torch.vtensor<[1280,1280],si8>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%5783 = torch.aten.transpose.int %5782, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%5784 = torch.aten.view %5778, %5359 : !torch.vtensor<[4,144,1280],f16>, !torch.list<int> -> !torch.vtensor<[576,1280],f16>
%5785 = torch.aten.mm %5784, %5783 : !torch.vtensor<[576,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[576,1280],f16>
%5786 = torch.aten.mul.Scalar %547, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16>
%5787 = torch.aten.add.Tensor %5786, %5785, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[576,1280],f16>, !torch.int -> !torch.vtensor<[576,1280],f16>
%5788 = torch.aten.view %5787, %5334 : !torch.vtensor<[576,1280],f16>, !torch.list<int> -> !torch.vtensor<[4,144,1280],f16>
%5789 = torch.prim.ListConstruct %int4, %int12, %int12, %int1280 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5790 = torch.aten.view %5788, %5789 : !torch.vtensor<[4,144,1280],f16>, !torch.list<int> -> !torch.vtensor<[4,12,12,1280],f16>
%5791 = torch.aten.permute %5790, %1638 : !torch.vtensor<[4,12,12,1280],f16>, !torch.list<int> -> !torch.vtensor<[4,1280,12,12],f16>
%5792 = torch.aten.clone %5791, %int0 : !torch.vtensor<[4,1280,12,12],f16>, !torch.int -> !torch.vtensor<[4,1280,12,12],f16>
%5793 = torch.aten.add.Tensor %5792, %5294, %int1 : !torch.vtensor<[4,1280,12,12],f16>, !torch.vtensor<[4,1280,12,12],f16>, !torch.int -> !torch.vtensor<[4,1280,12,12],f16>
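    // End of the transformer block: tokens were reshaped back to spatial
    // [4,1280,12,12] and added to the block's input (%5294). What follows
    // looks like a ResNet block: GroupNorm (32 groups of 40 channels,
    // decomposed below with f64 accumulation for the statistics), SiLU,
    // quantized 3x3 conv, a time-embedding add, and a second norm/act/conv.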
%5794 = torch.aten.view %5793, %4898 : !torch.vtensor<[4,1280,12,12],f16>, !torch.list<int> -> !torch.vtensor<[4,32,40,144],f16>
%5795 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%5796 = torch.aten.to.dtype %5795, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%5797 = torch.prim.ListConstruct %int4, %int32, %int40, %int144 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5798 = torch.aten.broadcast_to %5796, %5797 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[4,32,40,144],f32>
%5799 = torch.aten.to.dtype %5794, %int6, %false, %false, %none : !torch.vtensor<[4,32,40,144],f16>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,32,40,144],f32>
%5800 = torch.prim.ListConstruct %int4, %int32, %int40, %int144 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5801 = torch.aten.broadcast_to %5799, %5800 : !torch.vtensor<[4,32,40,144],f32>, !torch.list<int> -> !torch.vtensor<[4,32,40,144],f32>
%5802 = torch.aten.to.dtype %5801, %int7, %false, %false, %none : !torch.vtensor<[4,32,40,144],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,32,40,144],f64>
%5803 = torch.aten.sum.dim_IntList %5802, %996, %true, %none : !torch.vtensor<[4,32,40,144],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f64>
%5804 = torch.aten.div.Scalar %5803, %int5760 : !torch.vtensor<[4,32,1,1],f64>, !torch.int -> !torch.vtensor<[4,32,1,1],f64>
%5805 = torch.aten.sub.Tensor %5802, %5804, %float1.000000e00 : !torch.vtensor<[4,32,40,144],f64>, !torch.vtensor<[4,32,1,1],f64>, !torch.float -> !torch.vtensor<[4,32,40,144],f64>
%5806 = torch.aten.mul.Tensor %5805, %5805 : !torch.vtensor<[4,32,40,144],f64>, !torch.vtensor<[4,32,40,144],f64> -> !torch.vtensor<[4,32,40,144],f64>
%5807 = torch.aten.sum.dim_IntList %5806, %996, %true, %none : !torch.vtensor<[4,32,40,144],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f64>
%5808 = torch.aten.div.Scalar %5807, %int5760 : !torch.vtensor<[4,32,1,1],f64>, !torch.int -> !torch.vtensor<[4,32,1,1],f64>
%5809 = torch.aten.to.dtype %5808, %int6, %false, %false, %none : !torch.vtensor<[4,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f32>
%5810 = torch.aten.sum.dim_IntList %5801, %996, %true, %none : !torch.vtensor<[4,32,40,144],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f32>
%5811 = torch.aten.div.Scalar %5810, %int5760 : !torch.vtensor<[4,32,1,1],f32>, !torch.int -> !torch.vtensor<[4,32,1,1],f32>
%5812 = torch.aten.add.Tensor %5809, %939, %int1 : !torch.vtensor<[4,32,1,1],f32>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,32,1,1],f32>
%5813 = torch.aten.rsqrt %5812 : !torch.vtensor<[4,32,1,1],f32> -> !torch.vtensor<[4,32,1,1],f32>
%5814 = torch.aten.sub.Tensor %5794, %5811, %int1 : !torch.vtensor<[4,32,40,144],f16>, !torch.vtensor<[4,32,1,1],f32>, !torch.int -> !torch.vtensor<[4,32,40,144],f32>
%5815 = torch.aten.mul.Tensor %5814, %5813 : !torch.vtensor<[4,32,40,144],f32>, !torch.vtensor<[4,32,1,1],f32> -> !torch.vtensor<[4,32,40,144],f32>
%5816 = torch.aten.view %5815, %4921 : !torch.vtensor<[4,32,40,144],f32>, !torch.list<int> -> !torch.vtensor<[4,1280,12,12],f32>
%5817 = torch.aten.unsqueeze %546, %int0 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1,1280],f16>
%5818 = torch.aten.unsqueeze %5817, %int2 : !torch.vtensor<[1,1280],f16>, !torch.int -> !torch.vtensor<[1,1280,1],f16>
%5819 = torch.aten.unsqueeze %5818, %int3 : !torch.vtensor<[1,1280,1],f16>, !torch.int -> !torch.vtensor<[1,1280,1,1],f16>
%5820 = torch.aten.unsqueeze %545, %int0 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1,1280],f16>
%5821 = torch.aten.unsqueeze %5820, %int2 : !torch.vtensor<[1,1280],f16>, !torch.int -> !torch.vtensor<[1,1280,1],f16>
%5822 = torch.aten.unsqueeze %5821, %int3 : !torch.vtensor<[1,1280,1],f16>, !torch.int -> !torch.vtensor<[1,1280,1,1],f16>
%5823 = torch.aten.mul.Tensor %5816, %5822 : !torch.vtensor<[4,1280,12,12],f32>, !torch.vtensor<[1,1280,1,1],f16> -> !torch.vtensor<[4,1280,12,12],f32>
%5824 = torch.aten.add.Tensor %5823, %5819, %int1 : !torch.vtensor<[4,1280,12,12],f32>, !torch.vtensor<[1,1280,1,1],f16>, !torch.int -> !torch.vtensor<[4,1280,12,12],f32>
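    // SiLU activation spelled out as x * sigmoid(x) (the sigmoid/mul pair
    // below), applied after casting the normalized result back to f16.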
%5825 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%5826 = torch.aten.to.dtype %5825, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%5827 = torch.prim.ListConstruct %int4, %int1280, %int12, %int12 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5828 = torch.aten.broadcast_to %5826, %5827 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[4,1280,12,12],f16>
%5829 = torch.aten.to.dtype %5824, %int5, %false, %false, %none : !torch.vtensor<[4,1280,12,12],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,1280,12,12],f16>
%5830 = torch.prim.ListConstruct %int4, %int1280, %int12, %int12 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5831 = torch.aten.broadcast_to %5829, %5830 : !torch.vtensor<[4,1280,12,12],f16>, !torch.list<int> -> !torch.vtensor<[4,1280,12,12],f16>
%5832 = torch.aten.sigmoid %5831 : !torch.vtensor<[4,1280,12,12],f16> -> !torch.vtensor<[4,1280,12,12],f16>
%5833 = torch.aten.mul.Tensor %5832, %5831 : !torch.vtensor<[4,1280,12,12],f16>, !torch.vtensor<[4,1280,12,12],f16> -> !torch.vtensor<[4,1280,12,12],f16>
%5834 = torch.aten.view %5833, %4940 : !torch.vtensor<[4,1280,12,12],f16>, !torch.list<int> -> !torch.vtensor<[4,80,16,12,12],f16>
%5835 = torch.aten.abs %5834 : !torch.vtensor<[4,80,16,12,12],f16> -> !torch.vtensor<[4,80,16,12,12],f16>
%values_240, %indices_241 = torch.aten.max.dim %5835, %int2, %true : !torch.vtensor<[4,80,16,12,12],f16>, !torch.int, !torch.bool -> !torch.vtensor<[4,80,1,12,12],f16>, !torch.vtensor<[4,80,1,12,12],si64>
%5836 = torch.aten.view %values_240, %4943 : !torch.vtensor<[4,80,1,12,12],f16>, !torch.list<int> -> !torch.vtensor<[4,80,1,12,12],f16>
%5837 = torch.aten.broadcast_to %5836, %4940 : !torch.vtensor<[4,80,1,12,12],f16>, !torch.list<int> -> !torch.vtensor<[4,80,16,12,12],f16>
%5838 = torch.aten.clone %5837, %int0 : !torch.vtensor<[4,80,16,12,12],f16>, !torch.int -> !torch.vtensor<[4,80,16,12,12],f16>
%5839 = torch.aten.view %5838, %4921 : !torch.vtensor<[4,80,16,12,12],f16>, !torch.list<int> -> !torch.vtensor<[4,1280,12,12],f16>
%5840 = torch.aten.div.Scalar %5839, %int128 : !torch.vtensor<[4,1280,12,12],f16>, !torch.int -> !torch.vtensor<[4,1280,12,12],f16>
%5841 = torch.aten.detach %936 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%5842 = torch.aten.detach %5841 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%5843 = torch.aten.div.Tensor %5833, %5840 : !torch.vtensor<[4,1280,12,12],f16>, !torch.vtensor<[4,1280,12,12],f16> -> !torch.vtensor<[4,1280,12,12],f16>
%5844 = torch.aten.add.Tensor %5843, %5842, %int1 : !torch.vtensor<[4,1280,12,12],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,1280,12,12],f16>
%5845 = torch.aten.round %5844 : !torch.vtensor<[4,1280,12,12],f16> -> !torch.vtensor<[4,1280,12,12],f16>
%5846 = torch.aten.clamp %5845, %int-128, %int127 : !torch.vtensor<[4,1280,12,12],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,1280,12,12],f16>
%5847 = torch.aten.sub.Tensor %5846, %5842, %int1 : !torch.vtensor<[4,1280,12,12],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,1280,12,12],f16>
%5848 = torch.aten.mul.Tensor %5847, %5840 : !torch.vtensor<[4,1280,12,12],f16>, !torch.vtensor<[4,1280,12,12],f16> -> !torch.vtensor<[4,1280,12,12],f16>
%5849 = torch.aten.broadcast_to %544, %3722 : !torch.vtensor<[1280,80,1,3,3],f16>, !torch.list<int> -> !torch.vtensor<[1280,80,16,3,3],f16>
%5850 = torch.aten.clone %5849, %int0 : !torch.vtensor<[1280,80,16,3,3],f16>, !torch.int -> !torch.vtensor<[1280,80,16,3,3],f16>
%5851 = torch.aten.view %5850, %3725 : !torch.vtensor<[1280,80,16,3,3],f16>, !torch.list<int> -> !torch.vtensor<[1280,1280,3,3],f16>
%5852 = torch.aten.mul.Tensor %543, %5851 : !torch.vtensor<[1280,1280,3,3],si8>, !torch.vtensor<[1280,1280,3,3],f16> -> !torch.vtensor<[1280,1280,3,3],f16>
%5853 = torch.aten.convolution %5848, %5852, %542, %984, %984, %984, %false, %985, %int1 : !torch.vtensor<[4,1280,12,12],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[4,1280,12,12],f16>
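    // Time-embedding injection: %983 appears to be the shared [4,1280]
    // timestep embedding; it gets SiLU and a linear projection, and is then
    // broadcast-added to the conv output as a per-channel [4,1280,1,1] bias.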
%5854 = torch.aten.sigmoid %983 : !torch.vtensor<[4,1280],f16> -> !torch.vtensor<[4,1280],f16>
%5855 = torch.aten.mul.Tensor %5854, %983 : !torch.vtensor<[4,1280],f16>, !torch.vtensor<[4,1280],f16> -> !torch.vtensor<[4,1280],f16>
%5856 = torch.aten.transpose.int %541, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%5857 = torch.aten.mm %5855, %5856 : !torch.vtensor<[4,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[4,1280],f16>
%5858 = torch.aten.mul.Scalar %540, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16>
%5859 = torch.aten.add.Tensor %5858, %5857, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[4,1280],f16>, !torch.int -> !torch.vtensor<[4,1280],f16>
%5860 = torch.aten.unsqueeze %5859, %int2 : !torch.vtensor<[4,1280],f16>, !torch.int -> !torch.vtensor<[4,1280,1],f16>
%5861 = torch.aten.unsqueeze %5860, %int3 : !torch.vtensor<[4,1280,1],f16>, !torch.int -> !torch.vtensor<[4,1280,1,1],f16>
%5862 = torch.aten.add.Tensor %5853, %5861, %int1 : !torch.vtensor<[4,1280,12,12],f16>, !torch.vtensor<[4,1280,1,1],f16>, !torch.int -> !torch.vtensor<[4,1280,12,12],f16>
%5863 = torch.aten.view %5862, %4898 : !torch.vtensor<[4,1280,12,12],f16>, !torch.list<int> -> !torch.vtensor<[4,32,40,144],f16>
%5864 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%5865 = torch.aten.to.dtype %5864, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%5866 = torch.prim.ListConstruct %int4, %int32, %int40, %int144 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5867 = torch.aten.broadcast_to %5865, %5866 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[4,32,40,144],f32>
%5868 = torch.aten.to.dtype %5863, %int6, %false, %false, %none : !torch.vtensor<[4,32,40,144],f16>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,32,40,144],f32>
%5869 = torch.prim.ListConstruct %int4, %int32, %int40, %int144 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5870 = torch.aten.broadcast_to %5868, %5869 : !torch.vtensor<[4,32,40,144],f32>, !torch.list<int> -> !torch.vtensor<[4,32,40,144],f32>
%5871 = torch.aten.to.dtype %5870, %int7, %false, %false, %none : !torch.vtensor<[4,32,40,144],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,32,40,144],f64>
%5872 = torch.aten.sum.dim_IntList %5871, %996, %true, %none : !torch.vtensor<[4,32,40,144],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f64>
%5873 = torch.aten.div.Scalar %5872, %int5760 : !torch.vtensor<[4,32,1,1],f64>, !torch.int -> !torch.vtensor<[4,32,1,1],f64>
%5874 = torch.aten.sub.Tensor %5871, %5873, %float1.000000e00 : !torch.vtensor<[4,32,40,144],f64>, !torch.vtensor<[4,32,1,1],f64>, !torch.float -> !torch.vtensor<[4,32,40,144],f64>
%5875 = torch.aten.mul.Tensor %5874, %5874 : !torch.vtensor<[4,32,40,144],f64>, !torch.vtensor<[4,32,40,144],f64> -> !torch.vtensor<[4,32,40,144],f64>
%5876 = torch.aten.sum.dim_IntList %5875, %996, %true, %none : !torch.vtensor<[4,32,40,144],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f64>
%5877 = torch.aten.div.Scalar %5876, %int5760 : !torch.vtensor<[4,32,1,1],f64>, !torch.int -> !torch.vtensor<[4,32,1,1],f64>
%5878 = torch.aten.to.dtype %5877, %int6, %false, %false, %none : !torch.vtensor<[4,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f32>
%5879 = torch.aten.sum.dim_IntList %5870, %996, %true, %none : !torch.vtensor<[4,32,40,144],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f32>
%5880 = torch.aten.div.Scalar %5879, %int5760 : !torch.vtensor<[4,32,1,1],f32>, !torch.int -> !torch.vtensor<[4,32,1,1],f32>
%5881 = torch.aten.add.Tensor %5878, %939, %int1 : !torch.vtensor<[4,32,1,1],f32>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,32,1,1],f32>
%5882 = torch.aten.rsqrt %5881 : !torch.vtensor<[4,32,1,1],f32> -> !torch.vtensor<[4,32,1,1],f32>
%5883 = torch.aten.sub.Tensor %5863, %5880, %int1 : !torch.vtensor<[4,32,40,144],f16>, !torch.vtensor<[4,32,1,1],f32>, !torch.int -> !torch.vtensor<[4,32,40,144],f32>
%5884 = torch.aten.mul.Tensor %5883, %5882 : !torch.vtensor<[4,32,40,144],f32>, !torch.vtensor<[4,32,1,1],f32> -> !torch.vtensor<[4,32,40,144],f32>
%5885 = torch.aten.view %5884, %4921 : !torch.vtensor<[4,32,40,144],f32>, !torch.list<int> -> !torch.vtensor<[4,1280,12,12],f32>
%5886 = torch.aten.unsqueeze %539, %int0 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1,1280],f16>
%5887 = torch.aten.unsqueeze %5886, %int2 : !torch.vtensor<[1,1280],f16>, !torch.int -> !torch.vtensor<[1,1280,1],f16>
%5888 = torch.aten.unsqueeze %5887, %int3 : !torch.vtensor<[1,1280,1],f16>, !torch.int -> !torch.vtensor<[1,1280,1,1],f16>
%5889 = torch.aten.unsqueeze %538, %int0 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1,1280],f16>
%5890 = torch.aten.unsqueeze %5889, %int2 : !torch.vtensor<[1,1280],f16>, !torch.int -> !torch.vtensor<[1,1280,1],f16>
%5891 = torch.aten.unsqueeze %5890, %int3 : !torch.vtensor<[1,1280,1],f16>, !torch.int -> !torch.vtensor<[1,1280,1,1],f16>
%5892 = torch.aten.mul.Tensor %5885, %5891 : !torch.vtensor<[4,1280,12,12],f32>, !torch.vtensor<[1,1280,1,1],f16> -> !torch.vtensor<[4,1280,12,12],f32>
%5893 = torch.aten.add.Tensor %5892, %5888, %int1 : !torch.vtensor<[4,1280,12,12],f32>, !torch.vtensor<[1,1280,1,1],f16>, !torch.int -> !torch.vtensor<[4,1280,12,12],f32>
%5894 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%5895 = torch.aten.to.dtype %5894, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%5896 = torch.prim.ListConstruct %int4, %int1280, %int12, %int12 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5897 = torch.aten.broadcast_to %5895, %5896 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[4,1280,12,12],f16>
%5898 = torch.aten.to.dtype %5893, %int5, %false, %false, %none : !torch.vtensor<[4,1280,12,12],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,1280,12,12],f16>
%5899 = torch.prim.ListConstruct %int4, %int1280, %int12, %int12 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5900 = torch.aten.broadcast_to %5898, %5899 : !torch.vtensor<[4,1280,12,12],f16>, !torch.list<int> -> !torch.vtensor<[4,1280,12,12],f16>
%5901 = torch.aten.sigmoid %5900 : !torch.vtensor<[4,1280,12,12],f16> -> !torch.vtensor<[4,1280,12,12],f16>
%5902 = torch.aten.mul.Tensor %5901, %5900 : !torch.vtensor<[4,1280,12,12],f16>, !torch.vtensor<[4,1280,12,12],f16> -> !torch.vtensor<[4,1280,12,12],f16>
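    // Dynamic int8 fake-quantization of the activations (quantize-dequantize):
    // view as [4,80,16,12,12] (80 groups of 16 channels), take max(|x|) over
    // each group (dim 2), and use amax/128 as a per-group, per-pixel scale.
    // %936 looks like the zero-point: x/scale + zp is rounded, clamped to
    // [-128,127], shifted back by zp, and rescaled, so %5917 simulates int8.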
%5903 = torch.aten.view %5902, %4940 : !torch.vtensor<[4,1280,12,12],f16>, !torch.list<int> -> !torch.vtensor<[4,80,16,12,12],f16>
%5904 = torch.aten.abs %5903 : !torch.vtensor<[4,80,16,12,12],f16> -> !torch.vtensor<[4,80,16,12,12],f16>
%values_242, %indices_243 = torch.aten.max.dim %5904, %int2, %true : !torch.vtensor<[4,80,16,12,12],f16>, !torch.int, !torch.bool -> !torch.vtensor<[4,80,1,12,12],f16>, !torch.vtensor<[4,80,1,12,12],si64>
%5905 = torch.aten.view %values_242, %4943 : !torch.vtensor<[4,80,1,12,12],f16>, !torch.list<int> -> !torch.vtensor<[4,80,1,12,12],f16>
%5906 = torch.aten.broadcast_to %5905, %4940 : !torch.vtensor<[4,80,1,12,12],f16>, !torch.list<int> -> !torch.vtensor<[4,80,16,12,12],f16>
%5907 = torch.aten.clone %5906, %int0 : !torch.vtensor<[4,80,16,12,12],f16>, !torch.int -> !torch.vtensor<[4,80,16,12,12],f16>
%5908 = torch.aten.view %5907, %4921 : !torch.vtensor<[4,80,16,12,12],f16>, !torch.list<int> -> !torch.vtensor<[4,1280,12,12],f16>
%5909 = torch.aten.div.Scalar %5908, %int128 : !torch.vtensor<[4,1280,12,12],f16>, !torch.int -> !torch.vtensor<[4,1280,12,12],f16>
%5910 = torch.aten.detach %936 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%5911 = torch.aten.detach %5910 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%5912 = torch.aten.div.Tensor %5902, %5909 : !torch.vtensor<[4,1280,12,12],f16>, !torch.vtensor<[4,1280,12,12],f16> -> !torch.vtensor<[4,1280,12,12],f16>
%5913 = torch.aten.add.Tensor %5912, %5911, %int1 : !torch.vtensor<[4,1280,12,12],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,1280,12,12],f16>
%5914 = torch.aten.round %5913 : !torch.vtensor<[4,1280,12,12],f16> -> !torch.vtensor<[4,1280,12,12],f16>
%5915 = torch.aten.clamp %5914, %int-128, %int127 : !torch.vtensor<[4,1280,12,12],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,1280,12,12],f16>
%5916 = torch.aten.sub.Tensor %5915, %5911, %int1 : !torch.vtensor<[4,1280,12,12],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,1280,12,12],f16>
%5917 = torch.aten.mul.Tensor %5916, %5909 : !torch.vtensor<[4,1280,12,12],f16>, !torch.vtensor<[4,1280,12,12],f16> -> !torch.vtensor<[4,1280,12,12],f16>
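    // Weight path of the same quantization scheme: the f16 scales %537
    // ([1280,80,1,3,3], one per group of 16 input channels) are broadcast to
    // the full weight shape and multiplied into the si8 weights %536 to
    // dequantize them on the fly before a regular 3x3 convolution.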
%5918 = torch.aten.broadcast_to %537, %3722 : !torch.vtensor<[1280,80,1,3,3],f16>, !torch.list<int> -> !torch.vtensor<[1280,80,16,3,3],f16>
%5919 = torch.aten.clone %5918, %int0 : !torch.vtensor<[1280,80,16,3,3],f16>, !torch.int -> !torch.vtensor<[1280,80,16,3,3],f16>
%5920 = torch.aten.view %5919, %3725 : !torch.vtensor<[1280,80,16,3,3],f16>, !torch.list<int> -> !torch.vtensor<[1280,1280,3,3],f16>
%5921 = torch.aten.mul.Tensor %536, %5920 : !torch.vtensor<[1280,1280,3,3],si8>, !torch.vtensor<[1280,1280,3,3],f16> -> !torch.vtensor<[1280,1280,3,3],f16>
%5922 = torch.aten.convolution %5917, %5921, %535, %984, %984, %984, %false, %985, %int1 : !torch.vtensor<[4,1280,12,12],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[4,1280,12,12],f16>
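    // End of this ResNet block: add the residual %5793, divide by the output
    // scale factor (%int1, a no-op here), then concatenate the saved encoder
    // activation %5163 along channels, consistent with a UNet up-block skip
    // connection (1280 + 1280 = 2560 channels).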
%5923 = torch.aten.add.Tensor %5793, %5922, %int1 : !torch.vtensor<[4,1280,12,12],f16>, !torch.vtensor<[4,1280,12,12],f16>, !torch.int -> !torch.vtensor<[4,1280,12,12],f16>
%5924 = torch.aten.div.Scalar %5923, %int1 : !torch.vtensor<[4,1280,12,12],f16>, !torch.int -> !torch.vtensor<[4,1280,12,12],f16>
%5925 = torch.prim.ListConstruct %5924, %5163 : (!torch.vtensor<[4,1280,12,12],f16>, !torch.vtensor<[4,1280,12,12],f16>) -> !torch.list<vtensor>
%5926 = torch.aten.cat %5925, %int1 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[4,2560,12,12],f16>
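    // Inline GroupNorm over the concatenated tensor, consistent with
    // num_groups=32: view [4,2560,12,12] as [4,32,80,144], accumulate mean and
    // biased variance in f64 over dims {2,3} (%996; 80*144 = 11520 elements
    // per group), then normalize with rsqrt(var + eps), where %939 appears to
    // be the epsilon constant.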
%5927 = torch.prim.ListConstruct %int4, %int32, %int80, %int144 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5928 = torch.aten.view %5926, %5927 : !torch.vtensor<[4,2560,12,12],f16>, !torch.list<int> -> !torch.vtensor<[4,32,80,144],f16>
%5929 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%5930 = torch.aten.to.dtype %5929, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%5931 = torch.prim.ListConstruct %int4, %int32, %int80, %int144 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5932 = torch.aten.broadcast_to %5930, %5931 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[4,32,80,144],f32>
%5933 = torch.aten.to.dtype %5928, %int6, %false, %false, %none : !torch.vtensor<[4,32,80,144],f16>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,32,80,144],f32>
%5934 = torch.prim.ListConstruct %int4, %int32, %int80, %int144 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5935 = torch.aten.broadcast_to %5933, %5934 : !torch.vtensor<[4,32,80,144],f32>, !torch.list<int> -> !torch.vtensor<[4,32,80,144],f32>
%5936 = torch.aten.to.dtype %5935, %int7, %false, %false, %none : !torch.vtensor<[4,32,80,144],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,32,80,144],f64>
%5937 = torch.aten.sum.dim_IntList %5936, %996, %true, %none : !torch.vtensor<[4,32,80,144],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f64>
%5938 = torch.aten.div.Scalar %5937, %int11520 : !torch.vtensor<[4,32,1,1],f64>, !torch.int -> !torch.vtensor<[4,32,1,1],f64>
%5939 = torch.aten.sub.Tensor %5936, %5938, %float1.000000e00 : !torch.vtensor<[4,32,80,144],f64>, !torch.vtensor<[4,32,1,1],f64>, !torch.float -> !torch.vtensor<[4,32,80,144],f64>
%5940 = torch.aten.mul.Tensor %5939, %5939 : !torch.vtensor<[4,32,80,144],f64>, !torch.vtensor<[4,32,80,144],f64> -> !torch.vtensor<[4,32,80,144],f64>
%5941 = torch.aten.sum.dim_IntList %5940, %996, %true, %none : !torch.vtensor<[4,32,80,144],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f64>
%5942 = torch.aten.div.Scalar %5941, %int11520 : !torch.vtensor<[4,32,1,1],f64>, !torch.int -> !torch.vtensor<[4,32,1,1],f64>
%5943 = torch.aten.to.dtype %5942, %int6, %false, %false, %none : !torch.vtensor<[4,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f32>
%5944 = torch.aten.sum.dim_IntList %5935, %996, %true, %none : !torch.vtensor<[4,32,80,144],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f32>
%5945 = torch.aten.div.Scalar %5944, %int11520 : !torch.vtensor<[4,32,1,1],f32>, !torch.int -> !torch.vtensor<[4,32,1,1],f32>
%5946 = torch.aten.add.Tensor %5943, %939, %int1 : !torch.vtensor<[4,32,1,1],f32>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,32,1,1],f32>
%5947 = torch.aten.rsqrt %5946 : !torch.vtensor<[4,32,1,1],f32> -> !torch.vtensor<[4,32,1,1],f32>
%5948 = torch.aten.sub.Tensor %5928, %5945, %int1 : !torch.vtensor<[4,32,80,144],f16>, !torch.vtensor<[4,32,1,1],f32>, !torch.int -> !torch.vtensor<[4,32,80,144],f32>
%5949 = torch.aten.mul.Tensor %5948, %5947 : !torch.vtensor<[4,32,80,144],f32>, !torch.vtensor<[4,32,1,1],f32> -> !torch.vtensor<[4,32,80,144],f32>
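    // View back to [4,2560,12,12] and apply the affine scale %533 / shift
    // %534, followed by the same SiLU and 16-channel-group fake-quant sequence
    // as above, now with 160 groups over the 2560 channels.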
%5950 = torch.prim.ListConstruct %int4, %int2560, %int12, %int12 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5951 = torch.aten.view %5949, %5950 : !torch.vtensor<[4,32,80,144],f32>, !torch.list<int> -> !torch.vtensor<[4,2560,12,12],f32>
%5952 = torch.aten.unsqueeze %534, %int0 : !torch.vtensor<[2560],f16>, !torch.int -> !torch.vtensor<[1,2560],f16>
%5953 = torch.aten.unsqueeze %5952, %int2 : !torch.vtensor<[1,2560],f16>, !torch.int -> !torch.vtensor<[1,2560,1],f16>
%5954 = torch.aten.unsqueeze %5953, %int3 : !torch.vtensor<[1,2560,1],f16>, !torch.int -> !torch.vtensor<[1,2560,1,1],f16>
%5955 = torch.aten.unsqueeze %533, %int0 : !torch.vtensor<[2560],f16>, !torch.int -> !torch.vtensor<[1,2560],f16>
%5956 = torch.aten.unsqueeze %5955, %int2 : !torch.vtensor<[1,2560],f16>, !torch.int -> !torch.vtensor<[1,2560,1],f16>
%5957 = torch.aten.unsqueeze %5956, %int3 : !torch.vtensor<[1,2560,1],f16>, !torch.int -> !torch.vtensor<[1,2560,1,1],f16>
%5958 = torch.aten.mul.Tensor %5951, %5957 : !torch.vtensor<[4,2560,12,12],f32>, !torch.vtensor<[1,2560,1,1],f16> -> !torch.vtensor<[4,2560,12,12],f32>
%5959 = torch.aten.add.Tensor %5958, %5954, %int1 : !torch.vtensor<[4,2560,12,12],f32>, !torch.vtensor<[1,2560,1,1],f16>, !torch.int -> !torch.vtensor<[4,2560,12,12],f32>
%5960 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%5961 = torch.aten.to.dtype %5960, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%5962 = torch.prim.ListConstruct %int4, %int2560, %int12, %int12 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5963 = torch.aten.broadcast_to %5961, %5962 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[4,2560,12,12],f16>
%5964 = torch.aten.to.dtype %5959, %int5, %false, %false, %none : !torch.vtensor<[4,2560,12,12],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,2560,12,12],f16>
%5965 = torch.prim.ListConstruct %int4, %int2560, %int12, %int12 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5966 = torch.aten.broadcast_to %5964, %5965 : !torch.vtensor<[4,2560,12,12],f16>, !torch.list<int> -> !torch.vtensor<[4,2560,12,12],f16>
%5967 = torch.aten.sigmoid %5966 : !torch.vtensor<[4,2560,12,12],f16> -> !torch.vtensor<[4,2560,12,12],f16>
%5968 = torch.aten.mul.Tensor %5967, %5966 : !torch.vtensor<[4,2560,12,12],f16>, !torch.vtensor<[4,2560,12,12],f16> -> !torch.vtensor<[4,2560,12,12],f16>
%5969 = torch.prim.ListConstruct %int4, %int160, %int16, %int12, %int12 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5970 = torch.aten.view %5968, %5969 : !torch.vtensor<[4,2560,12,12],f16>, !torch.list<int> -> !torch.vtensor<[4,160,16,12,12],f16>
%5971 = torch.aten.abs %5970 : !torch.vtensor<[4,160,16,12,12],f16> -> !torch.vtensor<[4,160,16,12,12],f16>
%values_244, %indices_245 = torch.aten.max.dim %5971, %int2, %true : !torch.vtensor<[4,160,16,12,12],f16>, !torch.int, !torch.bool -> !torch.vtensor<[4,160,1,12,12],f16>, !torch.vtensor<[4,160,1,12,12],si64>
%5972 = torch.prim.ListConstruct %int4, %int160, %int1, %int12, %int12 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5973 = torch.aten.view %values_244, %5972 : !torch.vtensor<[4,160,1,12,12],f16>, !torch.list<int> -> !torch.vtensor<[4,160,1,12,12],f16>
%5974 = torch.aten.broadcast_to %5973, %5969 : !torch.vtensor<[4,160,1,12,12],f16>, !torch.list<int> -> !torch.vtensor<[4,160,16,12,12],f16>
%5975 = torch.aten.clone %5974, %int0 : !torch.vtensor<[4,160,16,12,12],f16>, !torch.int -> !torch.vtensor<[4,160,16,12,12],f16>
%5976 = torch.aten.view %5975, %5950 : !torch.vtensor<[4,160,16,12,12],f16>, !torch.list<int> -> !torch.vtensor<[4,2560,12,12],f16>
%5977 = torch.aten.div.Scalar %5976, %int128 : !torch.vtensor<[4,2560,12,12],f16>, !torch.int -> !torch.vtensor<[4,2560,12,12],f16>
%5978 = torch.aten.detach %936 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%5979 = torch.aten.detach %5978 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%5980 = torch.aten.div.Tensor %5968, %5977 : !torch.vtensor<[4,2560,12,12],f16>, !torch.vtensor<[4,2560,12,12],f16> -> !torch.vtensor<[4,2560,12,12],f16>
%5981 = torch.aten.add.Tensor %5980, %5979, %int1 : !torch.vtensor<[4,2560,12,12],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,2560,12,12],f16>
%5982 = torch.aten.round %5981 : !torch.vtensor<[4,2560,12,12],f16> -> !torch.vtensor<[4,2560,12,12],f16>
%5983 = torch.aten.clamp %5982, %int-128, %int127 : !torch.vtensor<[4,2560,12,12],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,2560,12,12],f16>
%5984 = torch.aten.sub.Tensor %5983, %5979, %int1 : !torch.vtensor<[4,2560,12,12],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,2560,12,12],f16>
%5985 = torch.aten.mul.Tensor %5984, %5977 : !torch.vtensor<[4,2560,12,12],f16>, !torch.vtensor<[4,2560,12,12],f16> -> !torch.vtensor<[4,2560,12,12],f16>
%5986 = torch.prim.ListConstruct %int1280, %int160, %int16, %int3, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5987 = torch.aten.broadcast_to %532, %5986 : !torch.vtensor<[1280,160,1,3,3],f16>, !torch.list<int> -> !torch.vtensor<[1280,160,16,3,3],f16>
%5988 = torch.aten.clone %5987, %int0 : !torch.vtensor<[1280,160,16,3,3],f16>, !torch.int -> !torch.vtensor<[1280,160,16,3,3],f16>
%5989 = torch.prim.ListConstruct %int1280, %int2560, %int3, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5990 = torch.aten.view %5988, %5989 : !torch.vtensor<[1280,160,16,3,3],f16>, !torch.list<int> -> !torch.vtensor<[1280,2560,3,3],f16>
%5991 = torch.aten.mul.Tensor %531, %5990 : !torch.vtensor<[1280,2560,3,3],si8>, !torch.vtensor<[1280,2560,3,3],f16> -> !torch.vtensor<[1280,2560,3,3],f16>
%5992 = torch.aten.convolution %5985, %5991, %530, %984, %984, %984, %false, %985, %int1 : !torch.vtensor<[4,2560,12,12],f16>, !torch.vtensor<[1280,2560,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[4,1280,12,12],f16>
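    // Time-embedding injection: SiLU(%983), a linear layer (weight %529, bias
    // %528), then two unsqueezes so the [4,1280] projection broadcast-adds
    // onto the [4,1280,12,12] feature map, matching the usual diffusion
    // ResNet time-conditioning pattern.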
%5993 = torch.aten.sigmoid %983 : !torch.vtensor<[4,1280],f16> -> !torch.vtensor<[4,1280],f16>
%5994 = torch.aten.mul.Tensor %5993, %983 : !torch.vtensor<[4,1280],f16>, !torch.vtensor<[4,1280],f16> -> !torch.vtensor<[4,1280],f16>
%5995 = torch.aten.transpose.int %529, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%5996 = torch.aten.mm %5994, %5995 : !torch.vtensor<[4,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[4,1280],f16>
%5997 = torch.aten.mul.Scalar %528, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16>
%5998 = torch.aten.add.Tensor %5997, %5996, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[4,1280],f16>, !torch.int -> !torch.vtensor<[4,1280],f16>
%5999 = torch.aten.unsqueeze %5998, %int2 : !torch.vtensor<[4,1280],f16>, !torch.int -> !torch.vtensor<[4,1280,1],f16>
%6000 = torch.aten.unsqueeze %5999, %int3 : !torch.vtensor<[4,1280,1],f16>, !torch.int -> !torch.vtensor<[4,1280,1,1],f16>
%6001 = torch.aten.add.Tensor %5992, %6000, %int1 : !torch.vtensor<[4,1280,12,12],f16>, !torch.vtensor<[4,1280,1,1],f16>, !torch.int -> !torch.vtensor<[4,1280,12,12],f16>
%6002 = torch.aten.view %6001, %4898 : !torch.vtensor<[4,1280,12,12],f16>, !torch.list<int> -> !torch.vtensor<[4,32,40,144],f16>
%6003 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%6004 = torch.aten.to.dtype %6003, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%6005 = torch.prim.ListConstruct %int4, %int32, %int40, %int144 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%6006 = torch.aten.broadcast_to %6004, %6005 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[4,32,40,144],f32>
%6007 = torch.aten.to.dtype %6002, %int6, %false, %false, %none : !torch.vtensor<[4,32,40,144],f16>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,32,40,144],f32>
%6008 = torch.prim.ListConstruct %int4, %int32, %int40, %int144 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%6009 = torch.aten.broadcast_to %6007, %6008 : !torch.vtensor<[4,32,40,144],f32>, !torch.list<int> -> !torch.vtensor<[4,32,40,144],f32>
%6010 = torch.aten.to.dtype %6009, %int7, %false, %false, %none : !torch.vtensor<[4,32,40,144],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,32,40,144],f64>
%6011 = torch.aten.sum.dim_IntList %6010, %996, %true, %none : !torch.vtensor<[4,32,40,144],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f64>
%6012 = torch.aten.div.Scalar %6011, %int5760 : !torch.vtensor<[4,32,1,1],f64>, !torch.int -> !torch.vtensor<[4,32,1,1],f64>
%6013 = torch.aten.sub.Tensor %6010, %6012, %float1.000000e00 : !torch.vtensor<[4,32,40,144],f64>, !torch.vtensor<[4,32,1,1],f64>, !torch.float -> !torch.vtensor<[4,32,40,144],f64>
%6014 = torch.aten.mul.Tensor %6013, %6013 : !torch.vtensor<[4,32,40,144],f64>, !torch.vtensor<[4,32,40,144],f64> -> !torch.vtensor<[4,32,40,144],f64>
%6015 = torch.aten.sum.dim_IntList %6014, %996, %true, %none : !torch.vtensor<[4,32,40,144],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f64>
%6016 = torch.aten.div.Scalar %6015, %int5760 : !torch.vtensor<[4,32,1,1],f64>, !torch.int -> !torch.vtensor<[4,32,1,1],f64>
%6017 = torch.aten.to.dtype %6016, %int6, %false, %false, %none : !torch.vtensor<[4,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f32>
%6018 = torch.aten.sum.dim_IntList %6009, %996, %true, %none : !torch.vtensor<[4,32,40,144],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f32>
%6019 = torch.aten.div.Scalar %6018, %int5760 : !torch.vtensor<[4,32,1,1],f32>, !torch.int -> !torch.vtensor<[4,32,1,1],f32>
%6020 = torch.aten.add.Tensor %6017, %939, %int1 : !torch.vtensor<[4,32,1,1],f32>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,32,1,1],f32>
%6021 = torch.aten.rsqrt %6020 : !torch.vtensor<[4,32,1,1],f32> -> !torch.vtensor<[4,32,1,1],f32>
%6022 = torch.aten.sub.Tensor %6002, %6019, %int1 : !torch.vtensor<[4,32,40,144],f16>, !torch.vtensor<[4,32,1,1],f32>, !torch.int -> !torch.vtensor<[4,32,40,144],f32>
%6023 = torch.aten.mul.Tensor %6022, %6021 : !torch.vtensor<[4,32,40,144],f32>, !torch.vtensor<[4,32,1,1],f32> -> !torch.vtensor<[4,32,40,144],f32>
%6024 = torch.aten.view %6023, %4921 : !torch.vtensor<[4,32,40,144],f32>, !torch.list<int> -> !torch.vtensor<[4,1280,12,12],f32>
%6025 = torch.aten.unsqueeze %527, %int0 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1,1280],f16>
%6026 = torch.aten.unsqueeze %6025, %int2 : !torch.vtensor<[1,1280],f16>, !torch.int -> !torch.vtensor<[1,1280,1],f16>
%6027 = torch.aten.unsqueeze %6026, %int3 : !torch.vtensor<[1,1280,1],f16>, !torch.int -> !torch.vtensor<[1,1280,1,1],f16>
%6028 = torch.aten.unsqueeze %526, %int0 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1,1280],f16>
%6029 = torch.aten.unsqueeze %6028, %int2 : !torch.vtensor<[1,1280],f16>, !torch.int -> !torch.vtensor<[1,1280,1],f16>
%6030 = torch.aten.unsqueeze %6029, %int3 : !torch.vtensor<[1,1280,1],f16>, !torch.int -> !torch.vtensor<[1,1280,1,1],f16>
%6031 = torch.aten.mul.Tensor %6024, %6030 : !torch.vtensor<[4,1280,12,12],f32>, !torch.vtensor<[1,1280,1,1],f16> -> !torch.vtensor<[4,1280,12,12],f32>
%6032 = torch.aten.add.Tensor %6031, %6027, %int1 : !torch.vtensor<[4,1280,12,12],f32>, !torch.vtensor<[1,1280,1,1],f16>, !torch.int -> !torch.vtensor<[4,1280,12,12],f32>
%6033 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%6034 = torch.aten.to.dtype %6033, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%6035 = torch.prim.ListConstruct %int4, %int1280, %int12, %int12 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%6036 = torch.aten.broadcast_to %6034, %6035 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[4,1280,12,12],f16>
%6037 = torch.aten.to.dtype %6032, %int5, %false, %false, %none : !torch.vtensor<[4,1280,12,12],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,1280,12,12],f16>
%6038 = torch.prim.ListConstruct %int4, %int1280, %int12, %int12 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%6039 = torch.aten.broadcast_to %6037, %6038 : !torch.vtensor<[4,1280,12,12],f16>, !torch.list<int> -> !torch.vtensor<[4,1280,12,12],f16>
%6040 = torch.aten.sigmoid %6039 : !torch.vtensor<[4,1280,12,12],f16> -> !torch.vtensor<[4,1280,12,12],f16>
%6041 = torch.aten.mul.Tensor %6040, %6039 : !torch.vtensor<[4,1280,12,12],f16>, !torch.vtensor<[4,1280,12,12],f16> -> !torch.vtensor<[4,1280,12,12],f16>
%6042 = torch.aten.view %6041, %4940 : !torch.vtensor<[4,1280,12,12],f16>, !torch.list<int> -> !torch.vtensor<[4,80,16,12,12],f16>
%6043 = torch.aten.abs %6042 : !torch.vtensor<[4,80,16,12,12],f16> -> !torch.vtensor<[4,80,16,12,12],f16>
%values_246, %indices_247 = torch.aten.max.dim %6043, %int2, %true : !torch.vtensor<[4,80,16,12,12],f16>, !torch.int, !torch.bool -> !torch.vtensor<[4,80,1,12,12],f16>, !torch.vtensor<[4,80,1,12,12],si64>
%6044 = torch.aten.view %values_246, %4943 : !torch.vtensor<[4,80,1,12,12],f16>, !torch.list<int> -> !torch.vtensor<[4,80,1,12,12],f16>
%6045 = torch.aten.broadcast_to %6044, %4940 : !torch.vtensor<[4,80,1,12,12],f16>, !torch.list<int> -> !torch.vtensor<[4,80,16,12,12],f16>
%6046 = torch.aten.clone %6045, %int0 : !torch.vtensor<[4,80,16,12,12],f16>, !torch.int -> !torch.vtensor<[4,80,16,12,12],f16>
%6047 = torch.aten.view %6046, %4921 : !torch.vtensor<[4,80,16,12,12],f16>, !torch.list<int> -> !torch.vtensor<[4,1280,12,12],f16>
%6048 = torch.aten.div.Scalar %6047, %int128 : !torch.vtensor<[4,1280,12,12],f16>, !torch.int -> !torch.vtensor<[4,1280,12,12],f16>
%6049 = torch.aten.detach %936 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%6050 = torch.aten.detach %6049 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%6051 = torch.aten.div.Tensor %6041, %6048 : !torch.vtensor<[4,1280,12,12],f16>, !torch.vtensor<[4,1280,12,12],f16> -> !torch.vtensor<[4,1280,12,12],f16>
%6052 = torch.aten.add.Tensor %6051, %6050, %int1 : !torch.vtensor<[4,1280,12,12],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,1280,12,12],f16>
%6053 = torch.aten.round %6052 : !torch.vtensor<[4,1280,12,12],f16> -> !torch.vtensor<[4,1280,12,12],f16>
%6054 = torch.aten.clamp %6053, %int-128, %int127 : !torch.vtensor<[4,1280,12,12],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,1280,12,12],f16>
%6055 = torch.aten.sub.Tensor %6054, %6050, %int1 : !torch.vtensor<[4,1280,12,12],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,1280,12,12],f16>
%6056 = torch.aten.mul.Tensor %6055, %6048 : !torch.vtensor<[4,1280,12,12],f16>, !torch.vtensor<[4,1280,12,12],f16> -> !torch.vtensor<[4,1280,12,12],f16>
%6057 = torch.aten.broadcast_to %525, %3722 : !torch.vtensor<[1280,80,1,3,3],f16>, !torch.list<int> -> !torch.vtensor<[1280,80,16,3,3],f16>
%6058 = torch.aten.clone %6057, %int0 : !torch.vtensor<[1280,80,16,3,3],f16>, !torch.int -> !torch.vtensor<[1280,80,16,3,3],f16>
%6059 = torch.aten.view %6058, %3725 : !torch.vtensor<[1280,80,16,3,3],f16>, !torch.list<int> -> !torch.vtensor<[1280,1280,3,3],f16>
%6060 = torch.aten.mul.Tensor %524, %6059 : !torch.vtensor<[1280,1280,3,3],si8>, !torch.vtensor<[1280,1280,3,3],f16> -> !torch.vtensor<[1280,1280,3,3],f16>
%6061 = torch.aten.convolution %6056, %6060, %523, %984, %984, %984, %false, %985, %int1 : !torch.vtensor<[4,1280,12,12],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[4,1280,12,12],f16>
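    // Shortcut branch: the block input %5926 (the 2560-channel concat) is
    // fake-quantized the same way, and a 1x1 convolution (scales %522, si8
    // weights %521, bias %520) projects it back to 1280 channels to match the
    // main branch for the residual add below.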
%6062 = torch.aten.view %5926, %5969 : !torch.vtensor<[4,2560,12,12],f16>, !torch.list<int> -> !torch.vtensor<[4,160,16,12,12],f16>
%6063 = torch.aten.abs %6062 : !torch.vtensor<[4,160,16,12,12],f16> -> !torch.vtensor<[4,160,16,12,12],f16>
%values_248, %indices_249 = torch.aten.max.dim %6063, %int2, %true : !torch.vtensor<[4,160,16,12,12],f16>, !torch.int, !torch.bool -> !torch.vtensor<[4,160,1,12,12],f16>, !torch.vtensor<[4,160,1,12,12],si64>
%6064 = torch.aten.view %values_248, %5972 : !torch.vtensor<[4,160,1,12,12],f16>, !torch.list<int> -> !torch.vtensor<[4,160,1,12,12],f16>
%6065 = torch.aten.broadcast_to %6064, %5969 : !torch.vtensor<[4,160,1,12,12],f16>, !torch.list<int> -> !torch.vtensor<[4,160,16,12,12],f16>
%6066 = torch.aten.clone %6065, %int0 : !torch.vtensor<[4,160,16,12,12],f16>, !torch.int -> !torch.vtensor<[4,160,16,12,12],f16>
%6067 = torch.aten.view %6066, %5950 : !torch.vtensor<[4,160,16,12,12],f16>, !torch.list<int> -> !torch.vtensor<[4,2560,12,12],f16>
%6068 = torch.aten.div.Scalar %6067, %int128 : !torch.vtensor<[4,2560,12,12],f16>, !torch.int -> !torch.vtensor<[4,2560,12,12],f16>
%6069 = torch.aten.detach %936 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%6070 = torch.aten.detach %6069 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%6071 = torch.aten.div.Tensor %5926, %6068 : !torch.vtensor<[4,2560,12,12],f16>, !torch.vtensor<[4,2560,12,12],f16> -> !torch.vtensor<[4,2560,12,12],f16>
%6072 = torch.aten.add.Tensor %6071, %6070, %int1 : !torch.vtensor<[4,2560,12,12],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,2560,12,12],f16>
%6073 = torch.aten.round %6072 : !torch.vtensor<[4,2560,12,12],f16> -> !torch.vtensor<[4,2560,12,12],f16>
%6074 = torch.aten.clamp %6073, %int-128, %int127 : !torch.vtensor<[4,2560,12,12],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,2560,12,12],f16>
%6075 = torch.aten.sub.Tensor %6074, %6070, %int1 : !torch.vtensor<[4,2560,12,12],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,2560,12,12],f16>
%6076 = torch.aten.mul.Tensor %6075, %6068 : !torch.vtensor<[4,2560,12,12],f16>, !torch.vtensor<[4,2560,12,12],f16> -> !torch.vtensor<[4,2560,12,12],f16>
%6077 = torch.prim.ListConstruct %int1280, %int160, %int16, %int1, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%6078 = torch.aten.broadcast_to %522, %6077 : !torch.vtensor<[1280,160,1,1,1],f16>, !torch.list<int> -> !torch.vtensor<[1280,160,16,1,1],f16>
%6079 = torch.aten.clone %6078, %int0 : !torch.vtensor<[1280,160,16,1,1],f16>, !torch.int -> !torch.vtensor<[1280,160,16,1,1],f16>
%6080 = torch.prim.ListConstruct %int1280, %int2560, %int1, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%6081 = torch.aten.view %6079, %6080 : !torch.vtensor<[1280,160,16,1,1],f16>, !torch.list<int> -> !torch.vtensor<[1280,2560,1,1],f16>
%6082 = torch.aten.mul.Tensor %521, %6081 : !torch.vtensor<[1280,2560,1,1],si8>, !torch.vtensor<[1280,2560,1,1],f16> -> !torch.vtensor<[1280,2560,1,1],f16>
%6083 = torch.aten.convolution %6076, %6082, %520, %984, %985, %984, %false, %985, %int1 : !torch.vtensor<[4,2560,12,12],f16>, !torch.vtensor<[1280,2560,1,1],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[4,1280,12,12],f16>
%6084 = torch.aten.add.Tensor %6083, %6061, %int1 : !torch.vtensor<[4,1280,12,12],f16>, !torch.vtensor<[4,1280,12,12],f16>, !torch.int -> !torch.vtensor<[4,1280,12,12],f16>
%6085 = torch.aten.div.Tensor %6084, %925 : !torch.vtensor<[4,1280,12,12],f16>, !torch.vtensor<[],f16> -> !torch.vtensor<[4,1280,12,12],f16>
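    // Concatenate the next saved encoder activation %5032 and run the same
    // GroupNorm / SiLU / fake-quant / conv ResNet block again (%925 above
    // appears to be the block's output scale factor).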
%6086 = torch.prim.ListConstruct %6085, %5032 : (!torch.vtensor<[4,1280,12,12],f16>, !torch.vtensor<[4,1280,12,12],f16>) -> !torch.list<vtensor>
%6087 = torch.aten.cat %6086, %int1 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[4,2560,12,12],f16>
%6088 = torch.aten.view %6087, %5927 : !torch.vtensor<[4,2560,12,12],f16>, !torch.list<int> -> !torch.vtensor<[4,32,80,144],f16>
%6089 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%6090 = torch.aten.to.dtype %6089, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%6091 = torch.prim.ListConstruct %int4, %int32, %int80, %int144 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%6092 = torch.aten.broadcast_to %6090, %6091 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[4,32,80,144],f32>
%6093 = torch.aten.to.dtype %6088, %int6, %false, %false, %none : !torch.vtensor<[4,32,80,144],f16>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,32,80,144],f32>
%6094 = torch.prim.ListConstruct %int4, %int32, %int80, %int144 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%6095 = torch.aten.broadcast_to %6093, %6094 : !torch.vtensor<[4,32,80,144],f32>, !torch.list<int> -> !torch.vtensor<[4,32,80,144],f32>
%6096 = torch.aten.to.dtype %6095, %int7, %false, %false, %none : !torch.vtensor<[4,32,80,144],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,32,80,144],f64>
%6097 = torch.aten.sum.dim_IntList %6096, %996, %true, %none : !torch.vtensor<[4,32,80,144],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f64>
%6098 = torch.aten.div.Scalar %6097, %int11520 : !torch.vtensor<[4,32,1,1],f64>, !torch.int -> !torch.vtensor<[4,32,1,1],f64>
%6099 = torch.aten.sub.Tensor %6096, %6098, %float1.000000e00 : !torch.vtensor<[4,32,80,144],f64>, !torch.vtensor<[4,32,1,1],f64>, !torch.float -> !torch.vtensor<[4,32,80,144],f64>
%6100 = torch.aten.mul.Tensor %6099, %6099 : !torch.vtensor<[4,32,80,144],f64>, !torch.vtensor<[4,32,80,144],f64> -> !torch.vtensor<[4,32,80,144],f64>
%6101 = torch.aten.sum.dim_IntList %6100, %996, %true, %none : !torch.vtensor<[4,32,80,144],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f64>
%6102 = torch.aten.div.Scalar %6101, %int11520 : !torch.vtensor<[4,32,1,1],f64>, !torch.int -> !torch.vtensor<[4,32,1,1],f64>
%6103 = torch.aten.to.dtype %6102, %int6, %false, %false, %none : !torch.vtensor<[4,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f32>
%6104 = torch.aten.sum.dim_IntList %6095, %996, %true, %none : !torch.vtensor<[4,32,80,144],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f32>
%6105 = torch.aten.div.Scalar %6104, %int11520 : !torch.vtensor<[4,32,1,1],f32>, !torch.int -> !torch.vtensor<[4,32,1,1],f32>
%6106 = torch.aten.add.Tensor %6103, %939, %int1 : !torch.vtensor<[4,32,1,1],f32>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,32,1,1],f32>
%6107 = torch.aten.rsqrt %6106 : !torch.vtensor<[4,32,1,1],f32> -> !torch.vtensor<[4,32,1,1],f32>
%6108 = torch.aten.sub.Tensor %6088, %6105, %int1 : !torch.vtensor<[4,32,80,144],f16>, !torch.vtensor<[4,32,1,1],f32>, !torch.int -> !torch.vtensor<[4,32,80,144],f32>
%6109 = torch.aten.mul.Tensor %6108, %6107 : !torch.vtensor<[4,32,80,144],f32>, !torch.vtensor<[4,32,1,1],f32> -> !torch.vtensor<[4,32,80,144],f32>
%6110 = torch.aten.view %6109, %5950 : !torch.vtensor<[4,32,80,144],f32>, !torch.list<int> -> !torch.vtensor<[4,2560,12,12],f32>
%6111 = torch.aten.unsqueeze %519, %int0 : !torch.vtensor<[2560],f16>, !torch.int -> !torch.vtensor<[1,2560],f16>
%6112 = torch.aten.unsqueeze %6111, %int2 : !torch.vtensor<[1,2560],f16>, !torch.int -> !torch.vtensor<[1,2560,1],f16>
%6113 = torch.aten.unsqueeze %6112, %int3 : !torch.vtensor<[1,2560,1],f16>, !torch.int -> !torch.vtensor<[1,2560,1,1],f16>
%6114 = torch.aten.unsqueeze %518, %int0 : !torch.vtensor<[2560],f16>, !torch.int -> !torch.vtensor<[1,2560],f16>
%6115 = torch.aten.unsqueeze %6114, %int2 : !torch.vtensor<[1,2560],f16>, !torch.int -> !torch.vtensor<[1,2560,1],f16>
%6116 = torch.aten.unsqueeze %6115, %int3 : !torch.vtensor<[1,2560,1],f16>, !torch.int -> !torch.vtensor<[1,2560,1,1],f16>
%6117 = torch.aten.mul.Tensor %6110, %6116 : !torch.vtensor<[4,2560,12,12],f32>, !torch.vtensor<[1,2560,1,1],f16> -> !torch.vtensor<[4,2560,12,12],f32>
%6118 = torch.aten.add.Tensor %6117, %6113, %int1 : !torch.vtensor<[4,2560,12,12],f32>, !torch.vtensor<[1,2560,1,1],f16>, !torch.int -> !torch.vtensor<[4,2560,12,12],f32>
%6119 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%6120 = torch.aten.to.dtype %6119, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%6121 = torch.prim.ListConstruct %int4, %int2560, %int12, %int12 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%6122 = torch.aten.broadcast_to %6120, %6121 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[4,2560,12,12],f16>
%6123 = torch.aten.to.dtype %6118, %int5, %false, %false, %none : !torch.vtensor<[4,2560,12,12],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,2560,12,12],f16>
%6124 = torch.prim.ListConstruct %int4, %int2560, %int12, %int12 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%6125 = torch.aten.broadcast_to %6123, %6124 : !torch.vtensor<[4,2560,12,12],f16>, !torch.list<int> -> !torch.vtensor<[4,2560,12,12],f16>
%6126 = torch.aten.sigmoid %6125 : !torch.vtensor<[4,2560,12,12],f16> -> !torch.vtensor<[4,2560,12,12],f16>
%6127 = torch.aten.mul.Tensor %6126, %6125 : !torch.vtensor<[4,2560,12,12],f16>, !torch.vtensor<[4,2560,12,12],f16> -> !torch.vtensor<[4,2560,12,12],f16>
%6128 = torch.aten.view %6127, %5969 : !torch.vtensor<[4,2560,12,12],f16>, !torch.list<int> -> !torch.vtensor<[4,160,16,12,12],f16>
%6129 = torch.aten.abs %6128 : !torch.vtensor<[4,160,16,12,12],f16> -> !torch.vtensor<[4,160,16,12,12],f16>
%values_250, %indices_251 = torch.aten.max.dim %6129, %int2, %true : !torch.vtensor<[4,160,16,12,12],f16>, !torch.int, !torch.bool -> !torch.vtensor<[4,160,1,12,12],f16>, !torch.vtensor<[4,160,1,12,12],si64>
%6130 = torch.aten.view %values_250, %5972 : !torch.vtensor<[4,160,1,12,12],f16>, !torch.list<int> -> !torch.vtensor<[4,160,1,12,12],f16>
%6131 = torch.aten.broadcast_to %6130, %5969 : !torch.vtensor<[4,160,1,12,12],f16>, !torch.list<int> -> !torch.vtensor<[4,160,16,12,12],f16>
%6132 = torch.aten.clone %6131, %int0 : !torch.vtensor<[4,160,16,12,12],f16>, !torch.int -> !torch.vtensor<[4,160,16,12,12],f16>
%6133 = torch.aten.view %6132, %5950 : !torch.vtensor<[4,160,16,12,12],f16>, !torch.list<int> -> !torch.vtensor<[4,2560,12,12],f16>
%6134 = torch.aten.div.Scalar %6133, %int128 : !torch.vtensor<[4,2560,12,12],f16>, !torch.int -> !torch.vtensor<[4,2560,12,12],f16>
%6135 = torch.aten.detach %936 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%6136 = torch.aten.detach %6135 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%6137 = torch.aten.div.Tensor %6127, %6134 : !torch.vtensor<[4,2560,12,12],f16>, !torch.vtensor<[4,2560,12,12],f16> -> !torch.vtensor<[4,2560,12,12],f16>
%6138 = torch.aten.add.Tensor %6137, %6136, %int1 : !torch.vtensor<[4,2560,12,12],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,2560,12,12],f16>
%6139 = torch.aten.round %6138 : !torch.vtensor<[4,2560,12,12],f16> -> !torch.vtensor<[4,2560,12,12],f16>
%6140 = torch.aten.clamp %6139, %int-128, %int127 : !torch.vtensor<[4,2560,12,12],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,2560,12,12],f16>
%6141 = torch.aten.sub.Tensor %6140, %6136, %int1 : !torch.vtensor<[4,2560,12,12],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,2560,12,12],f16>
%6142 = torch.aten.mul.Tensor %6141, %6134 : !torch.vtensor<[4,2560,12,12],f16>, !torch.vtensor<[4,2560,12,12],f16> -> !torch.vtensor<[4,2560,12,12],f16>
%6143 = torch.aten.broadcast_to %517, %5986 : !torch.vtensor<[1280,160,1,3,3],f16>, !torch.list<int> -> !torch.vtensor<[1280,160,16,3,3],f16>
%6144 = torch.aten.clone %6143, %int0 : !torch.vtensor<[1280,160,16,3,3],f16>, !torch.int -> !torch.vtensor<[1280,160,16,3,3],f16>
%6145 = torch.aten.view %6144, %5989 : !torch.vtensor<[1280,160,16,3,3],f16>, !torch.list<int> -> !torch.vtensor<[1280,2560,3,3],f16>
%6146 = torch.aten.mul.Tensor %516, %6145 : !torch.vtensor<[1280,2560,3,3],si8>, !torch.vtensor<[1280,2560,3,3],f16> -> !torch.vtensor<[1280,2560,3,3],f16>
%6147 = torch.aten.convolution %6142, %6146, %515, %984, %984, %984, %false, %985, %int1 : !torch.vtensor<[4,2560,12,12],f16>, !torch.vtensor<[1280,2560,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[4,1280,12,12],f16>
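    // Second time-embedding injection for this block: SiLU(%983), linear with
    // weight %514 / bias %513, broadcast-add onto the conv output.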
%6148 = torch.aten.sigmoid %983 : !torch.vtensor<[4,1280],f16> -> !torch.vtensor<[4,1280],f16>
%6149 = torch.aten.mul.Tensor %6148, %983 : !torch.vtensor<[4,1280],f16>, !torch.vtensor<[4,1280],f16> -> !torch.vtensor<[4,1280],f16>
%6150 = torch.aten.transpose.int %514, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%6151 = torch.aten.mm %6149, %6150 : !torch.vtensor<[4,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[4,1280],f16>
%6152 = torch.aten.mul.Scalar %513, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16>
%6153 = torch.aten.add.Tensor %6152, %6151, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[4,1280],f16>, !torch.int -> !torch.vtensor<[4,1280],f16>
%6154 = torch.aten.unsqueeze %6153, %int2 : !torch.vtensor<[4,1280],f16>, !torch.int -> !torch.vtensor<[4,1280,1],f16>
%6155 = torch.aten.unsqueeze %6154, %int3 : !torch.vtensor<[4,1280,1],f16>, !torch.int -> !torch.vtensor<[4,1280,1,1],f16>
%6156 = torch.aten.add.Tensor %6147, %6155, %int1 : !torch.vtensor<[4,1280,12,12],f16>, !torch.vtensor<[4,1280,1,1],f16>, !torch.int -> !torch.vtensor<[4,1280,12,12],f16>
%6157 = torch.aten.view %6156, %4898 : !torch.vtensor<[4,1280,12,12],f16>, !torch.list<int> -> !torch.vtensor<[4,32,40,144],f16>
%6158 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%6159 = torch.aten.to.dtype %6158, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%6160 = torch.prim.ListConstruct %int4, %int32, %int40, %int144 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%6161 = torch.aten.broadcast_to %6159, %6160 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[4,32,40,144],f32>
%6162 = torch.aten.to.dtype %6157, %int6, %false, %false, %none : !torch.vtensor<[4,32,40,144],f16>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,32,40,144],f32>
%6163 = torch.prim.ListConstruct %int4, %int32, %int40, %int144 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%6164 = torch.aten.broadcast_to %6162, %6163 : !torch.vtensor<[4,32,40,144],f32>, !torch.list<int> -> !torch.vtensor<[4,32,40,144],f32>
%6165 = torch.aten.to.dtype %6164, %int7, %false, %false, %none : !torch.vtensor<[4,32,40,144],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,32,40,144],f64>
%6166 = torch.aten.sum.dim_IntList %6165, %996, %true, %none : !torch.vtensor<[4,32,40,144],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f64>
%6167 = torch.aten.div.Scalar %6166, %int5760 : !torch.vtensor<[4,32,1,1],f64>, !torch.int -> !torch.vtensor<[4,32,1,1],f64>
%6168 = torch.aten.sub.Tensor %6165, %6167, %float1.000000e00 : !torch.vtensor<[4,32,40,144],f64>, !torch.vtensor<[4,32,1,1],f64>, !torch.float -> !torch.vtensor<[4,32,40,144],f64>
%6169 = torch.aten.mul.Tensor %6168, %6168 : !torch.vtensor<[4,32,40,144],f64>, !torch.vtensor<[4,32,40,144],f64> -> !torch.vtensor<[4,32,40,144],f64>
%6170 = torch.aten.sum.dim_IntList %6169, %996, %true, %none : !torch.vtensor<[4,32,40,144],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f64>
%6171 = torch.aten.div.Scalar %6170, %int5760 : !torch.vtensor<[4,32,1,1],f64>, !torch.int -> !torch.vtensor<[4,32,1,1],f64>
%6172 = torch.aten.to.dtype %6171, %int6, %false, %false, %none : !torch.vtensor<[4,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f32>
%6173 = torch.aten.sum.dim_IntList %6164, %996, %true, %none : !torch.vtensor<[4,32,40,144],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f32>
%6174 = torch.aten.div.Scalar %6173, %int5760 : !torch.vtensor<[4,32,1,1],f32>, !torch.int -> !torch.vtensor<[4,32,1,1],f32>
%6175 = torch.aten.add.Tensor %6172, %939, %int1 : !torch.vtensor<[4,32,1,1],f32>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,32,1,1],f32>
%6176 = torch.aten.rsqrt %6175 : !torch.vtensor<[4,32,1,1],f32> -> !torch.vtensor<[4,32,1,1],f32>
%6177 = torch.aten.sub.Tensor %6157, %6174, %int1 : !torch.vtensor<[4,32,40,144],f16>, !torch.vtensor<[4,32,1,1],f32>, !torch.int -> !torch.vtensor<[4,32,40,144],f32>
%6178 = torch.aten.mul.Tensor %6177, %6176 : !torch.vtensor<[4,32,40,144],f32>, !torch.vtensor<[4,32,1,1],f32> -> !torch.vtensor<[4,32,40,144],f32>
%6179 = torch.aten.view %6178, %4921 : !torch.vtensor<[4,32,40,144],f32>, !torch.list<int> -> !torch.vtensor<[4,1280,12,12],f32>
%6180 = torch.aten.unsqueeze %512, %int0 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1,1280],f16>
%6181 = torch.aten.unsqueeze %6180, %int2 : !torch.vtensor<[1,1280],f16>, !torch.int -> !torch.vtensor<[1,1280,1],f16>
%6182 = torch.aten.unsqueeze %6181, %int3 : !torch.vtensor<[1,1280,1],f16>, !torch.int -> !torch.vtensor<[1,1280,1,1],f16>
%6183 = torch.aten.unsqueeze %511, %int0 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1,1280],f16>
%6184 = torch.aten.unsqueeze %6183, %int2 : !torch.vtensor<[1,1280],f16>, !torch.int -> !torch.vtensor<[1,1280,1],f16>
%6185 = torch.aten.unsqueeze %6184, %int3 : !torch.vtensor<[1,1280,1],f16>, !torch.int -> !torch.vtensor<[1,1280,1,1],f16>
%6186 = torch.aten.mul.Tensor %6179, %6185 : !torch.vtensor<[4,1280,12,12],f32>, !torch.vtensor<[1,1280,1,1],f16> -> !torch.vtensor<[4,1280,12,12],f32>
%6187 = torch.aten.add.Tensor %6186, %6182, %int1 : !torch.vtensor<[4,1280,12,12],f32>, !torch.vtensor<[1,1280,1,1],f16>, !torch.int -> !torch.vtensor<[4,1280,12,12],f32>
%6188 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%6189 = torch.aten.to.dtype %6188, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%6190 = torch.prim.ListConstruct %int4, %int1280, %int12, %int12 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%6191 = torch.aten.broadcast_to %6189, %6190 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[4,1280,12,12],f16>
%6192 = torch.aten.to.dtype %6187, %int5, %false, %false, %none : !torch.vtensor<[4,1280,12,12],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,1280,12,12],f16>
%6193 = torch.prim.ListConstruct %int4, %int1280, %int12, %int12 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%6194 = torch.aten.broadcast_to %6192, %6193 : !torch.vtensor<[4,1280,12,12],f16>, !torch.list<int> -> !torch.vtensor<[4,1280,12,12],f16>
%6195 = torch.aten.sigmoid %6194 : !torch.vtensor<[4,1280,12,12],f16> -> !torch.vtensor<[4,1280,12,12],f16>
%6196 = torch.aten.mul.Tensor %6195, %6194 : !torch.vtensor<[4,1280,12,12],f16>, !torch.vtensor<[4,1280,12,12],f16> -> !torch.vtensor<[4,1280,12,12],f16>
%6197 = torch.aten.view %6196, %4940 : !torch.vtensor<[4,1280,12,12],f16>, !torch.list<int> -> !torch.vtensor<[4,80,16,12,12],f16>
%6198 = torch.aten.abs %6197 : !torch.vtensor<[4,80,16,12,12],f16> -> !torch.vtensor<[4,80,16,12,12],f16>
%values_252, %indices_253 = torch.aten.max.dim %6198, %int2, %true : !torch.vtensor<[4,80,16,12,12],f16>, !torch.int, !torch.bool -> !torch.vtensor<[4,80,1,12,12],f16>, !torch.vtensor<[4,80,1,12,12],si64>
%6199 = torch.aten.view %values_252, %4943 : !torch.vtensor<[4,80,1,12,12],f16>, !torch.list<int> -> !torch.vtensor<[4,80,1,12,12],f16>
%6200 = torch.aten.broadcast_to %6199, %4940 : !torch.vtensor<[4,80,1,12,12],f16>, !torch.list<int> -> !torch.vtensor<[4,80,16,12,12],f16>
%6201 = torch.aten.clone %6200, %int0 : !torch.vtensor<[4,80,16,12,12],f16>, !torch.int -> !torch.vtensor<[4,80,16,12,12],f16>
%6202 = torch.aten.view %6201, %4921 : !torch.vtensor<[4,80,16,12,12],f16>, !torch.list<int> -> !torch.vtensor<[4,1280,12,12],f16>
%6203 = torch.aten.div.Scalar %6202, %int128 : !torch.vtensor<[4,1280,12,12],f16>, !torch.int -> !torch.vtensor<[4,1280,12,12],f16>
%6204 = torch.aten.detach %936 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%6205 = torch.aten.detach %6204 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%6206 = torch.aten.div.Tensor %6196, %6203 : !torch.vtensor<[4,1280,12,12],f16>, !torch.vtensor<[4,1280,12,12],f16> -> !torch.vtensor<[4,1280,12,12],f16>
%6207 = torch.aten.add.Tensor %6206, %6205, %int1 : !torch.vtensor<[4,1280,12,12],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,1280,12,12],f16>
%6208 = torch.aten.round %6207 : !torch.vtensor<[4,1280,12,12],f16> -> !torch.vtensor<[4,1280,12,12],f16>
%6209 = torch.aten.clamp %6208, %int-128, %int127 : !torch.vtensor<[4,1280,12,12],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,1280,12,12],f16>
%6210 = torch.aten.sub.Tensor %6209, %6205, %int1 : !torch.vtensor<[4,1280,12,12],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,1280,12,12],f16>
%6211 = torch.aten.mul.Tensor %6210, %6203 : !torch.vtensor<[4,1280,12,12],f16>, !torch.vtensor<[4,1280,12,12],f16> -> !torch.vtensor<[4,1280,12,12],f16>
%6212 = torch.aten.broadcast_to %510, %3722 : !torch.vtensor<[1280,80,1,3,3],f16>, !torch.list<int> -> !torch.vtensor<[1280,80,16,3,3],f16>
%6213 = torch.aten.clone %6212, %int0 : !torch.vtensor<[1280,80,16,3,3],f16>, !torch.int -> !torch.vtensor<[1280,80,16,3,3],f16>
%6214 = torch.aten.view %6213, %3725 : !torch.vtensor<[1280,80,16,3,3],f16>, !torch.list<int> -> !torch.vtensor<[1280,1280,3,3],f16>
%6215 = torch.aten.mul.Tensor %509, %6214 : !torch.vtensor<[1280,1280,3,3],si8>, !torch.vtensor<[1280,1280,3,3],f16> -> !torch.vtensor<[1280,1280,3,3],f16>
%6216 = torch.aten.convolution %6211, %6215, %508, %984, %984, %984, %false, %985, %int1 : !torch.vtensor<[4,1280,12,12],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[4,1280,12,12],f16>
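    // Shortcut branch of this block: fake-quantize the 2560-channel input
    // %6087 and project it to 1280 channels with a 1x1 convolution (scales
    // %507, si8 weights %506, bias %505) before the residual add.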
%6217 = torch.aten.view %6087, %5969 : !torch.vtensor<[4,2560,12,12],f16>, !torch.list<int> -> !torch.vtensor<[4,160,16,12,12],f16>
%6218 = torch.aten.abs %6217 : !torch.vtensor<[4,160,16,12,12],f16> -> !torch.vtensor<[4,160,16,12,12],f16>
%values_254, %indices_255 = torch.aten.max.dim %6218, %int2, %true : !torch.vtensor<[4,160,16,12,12],f16>, !torch.int, !torch.bool -> !torch.vtensor<[4,160,1,12,12],f16>, !torch.vtensor<[4,160,1,12,12],si64>
%6219 = torch.aten.view %values_254, %5972 : !torch.vtensor<[4,160,1,12,12],f16>, !torch.list<int> -> !torch.vtensor<[4,160,1,12,12],f16>
%6220 = torch.aten.broadcast_to %6219, %5969 : !torch.vtensor<[4,160,1,12,12],f16>, !torch.list<int> -> !torch.vtensor<[4,160,16,12,12],f16>
%6221 = torch.aten.clone %6220, %int0 : !torch.vtensor<[4,160,16,12,12],f16>, !torch.int -> !torch.vtensor<[4,160,16,12,12],f16>
%6222 = torch.aten.view %6221, %5950 : !torch.vtensor<[4,160,16,12,12],f16>, !torch.list<int> -> !torch.vtensor<[4,2560,12,12],f16>
%6223 = torch.aten.div.Scalar %6222, %int128 : !torch.vtensor<[4,2560,12,12],f16>, !torch.int -> !torch.vtensor<[4,2560,12,12],f16>
%6224 = torch.aten.detach %936 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%6225 = torch.aten.detach %6224 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%6226 = torch.aten.div.Tensor %6087, %6223 : !torch.vtensor<[4,2560,12,12],f16>, !torch.vtensor<[4,2560,12,12],f16> -> !torch.vtensor<[4,2560,12,12],f16>
%6227 = torch.aten.add.Tensor %6226, %6225, %int1 : !torch.vtensor<[4,2560,12,12],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,2560,12,12],f16>
%6228 = torch.aten.round %6227 : !torch.vtensor<[4,2560,12,12],f16> -> !torch.vtensor<[4,2560,12,12],f16>
%6229 = torch.aten.clamp %6228, %int-128, %int127 : !torch.vtensor<[4,2560,12,12],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,2560,12,12],f16>
%6230 = torch.aten.sub.Tensor %6229, %6225, %int1 : !torch.vtensor<[4,2560,12,12],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,2560,12,12],f16>
%6231 = torch.aten.mul.Tensor %6230, %6223 : !torch.vtensor<[4,2560,12,12],f16>, !torch.vtensor<[4,2560,12,12],f16> -> !torch.vtensor<[4,2560,12,12],f16>
%6232 = torch.aten.broadcast_to %507, %6077 : !torch.vtensor<[1280,160,1,1,1],f16>, !torch.list<int> -> !torch.vtensor<[1280,160,16,1,1],f16>
%6233 = torch.aten.clone %6232, %int0 : !torch.vtensor<[1280,160,16,1,1],f16>, !torch.int -> !torch.vtensor<[1280,160,16,1,1],f16>
%6234 = torch.aten.view %6233, %6080 : !torch.vtensor<[1280,160,16,1,1],f16>, !torch.list<int> -> !torch.vtensor<[1280,2560,1,1],f16>
%6235 = torch.aten.mul.Tensor %506, %6234 : !torch.vtensor<[1280,2560,1,1],si8>, !torch.vtensor<[1280,2560,1,1],f16> -> !torch.vtensor<[1280,2560,1,1],f16>
%6236 = torch.aten.convolution %6231, %6235, %505, %984, %985, %984, %false, %985, %int1 : !torch.vtensor<[4,2560,12,12],f16>, !torch.vtensor<[1280,2560,1,1],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[4,1280,12,12],f16>
%6237 = torch.aten.add.Tensor %6236, %6216, %int1 : !torch.vtensor<[4,1280,12,12],f16>, !torch.vtensor<[4,1280,12,12],f16>, !torch.int -> !torch.vtensor<[4,1280,12,12],f16>
%6238 = torch.aten.div.Tensor %6237, %925 : !torch.vtensor<[4,1280,12,12],f16>, !torch.vtensor<[],f16> -> !torch.vtensor<[4,1280,12,12],f16>
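    // Third skip concatenation of this up stage, now with encoder activation
    // %4897; the ResNet-block pattern below repeats once more.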
%6239 = torch.prim.ListConstruct %6238, %4897 : (!torch.vtensor<[4,1280,12,12],f16>, !torch.vtensor<[4,1280,12,12],f16>) -> !torch.list<vtensor>
%6240 = torch.aten.cat %6239, %int1 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[4,2560,12,12],f16>
%6241 = torch.aten.view %6240, %5927 : !torch.vtensor<[4,2560,12,12],f16>, !torch.list<int> -> !torch.vtensor<[4,32,80,144],f16>
%6242 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%6243 = torch.aten.to.dtype %6242, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%6244 = torch.prim.ListConstruct %int4, %int32, %int80, %int144 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%6245 = torch.aten.broadcast_to %6243, %6244 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[4,32,80,144],f32>
%6246 = torch.aten.to.dtype %6241, %int6, %false, %false, %none : !torch.vtensor<[4,32,80,144],f16>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,32,80,144],f32>
%6247 = torch.prim.ListConstruct %int4, %int32, %int80, %int144 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%6248 = torch.aten.broadcast_to %6246, %6247 : !torch.vtensor<[4,32,80,144],f32>, !torch.list<int> -> !torch.vtensor<[4,32,80,144],f32>
%6249 = torch.aten.to.dtype %6248, %int7, %false, %false, %none : !torch.vtensor<[4,32,80,144],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,32,80,144],f64>
%6250 = torch.aten.sum.dim_IntList %6249, %996, %true, %none : !torch.vtensor<[4,32,80,144],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f64>
%6251 = torch.aten.div.Scalar %6250, %int11520 : !torch.vtensor<[4,32,1,1],f64>, !torch.int -> !torch.vtensor<[4,32,1,1],f64>
%6252 = torch.aten.sub.Tensor %6249, %6251, %float1.000000e00 : !torch.vtensor<[4,32,80,144],f64>, !torch.vtensor<[4,32,1,1],f64>, !torch.float -> !torch.vtensor<[4,32,80,144],f64>
%6253 = torch.aten.mul.Tensor %6252, %6252 : !torch.vtensor<[4,32,80,144],f64>, !torch.vtensor<[4,32,80,144],f64> -> !torch.vtensor<[4,32,80,144],f64>
%6254 = torch.aten.sum.dim_IntList %6253, %996, %true, %none : !torch.vtensor<[4,32,80,144],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f64>
%6255 = torch.aten.div.Scalar %6254, %int11520 : !torch.vtensor<[4,32,1,1],f64>, !torch.int -> !torch.vtensor<[4,32,1,1],f64>
%6256 = torch.aten.to.dtype %6255, %int6, %false, %false, %none : !torch.vtensor<[4,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f32>
%6257 = torch.aten.sum.dim_IntList %6248, %996, %true, %none : !torch.vtensor<[4,32,80,144],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f32>
%6258 = torch.aten.div.Scalar %6257, %int11520 : !torch.vtensor<[4,32,1,1],f32>, !torch.int -> !torch.vtensor<[4,32,1,1],f32>
%6259 = torch.aten.add.Tensor %6256, %939, %int1 : !torch.vtensor<[4,32,1,1],f32>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,32,1,1],f32>
%6260 = torch.aten.rsqrt %6259 : !torch.vtensor<[4,32,1,1],f32> -> !torch.vtensor<[4,32,1,1],f32>
%6261 = torch.aten.sub.Tensor %6241, %6258, %int1 : !torch.vtensor<[4,32,80,144],f16>, !torch.vtensor<[4,32,1,1],f32>, !torch.int -> !torch.vtensor<[4,32,80,144],f32>
%6262 = torch.aten.mul.Tensor %6261, %6260 : !torch.vtensor<[4,32,80,144],f32>, !torch.vtensor<[4,32,1,1],f32> -> !torch.vtensor<[4,32,80,144],f32>
%6263 = torch.aten.view %6262, %5950 : !torch.vtensor<[4,32,80,144],f32>, !torch.list<int> -> !torch.vtensor<[4,2560,12,12],f32>
%6264 = torch.aten.unsqueeze %504, %int0 : !torch.vtensor<[2560],f16>, !torch.int -> !torch.vtensor<[1,2560],f16>
%6265 = torch.aten.unsqueeze %6264, %int2 : !torch.vtensor<[1,2560],f16>, !torch.int -> !torch.vtensor<[1,2560,1],f16>
%6266 = torch.aten.unsqueeze %6265, %int3 : !torch.vtensor<[1,2560,1],f16>, !torch.int -> !torch.vtensor<[1,2560,1,1],f16>
%6267 = torch.aten.unsqueeze %503, %int0 : !torch.vtensor<[2560],f16>, !torch.int -> !torch.vtensor<[1,2560],f16>
%6268 = torch.aten.unsqueeze %6267, %int2 : !torch.vtensor<[1,2560],f16>, !torch.int -> !torch.vtensor<[1,2560,1],f16>
%6269 = torch.aten.unsqueeze %6268, %int3 : !torch.vtensor<[1,2560,1],f16>, !torch.int -> !torch.vtensor<[1,2560,1,1],f16>
%6270 = torch.aten.mul.Tensor %6263, %6269 : !torch.vtensor<[4,2560,12,12],f32>, !torch.vtensor<[1,2560,1,1],f16> -> !torch.vtensor<[4,2560,12,12],f32>
%6271 = torch.aten.add.Tensor %6270, %6266, %int1 : !torch.vtensor<[4,2560,12,12],f32>, !torch.vtensor<[1,2560,1,1],f16>, !torch.int -> !torch.vtensor<[4,2560,12,12],f32>
%6272 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%6273 = torch.aten.to.dtype %6272, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%6274 = torch.prim.ListConstruct %int4, %int2560, %int12, %int12 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%6275 = torch.aten.broadcast_to %6273, %6274 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[4,2560,12,12],f16>
%6276 = torch.aten.to.dtype %6271, %int5, %false, %false, %none : !torch.vtensor<[4,2560,12,12],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,2560,12,12],f16>
%6277 = torch.prim.ListConstruct %int4, %int2560, %int12, %int12 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%6278 = torch.aten.broadcast_to %6276, %6277 : !torch.vtensor<[4,2560,12,12],f16>, !torch.list<int> -> !torch.vtensor<[4,2560,12,12],f16>
%6279 = torch.aten.sigmoid %6278 : !torch.vtensor<[4,2560,12,12],f16> -> !torch.vtensor<[4,2560,12,12],f16>
%6280 = torch.aten.mul.Tensor %6279, %6278 : !torch.vtensor<[4,2560,12,12],f16>, !torch.vtensor<[4,2560,12,12],f16> -> !torch.vtensor<[4,2560,12,12],f16>
%6281 = torch.aten.view %6280, %5969 : !torch.vtensor<[4,2560,12,12],f16>, !torch.list<int> -> !torch.vtensor<[4,160,16,12,12],f16>
%6282 = torch.aten.abs %6281 : !torch.vtensor<[4,160,16,12,12],f16> -> !torch.vtensor<[4,160,16,12,12],f16>
%values_256, %indices_257 = torch.aten.max.dim %6282, %int2, %true : !torch.vtensor<[4,160,16,12,12],f16>, !torch.int, !torch.bool -> !torch.vtensor<[4,160,1,12,12],f16>, !torch.vtensor<[4,160,1,12,12],si64>
%6283 = torch.aten.view %values_256, %5972 : !torch.vtensor<[4,160,1,12,12],f16>, !torch.list<int> -> !torch.vtensor<[4,160,1,12,12],f16>
%6284 = torch.aten.broadcast_to %6283, %5969 : !torch.vtensor<[4,160,1,12,12],f16>, !torch.list<int> -> !torch.vtensor<[4,160,16,12,12],f16>
%6285 = torch.aten.clone %6284, %int0 : !torch.vtensor<[4,160,16,12,12],f16>, !torch.int -> !torch.vtensor<[4,160,16,12,12],f16>
%6286 = torch.aten.view %6285, %5950 : !torch.vtensor<[4,160,16,12,12],f16>, !torch.list<int> -> !torch.vtensor<[4,2560,12,12],f16>
%6287 = torch.aten.div.Scalar %6286, %int128 : !torch.vtensor<[4,2560,12,12],f16>, !torch.int -> !torch.vtensor<[4,2560,12,12],f16>
%6288 = torch.aten.detach %936 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%6289 = torch.aten.detach %6288 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%6290 = torch.aten.div.Tensor %6280, %6287 : !torch.vtensor<[4,2560,12,12],f16>, !torch.vtensor<[4,2560,12,12],f16> -> !torch.vtensor<[4,2560,12,12],f16>
%6291 = torch.aten.add.Tensor %6290, %6289, %int1 : !torch.vtensor<[4,2560,12,12],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,2560,12,12],f16>
%6292 = torch.aten.round %6291 : !torch.vtensor<[4,2560,12,12],f16> -> !torch.vtensor<[4,2560,12,12],f16>
%6293 = torch.aten.clamp %6292, %int-128, %int127 : !torch.vtensor<[4,2560,12,12],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,2560,12,12],f16>
%6294 = torch.aten.sub.Tensor %6293, %6289, %int1 : !torch.vtensor<[4,2560,12,12],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,2560,12,12],f16>
%6295 = torch.aten.mul.Tensor %6294, %6287 : !torch.vtensor<[4,2560,12,12],f16>, !torch.vtensor<[4,2560,12,12],f16> -> !torch.vtensor<[4,2560,12,12],f16>
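// %6281..%6295 emulate dynamic per-group int8 quantization of the activation:
// the [4,2560,12,12] tensor is viewed as [4,160,16,12,12] (groups of 16
// channels), the per-group absmax yields scale = absmax/128, and the value is
// divided by the scale, offset by the detached zero-point %936, rounded,
// clamped to [-128,127], then un-offset and rescaled -- a fake
// quantize/dequantize round trip in f16. Sketch (assumed PyTorch equivalent;
// names are not from the source):
//   g = x.view(4, 160, 16, 12, 12)
//   scale = (g.abs().amax(dim=2, keepdim=True) / 128).expand_as(g)
//   scale = scale.reshape(4, 2560, 12, 12)
//   q = torch.clamp(torch.round(x / scale + zp), -128, 127)
//   x_fq = (q - zp) * scale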
%6296 = torch.aten.broadcast_to %502, %5986 : !torch.vtensor<[1280,160,1,3,3],f16>, !torch.list<int> -> !torch.vtensor<[1280,160,16,3,3],f16>
%6297 = torch.aten.clone %6296, %int0 : !torch.vtensor<[1280,160,16,3,3],f16>, !torch.int -> !torch.vtensor<[1280,160,16,3,3],f16>
%6298 = torch.aten.view %6297, %5989 : !torch.vtensor<[1280,160,16,3,3],f16>, !torch.list<int> -> !torch.vtensor<[1280,2560,3,3],f16>
%6299 = torch.aten.mul.Tensor %501, %6298 : !torch.vtensor<[1280,2560,3,3],si8>, !torch.vtensor<[1280,2560,3,3],f16> -> !torch.vtensor<[1280,2560,3,3],f16>
%6300 = torch.aten.convolution %6295, %6299, %500, %984, %984, %984, %false, %985, %int1 : !torch.vtensor<[4,2560,12,12],f16>, !torch.vtensor<[1280,2560,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[4,1280,12,12],f16>
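// %6296..%6300 dequantize the int8 conv weight on the fly: per-group f16
// scales stored as [1280,160,1,3,3] are broadcast over the 16-channel group
// dim, reshaped to the full [1280,2560,3,3] kernel, and multiplied into the
// si8 tensor %501; an ordinary stride-1, pad-1 3x3 convolution then runs in
// f16. Sketch (assumed PyTorch equivalent):
//   w = w_scale.expand(1280, 160, 16, 3, 3).reshape(1280, 2560, 3, 3)
//   w = w * w_int8.half()
//   y = torch.nn.functional.conv2d(x_fq, w, bias, stride=1, padding=1)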
%6301 = torch.aten.sigmoid %983 : !torch.vtensor<[4,1280],f16> -> !torch.vtensor<[4,1280],f16>
%6302 = torch.aten.mul.Tensor %6301, %983 : !torch.vtensor<[4,1280],f16>, !torch.vtensor<[4,1280],f16> -> !torch.vtensor<[4,1280],f16>
%6303 = torch.aten.transpose.int %499, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%6304 = torch.aten.mm %6302, %6303 : !torch.vtensor<[4,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[4,1280],f16>
%6305 = torch.aten.mul.Scalar %498, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16>
%6306 = torch.aten.add.Tensor %6305, %6304, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[4,1280],f16>, !torch.int -> !torch.vtensor<[4,1280],f16>
%6307 = torch.aten.unsqueeze %6306, %int2 : !torch.vtensor<[4,1280],f16>, !torch.int -> !torch.vtensor<[4,1280,1],f16>
%6308 = torch.aten.unsqueeze %6307, %int3 : !torch.vtensor<[4,1280,1],f16>, !torch.int -> !torch.vtensor<[4,1280,1,1],f16>
%6309 = torch.aten.add.Tensor %6300, %6308, %int1 : !torch.vtensor<[4,1280,12,12],f16>, !torch.vtensor<[4,1280,1,1],f16>, !torch.int -> !torch.vtensor<[4,1280,12,12],f16>
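// %6301..%6309 inject the timestep conditioning: SiLU of the shared [4,1280]
// time embedding %983, a linear projection (transpose + mm + bias), then a
// broadcast add as [4,1280,1,1] onto the conv output -- the standard
// ResNet-block time-embedding path of a diffusion UNet. Sketch (names
// assumed, not from the source):
//   emb = torch.nn.functional.silu(t_emb)        # [4,1280]
//   emb = emb @ w_emb.t() + b_emb                # [4,1280]
//   h = h + emb[:, :, None, None]                # [4,1280,12,12]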
%6310 = torch.aten.view %6309, %4898 : !torch.vtensor<[4,1280,12,12],f16>, !torch.list<int> -> !torch.vtensor<[4,32,40,144],f16>
%6311 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%6312 = torch.aten.to.dtype %6311, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%6313 = torch.prim.ListConstruct %int4, %int32, %int40, %int144 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%6314 = torch.aten.broadcast_to %6312, %6313 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[4,32,40,144],f32>
%6315 = torch.aten.to.dtype %6310, %int6, %false, %false, %none : !torch.vtensor<[4,32,40,144],f16>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,32,40,144],f32>
%6316 = torch.prim.ListConstruct %int4, %int32, %int40, %int144 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%6317 = torch.aten.broadcast_to %6315, %6316 : !torch.vtensor<[4,32,40,144],f32>, !torch.list<int> -> !torch.vtensor<[4,32,40,144],f32>
%6318 = torch.aten.to.dtype %6317, %int7, %false, %false, %none : !torch.vtensor<[4,32,40,144],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,32,40,144],f64>
%6319 = torch.aten.sum.dim_IntList %6318, %996, %true, %none : !torch.vtensor<[4,32,40,144],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f64>
%6320 = torch.aten.div.Scalar %6319, %int5760 : !torch.vtensor<[4,32,1,1],f64>, !torch.int -> !torch.vtensor<[4,32,1,1],f64>
%6321 = torch.aten.sub.Tensor %6318, %6320, %float1.000000e00 : !torch.vtensor<[4,32,40,144],f64>, !torch.vtensor<[4,32,1,1],f64>, !torch.float -> !torch.vtensor<[4,32,40,144],f64>
%6322 = torch.aten.mul.Tensor %6321, %6321 : !torch.vtensor<[4,32,40,144],f64>, !torch.vtensor<[4,32,40,144],f64> -> !torch.vtensor<[4,32,40,144],f64>
%6323 = torch.aten.sum.dim_IntList %6322, %996, %true, %none : !torch.vtensor<[4,32,40,144],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f64>
%6324 = torch.aten.div.Scalar %6323, %int5760 : !torch.vtensor<[4,32,1,1],f64>, !torch.int -> !torch.vtensor<[4,32,1,1],f64>
%6325 = torch.aten.to.dtype %6324, %int6, %false, %false, %none : !torch.vtensor<[4,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f32>
%6326 = torch.aten.sum.dim_IntList %6317, %996, %true, %none : !torch.vtensor<[4,32,40,144],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f32>
%6327 = torch.aten.div.Scalar %6326, %int5760 : !torch.vtensor<[4,32,1,1],f32>, !torch.int -> !torch.vtensor<[4,32,1,1],f32>
%6328 = torch.aten.add.Tensor %6325, %939, %int1 : !torch.vtensor<[4,32,1,1],f32>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,32,1,1],f32>
%6329 = torch.aten.rsqrt %6328 : !torch.vtensor<[4,32,1,1],f32> -> !torch.vtensor<[4,32,1,1],f32>
%6330 = torch.aten.sub.Tensor %6310, %6327, %int1 : !torch.vtensor<[4,32,40,144],f16>, !torch.vtensor<[4,32,1,1],f32>, !torch.int -> !torch.vtensor<[4,32,40,144],f32>
%6331 = torch.aten.mul.Tensor %6330, %6329 : !torch.vtensor<[4,32,40,144],f32>, !torch.vtensor<[4,32,1,1],f32> -> !torch.vtensor<[4,32,40,144],f32>
%6332 = torch.aten.view %6331, %4921 : !torch.vtensor<[4,32,40,144],f32>, !torch.list<int> -> !torch.vtensor<[4,1280,12,12],f32>
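// %6310..%6332 compute GroupNorm statistics over 32 groups: the tensor is
// viewed as [4,32,40,144] (40 channels x 144 pixels = 5760 elements per
// group, matching the %int5760 divisors), mean and variance are accumulated
// in f64 for accuracy, and normalization uses rsqrt(var + eps) in f32 before
// the view back to [4,1280,12,12]. Sketch of the math (assumed equivalent):
//   g = h.view(4, 32, 40, 144).double()
//   mean = g.mean(dim=(2, 3), keepdim=True)
//   var = ((g - mean) ** 2).mean(dim=(2, 3), keepdim=True)
//   h_norm = (g - mean).float() * torch.rsqrt(var.float() + eps)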
%6333 = torch.aten.unsqueeze %497, %int0 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1,1280],f16>
%6334 = torch.aten.unsqueeze %6333, %int2 : !torch.vtensor<[1,1280],f16>, !torch.int -> !torch.vtensor<[1,1280,1],f16>
%6335 = torch.aten.unsqueeze %6334, %int3 : !torch.vtensor<[1,1280,1],f16>, !torch.int -> !torch.vtensor<[1,1280,1,1],f16>
%6336 = torch.aten.unsqueeze %496, %int0 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1,1280],f16>
%6337 = torch.aten.unsqueeze %6336, %int2 : !torch.vtensor<[1,1280],f16>, !torch.int -> !torch.vtensor<[1,1280,1],f16>
%6338 = torch.aten.unsqueeze %6337, %int3 : !torch.vtensor<[1,1280,1],f16>, !torch.int -> !torch.vtensor<[1,1280,1,1],f16>
%6339 = torch.aten.mul.Tensor %6332, %6338 : !torch.vtensor<[4,1280,12,12],f32>, !torch.vtensor<[1,1280,1,1],f16> -> !torch.vtensor<[4,1280,12,12],f32>
%6340 = torch.aten.add.Tensor %6339, %6335, %int1 : !torch.vtensor<[4,1280,12,12],f32>, !torch.vtensor<[1,1280,1,1],f16>, !torch.int -> !torch.vtensor<[4,1280,12,12],f32>
%6341 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%6342 = torch.aten.to.dtype %6341, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%6343 = torch.prim.ListConstruct %int4, %int1280, %int12, %int12 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%6344 = torch.aten.broadcast_to %6342, %6343 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[4,1280,12,12],f16>
%6345 = torch.aten.to.dtype %6340, %int5, %false, %false, %none : !torch.vtensor<[4,1280,12,12],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,1280,12,12],f16>
%6346 = torch.prim.ListConstruct %int4, %int1280, %int12, %int12 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%6347 = torch.aten.broadcast_to %6345, %6346 : !torch.vtensor<[4,1280,12,12],f16>, !torch.list<int> -> !torch.vtensor<[4,1280,12,12],f16>
%6348 = torch.aten.sigmoid %6347 : !torch.vtensor<[4,1280,12,12],f16> -> !torch.vtensor<[4,1280,12,12],f16>
%6349 = torch.aten.mul.Tensor %6348, %6347 : !torch.vtensor<[4,1280,12,12],f16>, !torch.vtensor<[4,1280,12,12],f16> -> !torch.vtensor<[4,1280,12,12],f16>
%6350 = torch.aten.view %6349, %4940 : !torch.vtensor<[4,1280,12,12],f16>, !torch.list<int> -> !torch.vtensor<[4,80,16,12,12],f16>
%6351 = torch.aten.abs %6350 : !torch.vtensor<[4,80,16,12,12],f16> -> !torch.vtensor<[4,80,16,12,12],f16>
%values_258, %indices_259 = torch.aten.max.dim %6351, %int2, %true : !torch.vtensor<[4,80,16,12,12],f16>, !torch.int, !torch.bool -> !torch.vtensor<[4,80,1,12,12],f16>, !torch.vtensor<[4,80,1,12,12],si64>
%6352 = torch.aten.view %values_258, %4943 : !torch.vtensor<[4,80,1,12,12],f16>, !torch.list<int> -> !torch.vtensor<[4,80,1,12,12],f16>
%6353 = torch.aten.broadcast_to %6352, %4940 : !torch.vtensor<[4,80,1,12,12],f16>, !torch.list<int> -> !torch.vtensor<[4,80,16,12,12],f16>
%6354 = torch.aten.clone %6353, %int0 : !torch.vtensor<[4,80,16,12,12],f16>, !torch.int -> !torch.vtensor<[4,80,16,12,12],f16>
%6355 = torch.aten.view %6354, %4921 : !torch.vtensor<[4,80,16,12,12],f16>, !torch.list<int> -> !torch.vtensor<[4,1280,12,12],f16>
%6356 = torch.aten.div.Scalar %6355, %int128 : !torch.vtensor<[4,1280,12,12],f16>, !torch.int -> !torch.vtensor<[4,1280,12,12],f16>
%6357 = torch.aten.detach %936 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%6358 = torch.aten.detach %6357 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%6359 = torch.aten.div.Tensor %6349, %6356 : !torch.vtensor<[4,1280,12,12],f16>, !torch.vtensor<[4,1280,12,12],f16> -> !torch.vtensor<[4,1280,12,12],f16>
%6360 = torch.aten.add.Tensor %6359, %6358, %int1 : !torch.vtensor<[4,1280,12,12],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,1280,12,12],f16>
%6361 = torch.aten.round %6360 : !torch.vtensor<[4,1280,12,12],f16> -> !torch.vtensor<[4,1280,12,12],f16>
%6362 = torch.aten.clamp %6361, %int-128, %int127 : !torch.vtensor<[4,1280,12,12],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,1280,12,12],f16>
%6363 = torch.aten.sub.Tensor %6362, %6358, %int1 : !torch.vtensor<[4,1280,12,12],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,1280,12,12],f16>
%6364 = torch.aten.mul.Tensor %6363, %6356 : !torch.vtensor<[4,1280,12,12],f16>, !torch.vtensor<[4,1280,12,12],f16> -> !torch.vtensor<[4,1280,12,12],f16>
%6365 = torch.aten.broadcast_to %495, %3722 : !torch.vtensor<[1280,80,1,3,3],f16>, !torch.list<int> -> !torch.vtensor<[1280,80,16,3,3],f16>
%6366 = torch.aten.clone %6365, %int0 : !torch.vtensor<[1280,80,16,3,3],f16>, !torch.int -> !torch.vtensor<[1280,80,16,3,3],f16>
%6367 = torch.aten.view %6366, %3725 : !torch.vtensor<[1280,80,16,3,3],f16>, !torch.list<int> -> !torch.vtensor<[1280,1280,3,3],f16>
%6368 = torch.aten.mul.Tensor %494, %6367 : !torch.vtensor<[1280,1280,3,3],si8>, !torch.vtensor<[1280,1280,3,3],f16> -> !torch.vtensor<[1280,1280,3,3],f16>
%6369 = torch.aten.convolution %6364, %6368, %493, %984, %984, %984, %false, %985, %int1 : !torch.vtensor<[4,1280,12,12],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[4,1280,12,12],f16>
%6370 = torch.aten.view %6240, %5969 : !torch.vtensor<[4,2560,12,12],f16>, !torch.list<int> -> !torch.vtensor<[4,160,16,12,12],f16>
%6371 = torch.aten.abs %6370 : !torch.vtensor<[4,160,16,12,12],f16> -> !torch.vtensor<[4,160,16,12,12],f16>
%values_260, %indices_261 = torch.aten.max.dim %6371, %int2, %true : !torch.vtensor<[4,160,16,12,12],f16>, !torch.int, !torch.bool -> !torch.vtensor<[4,160,1,12,12],f16>, !torch.vtensor<[4,160,1,12,12],si64>
%6372 = torch.aten.view %values_260, %5972 : !torch.vtensor<[4,160,1,12,12],f16>, !torch.list<int> -> !torch.vtensor<[4,160,1,12,12],f16>
%6373 = torch.aten.broadcast_to %6372, %5969 : !torch.vtensor<[4,160,1,12,12],f16>, !torch.list<int> -> !torch.vtensor<[4,160,16,12,12],f16>
%6374 = torch.aten.clone %6373, %int0 : !torch.vtensor<[4,160,16,12,12],f16>, !torch.int -> !torch.vtensor<[4,160,16,12,12],f16>
%6375 = torch.aten.view %6374, %5950 : !torch.vtensor<[4,160,16,12,12],f16>, !torch.list<int> -> !torch.vtensor<[4,2560,12,12],f16>
%6376 = torch.aten.div.Scalar %6375, %int128 : !torch.vtensor<[4,2560,12,12],f16>, !torch.int -> !torch.vtensor<[4,2560,12,12],f16>
%6377 = torch.aten.detach %936 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%6378 = torch.aten.detach %6377 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%6379 = torch.aten.div.Tensor %6240, %6376 : !torch.vtensor<[4,2560,12,12],f16>, !torch.vtensor<[4,2560,12,12],f16> -> !torch.vtensor<[4,2560,12,12],f16>
%6380 = torch.aten.add.Tensor %6379, %6378, %int1 : !torch.vtensor<[4,2560,12,12],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,2560,12,12],f16>
%6381 = torch.aten.round %6380 : !torch.vtensor<[4,2560,12,12],f16> -> !torch.vtensor<[4,2560,12,12],f16>
%6382 = torch.aten.clamp %6381, %int-128, %int127 : !torch.vtensor<[4,2560,12,12],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,2560,12,12],f16>
%6383 = torch.aten.sub.Tensor %6382, %6378, %int1 : !torch.vtensor<[4,2560,12,12],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,2560,12,12],f16>
%6384 = torch.aten.mul.Tensor %6383, %6376 : !torch.vtensor<[4,2560,12,12],f16>, !torch.vtensor<[4,2560,12,12],f16> -> !torch.vtensor<[4,2560,12,12],f16>
%6385 = torch.aten.broadcast_to %492, %6077 : !torch.vtensor<[1280,160,1,1,1],f16>, !torch.list<int> -> !torch.vtensor<[1280,160,16,1,1],f16>
%6386 = torch.aten.clone %6385, %int0 : !torch.vtensor<[1280,160,16,1,1],f16>, !torch.int -> !torch.vtensor<[1280,160,16,1,1],f16>
%6387 = torch.aten.view %6386, %6080 : !torch.vtensor<[1280,160,16,1,1],f16>, !torch.list<int> -> !torch.vtensor<[1280,2560,1,1],f16>
%6388 = torch.aten.mul.Tensor %491, %6387 : !torch.vtensor<[1280,2560,1,1],si8>, !torch.vtensor<[1280,2560,1,1],f16> -> !torch.vtensor<[1280,2560,1,1],f16>
%6389 = torch.aten.convolution %6384, %6388, %490, %984, %985, %984, %false, %985, %int1 : !torch.vtensor<[4,2560,12,12],f16>, !torch.vtensor<[1280,2560,1,1],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[4,1280,12,12],f16>
%6390 = torch.aten.add.Tensor %6389, %6369, %int1 : !torch.vtensor<[4,1280,12,12],f16>, !torch.vtensor<[4,1280,12,12],f16>, !torch.int -> !torch.vtensor<[4,1280,12,12],f16>
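// %6370..%6390 form the ResNet shortcut: the block input %6240 has 2560
// channels while the main path produces 1280, so the input is fake-quantized
// as above and projected by a quantized 1x1 convolution (scales %492, si8
// weight %491, bias %490) before the residual add at %6390. Sketch (assumed):
//   h = conv1x1(fake_quant(x_in)) + main_branch_out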
%6391 = torch.aten.div.Tensor %6390, %925 : !torch.vtensor<[4,1280,12,12],f16>, !torch.vtensor<[],f16> -> !torch.vtensor<[4,1280,12,12],f16>
%6392 = torch.prim.ListConstruct %int24, %int24 : (!torch.int, !torch.int) -> !torch.list<int>
%6393 = torch.aten.upsample_nearest2d %6391, %6392, %float2.000000e00, %float2.000000e00 : !torch.vtensor<[4,1280,12,12],f16>, !torch.list<int>, !torch.float, !torch.float -> !torch.vtensor<[4,1280,24,24],f16>
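// %6391..%6393: the residual sum is divided by the block's output scale
// factor %925, then upsampled 2x (12x12 -> 24x24) with nearest-neighbor
// interpolation -- the decoder's upsampling step, whose 3x3 conv follows
// below. Sketch (assumed PyTorch equivalent):
//   h = torch.nn.functional.interpolate(h / out_scale, scale_factor=2.0,
//                                       mode="nearest")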
%6394 = torch.aten.view %6393, %3705 : !torch.vtensor<[4,1280,24,24],f16>, !torch.list<int> -> !torch.vtensor<[4,80,16,24,24],f16>
%6395 = torch.aten.abs %6394 : !torch.vtensor<[4,80,16,24,24],f16> -> !torch.vtensor<[4,80,16,24,24],f16>
%values_262, %indices_263 = torch.aten.max.dim %6395, %int2, %true : !torch.vtensor<[4,80,16,24,24],f16>, !torch.int, !torch.bool -> !torch.vtensor<[4,80,1,24,24],f16>, !torch.vtensor<[4,80,1,24,24],si64>
%6396 = torch.aten.view %values_262, %3708 : !torch.vtensor<[4,80,1,24,24],f16>, !torch.list<int> -> !torch.vtensor<[4,80,1,24,24],f16>
%6397 = torch.aten.broadcast_to %6396, %3705 : !torch.vtensor<[4,80,1,24,24],f16>, !torch.list<int> -> !torch.vtensor<[4,80,16,24,24],f16>
%6398 = torch.aten.clone %6397, %int0 : !torch.vtensor<[4,80,16,24,24],f16>, !torch.int -> !torch.vtensor<[4,80,16,24,24],f16>
%6399 = torch.aten.view %6398, %3686 : !torch.vtensor<[4,80,16,24,24],f16>, !torch.list<int> -> !torch.vtensor<[4,1280,24,24],f16>
%6400 = torch.aten.div.Scalar %6399, %int128 : !torch.vtensor<[4,1280,24,24],f16>, !torch.int -> !torch.vtensor<[4,1280,24,24],f16>
%6401 = torch.aten.detach %936 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%6402 = torch.aten.detach %6401 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%6403 = torch.aten.div.Tensor %6393, %6400 : !torch.vtensor<[4,1280,24,24],f16>, !torch.vtensor<[4,1280,24,24],f16> -> !torch.vtensor<[4,1280,24,24],f16>
%6404 = torch.aten.add.Tensor %6403, %6402, %int1 : !torch.vtensor<[4,1280,24,24],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,1280,24,24],f16>
%6405 = torch.aten.round %6404 : !torch.vtensor<[4,1280,24,24],f16> -> !torch.vtensor<[4,1280,24,24],f16>
%6406 = torch.aten.clamp %6405, %int-128, %int127 : !torch.vtensor<[4,1280,24,24],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,1280,24,24],f16>
%6407 = torch.aten.sub.Tensor %6406, %6402, %int1 : !torch.vtensor<[4,1280,24,24],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,1280,24,24],f16>
%6408 = torch.aten.mul.Tensor %6407, %6400 : !torch.vtensor<[4,1280,24,24],f16>, !torch.vtensor<[4,1280,24,24],f16> -> !torch.vtensor<[4,1280,24,24],f16>
%6409 = torch.aten.broadcast_to %489, %3722 : !torch.vtensor<[1280,80,1,3,3],f16>, !torch.list<int> -> !torch.vtensor<[1280,80,16,3,3],f16>
%6410 = torch.aten.clone %6409, %int0 : !torch.vtensor<[1280,80,16,3,3],f16>, !torch.int -> !torch.vtensor<[1280,80,16,3,3],f16>
%6411 = torch.aten.view %6410, %3725 : !torch.vtensor<[1280,80,16,3,3],f16>, !torch.list<int> -> !torch.vtensor<[1280,1280,3,3],f16>
%6412 = torch.aten.mul.Tensor %488, %6411 : !torch.vtensor<[1280,1280,3,3],si8>, !torch.vtensor<[1280,1280,3,3],f16> -> !torch.vtensor<[1280,1280,3,3],f16>
%6413 = torch.aten.convolution %6408, %6412, %487, %984, %984, %984, %false, %985, %int1 : !torch.vtensor<[4,1280,24,24],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[4,1280,24,24],f16>
%6414 = torch.prim.ListConstruct %6413, %4877 : (!torch.vtensor<[4,1280,24,24],f16>, !torch.vtensor<[4,1280,24,24],f16>) -> !torch.list<vtensor>
%6415 = torch.aten.cat %6414, %int1 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[4,2560,24,24],f16>
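// %6414/%6415 realize the UNet skip connection: the upsampled features are
// concatenated with the stored encoder activation %4877 along the channel
// dim (1280 + 1280 -> 2560), and the next ResNet block consumes the
// [4,2560,24,24] result. Sketch (assumed):
//   h = torch.cat([h, skip], dim=1)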
%6416 = torch.prim.ListConstruct %int4, %int32, %int80, %int576 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%6417 = torch.aten.view %6415, %6416 : !torch.vtensor<[4,2560,24,24],f16>, !torch.list<int> -> !torch.vtensor<[4,32,80,576],f16>
%6418 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%6419 = torch.aten.to.dtype %6418, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%6420 = torch.prim.ListConstruct %int4, %int32, %int80, %int576 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%6421 = torch.aten.broadcast_to %6419, %6420 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[4,32,80,576],f32>
%6422 = torch.aten.to.dtype %6417, %int6, %false, %false, %none : !torch.vtensor<[4,32,80,576],f16>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,32,80,576],f32>
%6423 = torch.prim.ListConstruct %int4, %int32, %int80, %int576 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%6424 = torch.aten.broadcast_to %6422, %6423 : !torch.vtensor<[4,32,80,576],f32>, !torch.list<int> -> !torch.vtensor<[4,32,80,576],f32>
%6425 = torch.aten.to.dtype %6424, %int7, %false, %false, %none : !torch.vtensor<[4,32,80,576],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,32,80,576],f64>
%6426 = torch.aten.sum.dim_IntList %6425, %996, %true, %none : !torch.vtensor<[4,32,80,576],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f64>
%6427 = torch.aten.div.Scalar %6426, %int46080 : !torch.vtensor<[4,32,1,1],f64>, !torch.int -> !torch.vtensor<[4,32,1,1],f64>
%6428 = torch.aten.sub.Tensor %6425, %6427, %float1.000000e00 : !torch.vtensor<[4,32,80,576],f64>, !torch.vtensor<[4,32,1,1],f64>, !torch.float -> !torch.vtensor<[4,32,80,576],f64>
%6429 = torch.aten.mul.Tensor %6428, %6428 : !torch.vtensor<[4,32,80,576],f64>, !torch.vtensor<[4,32,80,576],f64> -> !torch.vtensor<[4,32,80,576],f64>
%6430 = torch.aten.sum.dim_IntList %6429, %996, %true, %none : !torch.vtensor<[4,32,80,576],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f64>
%6431 = torch.aten.div.Scalar %6430, %int46080 : !torch.vtensor<[4,32,1,1],f64>, !torch.int -> !torch.vtensor<[4,32,1,1],f64>
%6432 = torch.aten.to.dtype %6431, %int6, %false, %false, %none : !torch.vtensor<[4,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f32>
%6433 = torch.aten.sum.dim_IntList %6424, %996, %true, %none : !torch.vtensor<[4,32,80,576],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f32>
%6434 = torch.aten.div.Scalar %6433, %int46080 : !torch.vtensor<[4,32,1,1],f32>, !torch.int -> !torch.vtensor<[4,32,1,1],f32>
%6435 = torch.aten.add.Tensor %6432, %939, %int1 : !torch.vtensor<[4,32,1,1],f32>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,32,1,1],f32>
%6436 = torch.aten.rsqrt %6435 : !torch.vtensor<[4,32,1,1],f32> -> !torch.vtensor<[4,32,1,1],f32>
%6437 = torch.aten.sub.Tensor %6417, %6434, %int1 : !torch.vtensor<[4,32,80,576],f16>, !torch.vtensor<[4,32,1,1],f32>, !torch.int -> !torch.vtensor<[4,32,80,576],f32>
%6438 = torch.aten.mul.Tensor %6437, %6436 : !torch.vtensor<[4,32,80,576],f32>, !torch.vtensor<[4,32,1,1],f32> -> !torch.vtensor<[4,32,80,576],f32>
%6439 = torch.prim.ListConstruct %int4, %int2560, %int24, %int24 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%6440 = torch.aten.view %6438, %6439 : !torch.vtensor<[4,32,80,576],f32>, !torch.list<int> -> !torch.vtensor<[4,2560,24,24],f32>
%6441 = torch.aten.unsqueeze %486, %int0 : !torch.vtensor<[2560],f16>, !torch.int -> !torch.vtensor<[1,2560],f16>
%6442 = torch.aten.unsqueeze %6441, %int2 : !torch.vtensor<[1,2560],f16>, !torch.int -> !torch.vtensor<[1,2560,1],f16>
%6443 = torch.aten.unsqueeze %6442, %int3 : !torch.vtensor<[1,2560,1],f16>, !torch.int -> !torch.vtensor<[1,2560,1,1],f16>
%6444 = torch.aten.unsqueeze %485, %int0 : !torch.vtensor<[2560],f16>, !torch.int -> !torch.vtensor<[1,2560],f16>
%6445 = torch.aten.unsqueeze %6444, %int2 : !torch.vtensor<[1,2560],f16>, !torch.int -> !torch.vtensor<[1,2560,1],f16>
%6446 = torch.aten.unsqueeze %6445, %int3 : !torch.vtensor<[1,2560,1],f16>, !torch.int -> !torch.vtensor<[1,2560,1,1],f16>
%6447 = torch.aten.mul.Tensor %6440, %6446 : !torch.vtensor<[4,2560,24,24],f32>, !torch.vtensor<[1,2560,1,1],f16> -> !torch.vtensor<[4,2560,24,24],f32>
%6448 = torch.aten.add.Tensor %6447, %6443, %int1 : !torch.vtensor<[4,2560,24,24],f32>, !torch.vtensor<[1,2560,1,1],f16>, !torch.int -> !torch.vtensor<[4,2560,24,24],f32>
%6449 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%6450 = torch.aten.to.dtype %6449, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%6451 = torch.prim.ListConstruct %int4, %int2560, %int24, %int24 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%6452 = torch.aten.broadcast_to %6450, %6451 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[4,2560,24,24],f16>
%6453 = torch.aten.to.dtype %6448, %int5, %false, %false, %none : !torch.vtensor<[4,2560,24,24],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,2560,24,24],f16>
%6454 = torch.prim.ListConstruct %int4, %int2560, %int24, %int24 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%6455 = torch.aten.broadcast_to %6453, %6454 : !torch.vtensor<[4,2560,24,24],f16>, !torch.list<int> -> !torch.vtensor<[4,2560,24,24],f16>
%6456 = torch.aten.sigmoid %6455 : !torch.vtensor<[4,2560,24,24],f16> -> !torch.vtensor<[4,2560,24,24],f16>
%6457 = torch.aten.mul.Tensor %6456, %6455 : !torch.vtensor<[4,2560,24,24],f16>, !torch.vtensor<[4,2560,24,24],f16> -> !torch.vtensor<[4,2560,24,24],f16>
%6458 = torch.prim.ListConstruct %int4, %int160, %int16, %int24, %int24 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%6459 = torch.aten.view %6457, %6458 : !torch.vtensor<[4,2560,24,24],f16>, !torch.list<int> -> !torch.vtensor<[4,160,16,24,24],f16>
%6460 = torch.aten.abs %6459 : !torch.vtensor<[4,160,16,24,24],f16> -> !torch.vtensor<[4,160,16,24,24],f16>
%values_264, %indices_265 = torch.aten.max.dim %6460, %int2, %true : !torch.vtensor<[4,160,16,24,24],f16>, !torch.int, !torch.bool -> !torch.vtensor<[4,160,1,24,24],f16>, !torch.vtensor<[4,160,1,24,24],si64>
%6461 = torch.prim.ListConstruct %int4, %int160, %int1, %int24, %int24 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%6462 = torch.aten.view %values_264, %6461 : !torch.vtensor<[4,160,1,24,24],f16>, !torch.list<int> -> !torch.vtensor<[4,160,1,24,24],f16>
%6463 = torch.aten.broadcast_to %6462, %6458 : !torch.vtensor<[4,160,1,24,24],f16>, !torch.list<int> -> !torch.vtensor<[4,160,16,24,24],f16>
%6464 = torch.aten.clone %6463, %int0 : !torch.vtensor<[4,160,16,24,24],f16>, !torch.int -> !torch.vtensor<[4,160,16,24,24],f16>
%6465 = torch.aten.view %6464, %6439 : !torch.vtensor<[4,160,16,24,24],f16>, !torch.list<int> -> !torch.vtensor<[4,2560,24,24],f16>
%6466 = torch.aten.div.Scalar %6465, %int128 : !torch.vtensor<[4,2560,24,24],f16>, !torch.int -> !torch.vtensor<[4,2560,24,24],f16>
%6467 = torch.aten.detach %936 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%6468 = torch.aten.detach %6467 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%6469 = torch.aten.div.Tensor %6457, %6466 : !torch.vtensor<[4,2560,24,24],f16>, !torch.vtensor<[4,2560,24,24],f16> -> !torch.vtensor<[4,2560,24,24],f16>
%6470 = torch.aten.add.Tensor %6469, %6468, %int1 : !torch.vtensor<[4,2560,24,24],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,2560,24,24],f16>
%6471 = torch.aten.round %6470 : !torch.vtensor<[4,2560,24,24],f16> -> !torch.vtensor<[4,2560,24,24],f16>
%6472 = torch.aten.clamp %6471, %int-128, %int127 : !torch.vtensor<[4,2560,24,24],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,2560,24,24],f16>
%6473 = torch.aten.sub.Tensor %6472, %6468, %int1 : !torch.vtensor<[4,2560,24,24],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,2560,24,24],f16>
%6474 = torch.aten.mul.Tensor %6473, %6466 : !torch.vtensor<[4,2560,24,24],f16>, !torch.vtensor<[4,2560,24,24],f16> -> !torch.vtensor<[4,2560,24,24],f16>
%6475 = torch.aten.broadcast_to %484, %5986 : !torch.vtensor<[1280,160,1,3,3],f16>, !torch.list<int> -> !torch.vtensor<[1280,160,16,3,3],f16>
%6476 = torch.aten.clone %6475, %int0 : !torch.vtensor<[1280,160,16,3,3],f16>, !torch.int -> !torch.vtensor<[1280,160,16,3,3],f16>
%6477 = torch.aten.view %6476, %5989 : !torch.vtensor<[1280,160,16,3,3],f16>, !torch.list<int> -> !torch.vtensor<[1280,2560,3,3],f16>
%6478 = torch.aten.mul.Tensor %483, %6477 : !torch.vtensor<[1280,2560,3,3],si8>, !torch.vtensor<[1280,2560,3,3],f16> -> !torch.vtensor<[1280,2560,3,3],f16>
%6479 = torch.aten.convolution %6474, %6478, %482, %984, %984, %984, %false, %985, %int1 : !torch.vtensor<[4,2560,24,24],f16>, !torch.vtensor<[1280,2560,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[4,1280,24,24],f16>
%6480 = torch.aten.sigmoid %983 : !torch.vtensor<[4,1280],f16> -> !torch.vtensor<[4,1280],f16>
%6481 = torch.aten.mul.Tensor %6480, %983 : !torch.vtensor<[4,1280],f16>, !torch.vtensor<[4,1280],f16> -> !torch.vtensor<[4,1280],f16>
%6482 = torch.aten.transpose.int %481, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16>
%6483 = torch.aten.mm %6481, %6482 : !torch.vtensor<[4,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[4,1280],f16>
%6484 = torch.aten.mul.Scalar %480, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16>
%6485 = torch.aten.add.Tensor %6484, %6483, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[4,1280],f16>, !torch.int -> !torch.vtensor<[4,1280],f16>
%6486 = torch.aten.unsqueeze %6485, %int2 : !torch.vtensor<[4,1280],f16>, !torch.int -> !torch.vtensor<[4,1280,1],f16>
%6487 = torch.aten.unsqueeze %6486, %int3 : !torch.vtensor<[4,1280,1],f16>, !torch.int -> !torch.vtensor<[4,1280,1,1],f16>
%6488 = torch.aten.add.Tensor %6479, %6487, %int1 : !torch.vtensor<[4,1280,24,24],f16>, !torch.vtensor<[4,1280,1,1],f16>, !torch.int -> !torch.vtensor<[4,1280,24,24],f16>
%6489 = torch.aten.view %6488, %3663 : !torch.vtensor<[4,1280,24,24],f16>, !torch.list<int> -> !torch.vtensor<[4,32,40,576],f16>
%6490 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%6491 = torch.aten.to.dtype %6490, %int6, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f32>
%6492 = torch.prim.ListConstruct %int4, %int32, %int40, %int576 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%6493 = torch.aten.broadcast_to %6491, %6492 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[4,32,40,576],f32>
%6494 = torch.aten.to.dtype %6489, %int6, %false, %false, %none : !torch.vtensor<[4,32,40,576],f16>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,32,40,576],f32>
%6495 = torch.prim.ListConstruct %int4, %int32, %int40, %int576 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%6496 = torch.aten.broadcast_to %6494, %6495 : !torch.vtensor<[4,32,40,576],f32>, !torch.list<int> -> !torch.vtensor<[4,32,40,576],f32>
%6497 = torch.aten.to.dtype %6496, %int7, %false, %false, %none : !torch.vtensor<[4,32,40,576],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,32,40,576],f64>
%6498 = torch.aten.sum.dim_IntList %6497, %996, %true, %none : !torch.vtensor<[4,32,40,576],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f64>
%6499 = torch.aten.div.Scalar %6498, %int23040 : !torch.vtensor<[4,32,1,1],f64>, !torch.int -> !torch.vtensor<[4,32,1,1],f64>
%6500 = torch.aten.sub.Tensor %6497, %6499, %float1.000000e00 : !torch.vtensor<[4,32,40,576],f64>, !torch.vtensor<[4,32,1,1],f64>, !torch.float -> !torch.vtensor<[4,32,40,576],f64>
%6501 = torch.aten.mul.Tensor %6500, %6500 : !torch.vtensor<[4,32,40,576],f64>, !torch.vtensor<[4,32,40,576],f64> -> !torch.vtensor<[4,32,40,576],f64>
%6502 = torch.aten.sum.dim_IntList %6501, %996, %true, %none : !torch.vtensor<[4,32,40,576],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f64>
%6503 = torch.aten.div.Scalar %6502, %int23040 : !torch.vtensor<[4,32,1,1],f64>, !torch.int -> !torch.vtensor<[4,32,1,1],f64>
%6504 = torch.aten.to.dtype %6503, %int6, %false, %false, %none : !torch.vtensor<[4,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f32>
%6505 = torch.aten.sum.dim_IntList %6496, %996, %true, %none : !torch.vtensor<[4,32,40,576],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,32,1,1],f32>
%6506 = torch.aten.div.Scalar %6505, %int23040 : !torch.vtensor<[4,32,1,1],f32>, !torch.int -> !torch.vtensor<[4,32,1,1],f32>
%6507 = torch.aten.add.Tensor %6504, %939, %int1 : !torch.vtensor<[4,32,1,1],f32>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,32,1,1],f32>
%6508 = torch.aten.rsqrt %6507 : !torch.vtensor<[4,32,1,1],f32> -> !torch.vtensor<[4,32,1,1],f32>
%6509 = torch.aten.sub.Tensor %6489, %6506, %int1 : !torch.vtensor<[4,32,40,576],f16>, !torch.vtensor<[4,32,1,1],f32>, !torch.int -> !torch.vtensor<[4,32,40,576],f32>
%6510 = torch.aten.mul.Tensor %6509, %6508 : !torch.vtensor<[4,32,40,576],f32>, !torch.vtensor<[4,32,1,1],f32> -> !torch.vtensor<[4,32,40,576],f32>
%6511 = torch.aten.view %6510, %3686 : !torch.vtensor<[4,32,40,576],f32>, !torch.list<int> -> !torch.vtensor<[4,1280,24,24],f32>
%6512 = torch.aten.unsqueeze %479, %int0 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1,1280],f16>
%6513 = torch.aten.unsqueeze %6512, %int2 : !torch.vtensor<[1,1280],f16>, !torch.int -> !torch.vtensor<[1,1280,1],f16>
%6514 = torch.aten.unsqueeze %6513, %int3 : !torch.vtensor<[1,1280,1],f16>, !torch.int -> !torch.vtensor<[1,1280,1,1],f16>
%6515 = torch.aten.unsqueeze %478, %int0 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1,1280],f16>
%6516 = torch.aten.unsqueeze %6515, %int2 : !torch.vtensor<[1,1280],f16>, !torch.int -> !torch.vtensor<[1,1280,1],f16>
%6517 = torch.aten.unsqueeze %6516, %int3 : !torch.vtensor<[1,1280,1],f16>, !torch.int -> !torch.vtensor<[1,1280,1,1],f16>
%6518 = torch.aten.mul.Tensor %6511, %6517 : !torch.vtensor<[4,1280,24,24],f32>, !torch.vtensor<[1,1280,1,1],f16> -> !torch.vtensor<[4,1280,24,24],f32>
%6519 = torch.aten.add.Tensor %6518, %6514, %int1 : !torch.vtensor<[4,1280,24,24],f32>, !torch.vtensor<[1,1280,1,1],f16>, !torch.int -> !torch.vtensor<[4,1280,24,24],f32>
%6520 = torch.prim.NumToTensor.Scalar %float0.000000e00 : !torch.float -> !torch.vtensor<[],f64>
%6521 = torch.aten.to.dtype %6520, %int5, %false, %false, %none : !torch.vtensor<[],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
%6522 = torch.prim.ListConstruct %int4, %int1280, %int24, %int24 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%6523 = torch.aten.broadcast_to %6521, %6522 : !torch.vtensor<[],f16>, !torch.list<int> -> !torch.vtensor<[4,1280,24,24],f16>
%6524 = torch.aten.to.dtype %6519, %int5, %false, %false, %none : !torch.vtensor<[4,1280,24,24],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[4,1280,24,24],f16>
%6525 = torch.prim.ListConstruct %int4, %int1280, %int24, %int24 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%6526 = torch.aten.broadcast_to %6524, %6525 : !torch.vtensor<[4,1280,24,24],f16>, !torch.list<int> -> !torch.vtensor<[4,1280,24,24],f16>
%6527 = torch.aten.sigmoid %6526 : !torch.vtensor<[4,1280,24,24],f16> -> !torch.vtensor<[4,1280,24,24],f16>
%6528 = torch.aten.mul.Tensor %6527, %6526 : !torch.vtensor<[4,1280,24,24],f16>, !torch.vtensor<[4,1280,24,24],f16> -> !torch.vtensor<[4,1280,24,24],f16>
%6529 = torch.aten.view %6528, %3705 : !torch.vtensor<[4,1280,24,24],f16>, !torch.list<int> -> !torch.vtensor<[4,80,16,24,24],f16>
%6530 = torch.aten.abs %6529 : !torch.vtensor<[4,80,16,24,24],f16> -> !torch.vtensor<[4,80,16,24,24],f16>
%values_266, %indices_267 = torch.aten.max.dim %6530, %int2, %true : !torch.vtensor<[4,80,16,24,24],f16>, !torch.int, !torch.bool -> !torch.vtensor<[4,80,1,24,24],f16>, !torch.vtensor<[4,80,1,24,24],si64>
%6531 = torch.aten.view %values_266, %3708 : !torch.vtensor<[4,80,1,24,24],f16>, !torch.list<int> -> !torch.vtensor<[4,80,1,24,24],f16>
%6532 = torch.aten.broadcast_to %6531, %3705 : !torch.vtensor<[4,80,1,24,24],f16>, !torch.list<int> -> !torch.vtensor<[4,80,16,24,24],f16>
%6533 = torch.aten.clone %6532, %int0 : !torch.vtensor<[4,80,16,24,24],f16>, !torch.int -> !torch.vtensor<[4,80,16,24,24],f16>
%6534 = torch.aten.view %6533, %3686 : !torch.vtensor<[4,80,16,24,24],f16>, !torch.list<int> -> !torch.vtensor<[4,1280,24,24],f16>
%6535 = torch.aten.div.Scalar %6534, %int128 : !torch.vtensor<[4,1280,24,24],f16>, !torch.int -> !torch.vtensor<[4,1280,24,24],f16>
%6536 = torch.aten.detach %936 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%6537 = torch.aten.detach %6536 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%6538 = torch.aten.div.Tensor %6528, %6535 : !torch.vtensor<[4,1280,24,24],f16>, !torch.vtensor<[4,1280,24,24],f16> -> !torch.vtensor<[4,1280,24,24],f16>
%6539 = torch.aten.add.Tensor %6538, %6537, %int1 : !torch.vtensor<[4,1280,24,24],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,1280,24,24],f16>
%6540 = torch.aten.round %6539 : !torch.vtensor<[4,1280,24,24],f16> -> !torch.vtensor<[4,1280,24,24],f16>
%6541 = torch.aten.clamp %6540, %int-128, %int127 : !torch.vtensor<[4,1280,24,24],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,1280,24,24],f16>
%6542 = torch.aten.sub.Tensor %6541, %6537, %int1 : !torch.vtensor<[4,1280,24,24],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,1280,24,24],f16>
%6543 = torch.aten.mul.Tensor %6542, %6535 : !torch.vtensor<[4,1280,24,24],f16>, !torch.vtensor<[4,1280,24,24],f16> -> !torch.vtensor<[4,1280,24,24],f16>
%6544 = torch.aten.broadcast_to %477, %3722 : !torch.vtensor<[1280,80,1,3,3],f16>, !torch.list<int> -> !torch.vtensor<[1280,80,16,3,3],f16>
%6545 = torch.aten.clone %6544, %int0 : !torch.vtensor<[1280,80,16,3,3],f16>, !torch.int -> !torch.vtensor<[1280,80,16,3,3],f16>
%6546 = torch.aten.view %6545, %3725 : !torch.vtensor<[1280,80,16,3,3],f16>, !torch.list<int> -> !torch.vtensor<[1280,1280,3,3],f16>
%6547 = torch.aten.mul.Tensor %476, %6546 : !torch.vtensor<[1280,1280,3,3],si8>, !torch.vtensor<[1280,1280,3,3],f16> -> !torch.vtensor<[1280,1280,3,3],f16>
%6548 = torch.aten.convolution %6543, %6547, %475, %984, %984, %984, %false, %985, %int1 : !torch.vtensor<[4,1280,24,24],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[4,1280,24,24],f16>
%6549 = torch.aten.view %6415, %6458 : !torch.vtensor<[4,2560,24,24],f16>, !torch.list<int> -> !torch.vtensor<[4,160,16,24,24],f16>
%6550 = torch.aten.abs %6549 : !torch.vtensor<[4,160,16,24,24],f16> -> !torch.vtensor<[4,160,16,24,24],f16>
%values_268, %indices_269 = torch.aten.max.dim %6550, %int2, %true : !torch.vtensor<[4,160,16,24,24],f16>, !torch.int, !torch.bool -> !torch.vtensor<[4,160,1,24,24],f16>, !torch.vtensor<[4,160,1,24,24],si64>
%6551 = torch.aten.view %values_268, %6461 : !torch.vtensor<[4,160,1,24,24],f16>, !torch.list<int> -> !torch.vtensor<[4,160,1,24,24],f16>
%6552 = torch.aten.broadcast_to %6551, %6458 : !torch.vtensor<[4,160,1,24,24],f16>, !torch.list<int> -> !torch.vtensor<[4,160,16,24,24],f16>
%6553 = torch.aten.clone %6552, %int0 : !torch.vtensor<[4,160,16,24,24],f16>, !torch.int -> !torch.vtensor<[4,160,16,24,24],f16>
%6554 = torch.aten.view %6553, %6439 : !torch.vtensor<[4,160,16,24,24],f16>, !torch.list<int> -> !torch.vtensor<[4,2560,24,24],f16>
%6555 = torch.aten.div.Scalar %6554, %int128 : !torch.vtensor<[4,2560,24,24],f16>, !torch.int -> !torch.vtensor<[4,2560,24,24],f16>
%6556 = torch.aten.detach %936 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%6557 = torch.aten.detach %6556 : !torch.vtensor<[],f16> -> !torch.vtensor<[],f16>
%6558 = torch.aten.div.Tensor %6415, %6555 : !torch.vtensor<[4,2560,24,24],f16>, !torch.vtensor<[4,2560,24,24],f16> -> !torch.vtensor<[4,2560,24,24],f16>
%6559 = torch.aten.add.Tensor %6558, %6557, %int1 : !torch.vtensor<[4,2560,24,24],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,2560,24,24],f16>
%6560 = torch.aten.round %6559 : !torch.vtensor<[4,2560,24,24],f16> -> !torch.vtensor<[4,2560,24,24],f16>
%6561 = torch.aten.clamp %6560, %int-128, %int127 : !torch.vtensor<[4,2560,24,24],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,2560,24,24],f16>
%6562 = torch.aten.sub.Tensor %6561, %6557, %int1 : !torch.vtensor<[4,2560,24,24],f16>, !torch.vtensor<[],f16>, !torch.int -> !torch.vtensor<[4,2560,24,24],f16>
%6563 = torch.aten.mul.Tensor %6562, %6555 : !torch.vtensor<[4,2560,24,24],f16>, !torch.vtensor<[4,2560,24,24],f16> -> !torch.vtensor<[4,2560,24,24],f16>
%6564 = torch.aten.broadcast_to %474, %6077 : !torch.vtensor<[1280,160,1,1,1],f16>, !torch.list<int> -> !torch.vtensor<[1280,160,16,1,1],f16>
%6565 = torch.aten.clone %6564, %int0 : !torch.vtensor<[1280,160,16,1,1],f16>, !torch.int -> !torch.vtensor<[1280,160,16,1,1],f16>
%6566 = torch.aten.view %6565, %6080 : !torch.vtensor<[1280,160,16,1,1],f16>, !torch.list<int> -> !torch.vtensor<[1280,2560,1,1],f16>
%6567 = torch.aten.mul.Tensor %473, %6566 : !torch.vtensor<[1280,2560,1,1],si8>, !torch.vtensor<[1280,2560,1,1],f16> -> !torch.vtensor<[1280,2560,1,1],f16>
%6568 = torch.aten.convolution %6563, %6567, %472, %984, %985, %984, %false, %985, %int1 : !torch.vtensor<[4,2560,24,24],f16>, !torch.vtensor<[1280,2560,1,1],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[4,1280,24,24],f16>
// NOTE: the gist is truncated mid-line here; the convolution above is
// reconstructed from the identical 1x1 shortcut at %6389, and the bias
// operand %472 is inferred from the constant-numbering pattern. The
// remainder of the module is elided in this gist.