pashu123 · October 4, 2022 14:34
diff --git a/stable_diff_f16.mlir b/stable_diff_f16.mlir
 // -----// IR Dump After DropShapeCalculations (torch-drop-shape-calculations) //----- //
 func.func @forward(%arg0: !torch.vtensor<[2,4,64,64],f16>, %arg1: !torch.vtensor<[1],f16>, %arg2: !torch.vtensor<[2,77,768],f16>) -> !torch.vtensor<[2,4,64,64],f16> {
  %int160 = torch.constant.int 160
  %float1.600000e02 = torch.constant.float 1.600000e+02
  %str = torch.constant.str "AssertionError: "
  %int4 = torch.constant.int 4
  %float0.000000e00 = torch.constant.float 0.000000e+00
  %0 = torch.vtensor.literal(dense<0.079056941504209485> : tensor<f64>) : !torch.vtensor<[],f64>
  %1 = torch.vtensor.literal(dense<0.11180339887498948> : tensor<f64>) : !torch.vtensor<[],f64>
  %2 = torch.vtensor.literal(dense<0.15811388300841897> : tensor<f64>) : !torch.vtensor<[],f64>
  %3 = torch.vtensor.literal(dense<9.9999999999999995E-7> : tensor<f64>) : !torch.vtensor<[],f64>
  %4 = torch.vtensor.literal(dense<1.000000e+00> : tensor<f64>) : !torch.vtensor<[],f64>
  %5 = torch.vtensor.literal(dense<1.000000e-05> : tensor<f64>) : !torch.vtensor<[],f64>
  %6 = torch.vtensor.literal(dense<1> : tensor<si64>) : !torch.vtensor<[],si64>
  %7 = torch.vtensor.literal(dense<160> : tensor<si64>) : !torch.vtensor<[],si64>
  %8 = torch.vtensor.literal(dense<-9.2103403719761836> : tensor<f64>) : !torch.vtensor<[],f64>
  %9 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x320xf16>) : !torch.vtensor<[1280,320],f16>
  %10 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x4x3x3xf16>) : !torch.vtensor<[320,4,3,3],f16>
  %11 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x320x3x3xf16>) : !torch.vtensor<[640,320,3,3],f16>
  %12 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x320x1x1xf16>) : !torch.vtensor<[640,320,1,1],f16>
  %13 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x640x3x3xf16>) : !torch.vtensor<[1280,640,3,3],f16>
  %14 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x640x1x1xf16>) : !torch.vtensor<[1280,640,1,1],f16>
  %15 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x2560x3x3xf16>) : !torch.vtensor<[1280,2560,3,3],f16>
  %16 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x2560x1x1xf16>) : !torch.vtensor<[1280,2560,1,1],f16>
  %17 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1920x3x3xf16>) : !torch.vtensor<[1280,1920,3,3],f16>
  %18 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1920x1x1xf16>) : !torch.vtensor<[1280,1920,1,1],f16>
  %19 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x768xf16>) : !torch.vtensor<[1280,768],f16>
  %20 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16>
  %21 = torch.vtensor.literal(dense_resource<__elided__> : tensor<10240x1280xf16>) : !torch.vtensor<[10240,1280],f16>
  %22 = torch.vtensor.literal(dense_resource<__elided__> : tensor<10240xf16>) : !torch.vtensor<[10240],f16>
  %23 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x5120xf16>) : !torch.vtensor<[1280,5120],f16>
  %24 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x1x1xf16>) : !torch.vtensor<[1280,1280,1,1],f16>
  %25 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x3x3xf16>) : !torch.vtensor<[1280,1280,3,3],f16>
  %26 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1920xf16>) : !torch.vtensor<[1920],f16>
  %27 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x1920x3x3xf16>) : !torch.vtensor<[640,1920,3,3],f16>
  %28 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x1920x1x1xf16>) : !torch.vtensor<[640,1920,1,1],f16>
  %29 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
  %30 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x1280x3x3xf16>) : !torch.vtensor<[640,1280,3,3],f16>
  %31 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x1280x1x1xf16>) : !torch.vtensor<[640,1280,1,1],f16>
  %32 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x960x3x3xf16>) : !torch.vtensor<[640,960,3,3],f16>
  %33 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x1280xf16>) : !torch.vtensor<[640,1280],f16>
  %34 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x960x1x1xf16>) : !torch.vtensor<[640,960,1,1],f16>
  %35 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x768xf16>) : !torch.vtensor<[640,768],f16>
  %36 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xf16>) : !torch.vtensor<[640,640],f16>
  %37 = torch.vtensor.literal(dense_resource<__elided__> : tensor<5120x640xf16>) : !torch.vtensor<[5120,640],f16>
  %38 = torch.vtensor.literal(dense_resource<__elided__> : tensor<5120xf16>) : !torch.vtensor<[5120],f16>
  %39 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x2560xf16>) : !torch.vtensor<[640,2560],f16>
  %40 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640x1x1xf16>) : !torch.vtensor<[640,640,1,1],f16>
  %41 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640x3x3xf16>) : !torch.vtensor<[640,640,3,3],f16>
  %42 = torch.vtensor.literal(dense_resource<__elided__> : tensor<960xf16>) : !torch.vtensor<[960],f16>
  %43 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x960x3x3xf16>) : !torch.vtensor<[320,960,3,3],f16>
  %44 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x960x1x1xf16>) : !torch.vtensor<[320,960,1,1],f16>
  %45 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
  %46 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x640x3x3xf16>) : !torch.vtensor<[320,640,3,3],f16>
  %47 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320x3x3xf16>) : !torch.vtensor<[320,320,3,3],f16>
  %48 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x640x1x1xf16>) : !torch.vtensor<[320,640,1,1],f16>
  %49 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x768xf16>) : !torch.vtensor<[320,768],f16>
  %50 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xf16>) : !torch.vtensor<[320,320],f16>
  %51 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560x320xf16>) : !torch.vtensor<[2560,320],f16>
  %52 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560xf16>) : !torch.vtensor<[2560],f16>
  %53 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x1280xf16>) : !torch.vtensor<[320,1280],f16>
  %54 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320x1x1xf16>) : !torch.vtensor<[320,320,1,1],f16>
  %55 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
  %56 = torch.vtensor.literal(dense_resource<__elided__> : tensor<4x320x3x3xf16>) : !torch.vtensor<[4,320,3,3],f16>
  %57 = torch.vtensor.literal(dense<[-1.393320e-03, -1.588820e-03, -2.624990e-04, -2.531050e-03]> : tensor<4xf16>) : !torch.vtensor<[4],f16>
  %int2 = torch.constant.int 2
  %false = torch.constant.bool false
  %int0 = torch.constant.int 0
  %int6 = torch.constant.int 6
  %none = torch.constant.none
  %int9223372036854775807 = torch.constant.int 9223372036854775807
  %int1 = torch.constant.int 1
  %int-1 = torch.constant.int -1
  %int5 = torch.constant.int 5
  %true = torch.constant.bool true
  %int32 = torch.constant.int 32
  %int10 = torch.constant.int 10
  %int4096 = torch.constant.int 4096
  %int3 = torch.constant.int 3
  %int320 = torch.constant.int 320
  %int64 = torch.constant.int 64
  %int1310720 = torch.constant.int 1310720
  %float1.000000e-05 = torch.constant.float 1.000000e-05
  %int8192 = torch.constant.int 8192
  %int8 = torch.constant.int 8
  %int40 = torch.constant.int 40
  %int16 = torch.constant.int 16
  %int-2 = torch.constant.int -2
  %int163840 = torch.constant.int 163840
  %int16777216 = torch.constant.int 16777216
  %int154 = torch.constant.int 154
  %int768 = torch.constant.int 768
  %int77 = torch.constant.int 77
  %int24640 = torch.constant.int 24640
  %int3080 = torch.constant.int 3080
  %int315392 = torch.constant.int 315392
  %int2560 = torch.constant.int 2560
  %int1280 = torch.constant.int 1280
  %str_0 = torch.constant.str "none"
  %int20480 = torch.constant.int 20480
  %int1024 = torch.constant.int 1024
  %int20 = torch.constant.int 20
  %int640 = torch.constant.int 640
  %int655360 = torch.constant.int 655360
  %int2048 = torch.constant.int 2048
  %int80 = torch.constant.int 80
  %int81920 = torch.constant.int 81920
  %int1048576 = torch.constant.int 1048576
  %int49280 = torch.constant.int 49280
  %int6160 = torch.constant.int 6160
  %int78848 = torch.constant.int 78848
  %int5120 = torch.constant.int 5120
  %int256 = torch.constant.int 256
  %int327680 = torch.constant.int 327680
  %int512 = torch.constant.int 512
  %int40960 = torch.constant.int 40960
  %int65536 = torch.constant.int 65536
  %int98560 = torch.constant.int 98560
  %int12320 = torch.constant.int 12320
  %int19712 = torch.constant.int 19712
  %int10240 = torch.constant.int 10240
  %int128 = torch.constant.int 128
  %int4928 = torch.constant.int 4928
  %float2.000000e00 = torch.constant.float 2.000000e+00
  %int60 = torch.constant.int 60
  %int1920 = torch.constant.int 1920
  %int30 = torch.constant.int 30
  %int960 = torch.constant.int 960
  %58 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
  %59 = torch.aten.expand %arg1, %58, %false : !torch.vtensor<[1],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[2],f16>
  %cuda3A0 = torch.constant.device "cuda:0"
  %60 = torch.aten.arange.start %int0, %int160, %int6, %none, %cuda3A0, %false : !torch.int, !torch.int, !torch.int, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[160],f32>
  %61 = torch.aten.mul.Tensor %60, %8 : !torch.vtensor<[160],f32>, !torch.vtensor<[],f64> -> !torch.vtensor<[160],f32>
  %62 = torch.aten.div.Tensor %61, %7 : !torch.vtensor<[160],f32>, !torch.vtensor<[],si64> -> !torch.vtensor<[160],f32>
  %63 = torch.aten.exp %62 : !torch.vtensor<[160],f32> -> !torch.vtensor<[160],f32>
  %64 = torch.aten.slice.Tensor %59, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2],f16>
  %65 = torch.aten.unsqueeze %64, %int1 : !torch.vtensor<[2],f16>, !torch.int -> !torch.vtensor<[2,1],f16>
  %66 = torch.aten._to_copy %65, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,1],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1],f32>
  %67 = torch.aten.unsqueeze %63, %int0 : !torch.vtensor<[160],f32>, !torch.int -> !torch.vtensor<[1,160],f32>
  %68 = torch.aten.slice.Tensor %67, %int1, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[1,160],f32>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,160],f32>
  %69 = torch.aten.mul.Tensor %66, %68 : !torch.vtensor<[2,1],f32>, !torch.vtensor<[1,160],f32> -> !torch.vtensor<[2,160],f32>
  %70 = torch.aten.mul.Tensor %69, %6 : !torch.vtensor<[2,160],f32>, !torch.vtensor<[],si64> -> !torch.vtensor<[2,160],f32>
  %71 = torch.aten.sin %70 : !torch.vtensor<[2,160],f32> -> !torch.vtensor<[2,160],f32>
  %72 = torch.aten.cos %70 : !torch.vtensor<[2,160],f32> -> !torch.vtensor<[2,160],f32>
  %73 = torch.prim.ListConstruct %71, %72 : (!torch.vtensor<[2,160],f32>, !torch.vtensor<[2,160],f32>) -> !torch.list<vtensor>
  %74 = torch.aten.cat %73, %int-1 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[2,320],f32>
  %75 = torch.aten.slice.Tensor %74, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,320],f32>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,320],f32>
  %76 = torch.aten.slice.Tensor %75, %int1, %int160, %int9223372036854775807, %int1 : !torch.vtensor<[2,320],f32>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,160],f32>
  %77 = torch.aten.slice.Tensor %74, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,320],f32>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,320],f32>
  %78 = torch.aten.slice.Tensor %77, %int1, %int0, %int160, %int1 : !torch.vtensor<[2,320],f32>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,160],f32>
  %79 = torch.prim.ListConstruct %76, %78 : (!torch.vtensor<[2,160],f32>, !torch.vtensor<[2,160],f32>) -> !torch.list<vtensor>
  %80 = torch.aten.cat %79, %int-1 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[2,320],f32>
  %81 = torch.aten._to_copy %80, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,320],f16>
  %82 = torch.aten.t %9 : !torch.vtensor<[1280,320],f16> -> !torch.vtensor<[320,1280],f16>
  %83 = torch.aten.addmm %29, %81, %82, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[2,320],f16>, !torch.vtensor<[320,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
  %84 = torch.aten.silu %83 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
  %85 = torch.aten.t %20 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
  %86 = torch.aten.addmm %29, %84, %85, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
  %87 = torch.prim.ListConstruct %int1, %int1 : (!torch.int, !torch.int) -> !torch.list<int>
  %88 = torch.prim.ListConstruct %int0, %int0 : (!torch.int, !torch.int) -> !torch.list<int>
  %89 = torch.aten._convolution %arg0, %10, %55, %87, %87, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,4,64,64],f16>, !torch.vtensor<[320,4,3,3],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,320,64,64],f16>
  %90 = torch.prim.ListConstruct %int2, %int32, %int10, %int4096 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
  %91 = torch.aten.view %89, %90 : !torch.vtensor<[2,320,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,32,10,4096],f16>
  %92 = torch.aten._to_copy %91, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,32,10,4096],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,32,10,4096],f32>
  %93 = torch.prim.ListConstruct %int2, %int3 : (!torch.int, !torch.int) -> !torch.list<int>
  %94 = torch.aten.var.correction %92, %93, %int0, %true : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>
  %95 = torch.aten.mean.dim %92, %93, %true, %none : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
  %96 = torch.aten.add.Tensor %94, %5, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
  %97 = torch.aten.rsqrt %96 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
  %98 = torch.aten.sub.Tensor %91, %95, %int1 : !torch.vtensor<[2,32,10,4096],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,10,4096],f32>
  %99 = torch.aten.mul.Tensor %98, %97 : !torch.vtensor<[2,32,10,4096],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,10,4096],f32>
  %100 = torch.prim.ListConstruct %int2, %int320, %int64, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
  %101 = torch.aten.view %99, %100 : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f32>
  %102 = torch.aten.unsqueeze %55, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16>
  %103 = torch.aten.unsqueeze %102, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16>
  %104 = torch.aten.mul.Tensor %101, %103 : !torch.vtensor<[2,320,64,64],f32>, !torch.vtensor<[320,1,1],f16> -> !torch.vtensor<[2,320,64,64],f32>
  %105 = torch.aten.unsqueeze %55, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16>
  %106 = torch.aten.unsqueeze %105, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16>
  %107 = torch.aten.add.Tensor %104, %106, %int1 : !torch.vtensor<[2,320,64,64],f32>, !torch.vtensor<[320,1,1],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f32>
  %108 = torch.aten._to_copy %107, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,320,64,64],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,320,64,64],f16>
  %109 = torch.aten.silu %108 : !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16>
  %110 = torch.aten._convolution %109, %47, %55, %87, %87, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[320,320,3,3],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,320,64,64],f16>
  %111 = torch.aten.silu %86 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
  %112 = torch.aten.t %53 : !torch.vtensor<[320,1280],f16> -> !torch.vtensor<[1280,320],f16>
  %113 = torch.aten.addmm %55, %111, %112, %int1, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[2,320],f16>
  %114 = torch.aten.slice.Tensor %113, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,320],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,320],f16>
  %115 = torch.aten.slice.Tensor %114, %int1, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,320],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,320],f16>
  %116 = torch.aten.unsqueeze %115, %int2 : !torch.vtensor<[2,320],f16>, !torch.int -> !torch.vtensor<[2,320,1],f16>
  %117 = torch.aten.unsqueeze %116, %int3 : !torch.vtensor<[2,320,1],f16>, !torch.int -> !torch.vtensor<[2,320,1,1],f16>
  %118 = torch.aten.add.Tensor %110, %117, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,1,1],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f16>
  %119 = torch.aten.view %118, %90 : !torch.vtensor<[2,320,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,32,10,4096],f16>
  %120 = torch.aten._to_copy %119, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,32,10,4096],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,32,10,4096],f32>
  %121 = torch.aten.var.correction %120, %93, %int0, %true : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>
  %122 = torch.aten.mean.dim %120, %93, %true, %none : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
  %123 = torch.aten.add.Tensor %121, %5, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
  %124 = torch.aten.rsqrt %123 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
  %125 = torch.aten.sub.Tensor %119, %122, %int1 : !torch.vtensor<[2,32,10,4096],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,10,4096],f32>
  %126 = torch.aten.mul.Tensor %125, %124 : !torch.vtensor<[2,32,10,4096],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,10,4096],f32>
  %127 = torch.aten.view %126, %100 : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f32>
  %128 = torch.aten.unsqueeze %55, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16>
  %129 = torch.aten.unsqueeze %128, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16>
  %130 = torch.aten.mul.Tensor %127, %129 : !torch.vtensor<[2,320,64,64],f32>, !torch.vtensor<[320,1,1],f16> -> !torch.vtensor<[2,320,64,64],f32>
  %131 = torch.aten.unsqueeze %55, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16>
  %132 = torch.aten.unsqueeze %131, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16>
  %133 = torch.aten.add.Tensor %130, %132, %int1 : !torch.vtensor<[2,320,64,64],f32>, !torch.vtensor<[320,1,1],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f32>
  %134 = torch.aten._to_copy %133, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,320,64,64],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,320,64,64],f16>
  %135 = torch.aten.silu %134 : !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16>
  %136 = torch.aten._convolution %135, %47, %55, %87, %87, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[320,320,3,3],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,320,64,64],f16>
  %137 = torch.aten.add.Tensor %89, %136, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f16>
  %138 = torch.aten.div.Tensor %137, %4 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[2,320,64,64],f16>
  %139 = torch.aten.view %138, %90 : !torch.vtensor<[2,320,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,32,10,4096],f16>
  %140 = torch.aten._to_copy %139, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,32,10,4096],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,32,10,4096],f32>
  %141 = torch.aten.var.correction %140, %93, %int0, %true : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>
  %142 = torch.aten.mean.dim %140, %93, %true, %none : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
  %143 = torch.aten.add.Tensor %141, %3, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
  %144 = torch.aten.rsqrt %143 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
  %145 = torch.aten.sub.Tensor %139, %142, %int1 : !torch.vtensor<[2,32,10,4096],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,10,4096],f32>
  %146 = torch.aten.mul.Tensor %145, %144 : !torch.vtensor<[2,32,10,4096],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,10,4096],f32>
  %147 = torch.aten.view %146, %100 : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f32>
  %148 = torch.aten.unsqueeze %55, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16>
  %149 = torch.aten.unsqueeze %148, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16>
  %150 = torch.aten.mul.Tensor %147, %149 : !torch.vtensor<[2,320,64,64],f32>, !torch.vtensor<[320,1,1],f16> -> !torch.vtensor<[2,320,64,64],f32>
  %151 = torch.aten.unsqueeze %55, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16>
  %152 = torch.aten.unsqueeze %151, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16>
  %153 = torch.aten.add.Tensor %150, %152, %int1 : !torch.vtensor<[2,320,64,64],f32>, !torch.vtensor<[320,1,1],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f32>
  %154 = torch.aten._to_copy %153, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,320,64,64],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,320,64,64],f16>
  %155 = torch.aten._convolution %154, %54, %55, %87, %88, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[320,320,1,1],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,320,64,64],f16>
  %156 = torch.prim.ListConstruct %int0, %int2, %int3, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
  %157 = torch.aten.permute %155, %156 : !torch.vtensor<[2,320,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,64,64,320],f16>
  %158 = torch.prim.ListConstruct %int2, %int4096, %int320 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
  %159 = torch.prim.ListConstruct %int1310720, %int1, %int4096 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
  %160 = torch.aten._reshape_alias %157, %158, %159 : !torch.vtensor<[2,64,64,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
  %161 = torch.prim.ListConstruct %int320 : (!torch.int) -> !torch.list<int>
  %result0, %result1, %result2 = torch.aten.native_layer_norm %160, %161, %55, %55, %float1.000000e-05 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.vtensor<[320],f16>, !torch.vtensor<[320],f16>, !torch.float -> !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,1],f32>, !torch.vtensor<[2,4096,1],f32>
  %162 = torch.aten.t %50 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
  %163 = torch.prim.ListConstruct %int8192, %int320 : (!torch.int, !torch.int) -> !torch.list<int>
  %164 = torch.prim.ListConstruct %int320, %int1 : (!torch.int, !torch.int) -> !torch.list<int>
  %165 = torch.aten._reshape_alias %result0, %163, %164 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
  %166 = torch.aten.mm %165, %162 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16>
  %167 = torch.aten._unsafe_view %166, %158 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
  %168 = torch.aten.t %50 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
  %169 = torch.aten._reshape_alias %result0, %163, %164 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
  %170 = torch.aten.mm %169, %168 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16>
  %171 = torch.aten._unsafe_view %170, %158 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
  %172 = torch.aten.t %50 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
  %173 = torch.aten._reshape_alias %result0, %163, %164 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
  %174 = torch.aten.mm %173, %172 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16>
  %175 = torch.aten._unsafe_view %174, %158 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
  %176 = torch.prim.ListConstruct %int2, %int4096, %int8, %int40 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
  %177 = torch.prim.ListConstruct %int1310720, %int320, %int40, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
  %178 = torch.aten._reshape_alias %167, %176, %177 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16>
  %179 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
  %180 = torch.aten.permute %178, %179 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16>
  %181 = torch.aten.clone %180, %int0 : !torch.vtensor<[2,8,4096,40],f16>, !torch.int -> !torch.vtensor<[2,8,4096,40],f16>
  %182 = torch.prim.ListConstruct %int16, %int4096, %int40 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
  %183 = torch.aten._unsafe_view %181, %182 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
  %184 = torch.aten._reshape_alias %171, %176, %177 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16>
  %185 = torch.aten.permute %184, %179 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16>
  %186 = torch.aten.clone %185, %int0 : !torch.vtensor<[2,8,4096,40],f16>, !torch.int -> !torch.vtensor<[2,8,4096,40],f16>
  %187 = torch.aten._unsafe_view %186, %182 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
  %188 = torch.aten._reshape_alias %175, %176, %177 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16>
  %189 = torch.aten.permute %188, %179 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16>
  %190 = torch.aten.clone %189, %int0 : !torch.vtensor<[2,8,4096,40],f16>, !torch.int -> !torch.vtensor<[2,8,4096,40],f16>
  %191 = torch.aten._unsafe_view %190, %182 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
  %192 = torch.aten.transpose.int %187, %int-1, %int-2 : !torch.vtensor<[16,4096,40],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,40,4096],f16>
  %193 = torch.aten.expand %183, %182, %false : !torch.vtensor<[16,4096,40],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,4096,40],f16>
  %194 = torch.prim.ListConstruct %int163840, %int40, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
  %195 = torch.aten._reshape_alias %193, %182, %194 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
  %196 = torch.prim.ListConstruct %int16, %int40, %int4096 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
  %197 = torch.aten.expand %192, %196, %false : !torch.vtensor<[16,40,4096],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,40,4096],f16>
  %198 = torch.prim.ListConstruct %int163840, %int1, %int40 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
  %199 = torch.aten._reshape_alias %197, %196, %198 : !torch.vtensor<[16,40,4096],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,40,4096],f16>
  %200 = torch.aten.bmm %195, %199 : !torch.vtensor<[16,4096,40],f16>, !torch.vtensor<[16,40,4096],f16> -> !torch.vtensor<[16,4096,4096],f16>
  %201 = torch.prim.ListConstruct %int16, %int4096, %int4096 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
  %202 = torch.aten._unsafe_view %200, %201 : !torch.vtensor<[16,4096,4096],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,4096],f16>
  %203 = torch.aten.mul.Tensor %202, %2 : !torch.vtensor<[16,4096,4096],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,4096,4096],f16>
  %204 = torch.aten._softmax %203, %int-1, %false : !torch.vtensor<[16,4096,4096],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,4096,4096],f16>
  %205 = torch.aten.expand %204, %201, %false : !torch.vtensor<[16,4096,4096],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,4096,4096],f16>
  %206 = torch.prim.ListConstruct %int16777216, %int4096, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
  %207 = torch.aten._reshape_alias %205, %201, %206 : !torch.vtensor<[16,4096,4096],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,4096,4096],f16>
  %208 = torch.aten.expand %191, %182, %false : !torch.vtensor<[16,4096,40],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,4096,40],f16>
  %209 = torch.aten._reshape_alias %208, %182, %194 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
  %210 = torch.aten.bmm %207, %209 : !torch.vtensor<[16,4096,4096],f16>, !torch.vtensor<[16,4096,40],f16> -> !torch.vtensor<[16,4096,40],f16>
  %211 = torch.aten._unsafe_view %210, %182 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
  %212 = torch.prim.ListConstruct %int2, %int8, %int4096, %int40 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
  %213 = torch.prim.ListConstruct %int1310720, %int163840, %int40, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
  %214 = torch.aten._reshape_alias %211, %212, %213 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16>
  %215 = torch.aten.permute %214, %179 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16>
  %216 = torch.aten.clone %215, %int0 : !torch.vtensor<[2,4096,8,40],f16>, !torch.int -> !torch.vtensor<[2,4096,8,40],f16>
  %217 = torch.aten._unsafe_view %216, %158 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
  %218 = torch.aten.t %50 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
  %219 = torch.aten.view %217, %163 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
  %220 = torch.aten.addmm %55, %219, %218, %int1, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[8192,320],f16>
  %221 = torch.aten.view %220, %158 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
  %222 = torch.aten.add.Tensor %221, %160, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16>
  %result0_1, %result1_2, %result2_3 = torch.aten.native_layer_norm %222, %161, %55, %55, %float1.000000e-05 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.vtensor<[320],f16>, !torch.vtensor<[320],f16>, !torch.float -> !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,1],f32>, !torch.vtensor<[2,4096,1],f32>
  %223 = torch.aten.t %50 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
  %224 = torch.aten._reshape_alias %result0_1, %163, %164 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
  %225 = torch.aten.mm %224, %223 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16>
  %226 = torch.aten._unsafe_view %225, %158 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
  %227 = torch.aten.t %49 : !torch.vtensor<[320,768],f16> -> !torch.vtensor<[768,320],f16>
  %228 = torch.prim.ListConstruct %int154, %int768 : (!torch.int, !torch.int) -> !torch.list<int>
  %229 = torch.prim.ListConstruct %int768, %int1 : (!torch.int, !torch.int) -> !torch.list<int>
  %230 = torch.aten._reshape_alias %arg2, %228, %229 : !torch.vtensor<[2,77,768],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[154,768],f16>
  %231 = torch.aten.mm %230, %227 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,320],f16> -> !torch.vtensor<[154,320],f16>
  %232 = torch.prim.ListConstruct %int2, %int77, %int320 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
  %233 = torch.aten._unsafe_view %231, %232 : !torch.vtensor<[154,320],f16>, !torch.list<int> -> !torch.vtensor<[2,77,320],f16>
  %234 = torch.aten.t %49 : !torch.vtensor<[320,768],f16> -> !torch.vtensor<[768,320],f16>
  %235 = torch.aten._reshape_alias %arg2, %228, %229 : !torch.vtensor<[2,77,768],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[154,768],f16>
  %236 = torch.aten.mm %235, %234 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,320],f16> -> !torch.vtensor<[154,320],f16>
  %237 = torch.aten._unsafe_view %236, %232 : !torch.vtensor<[154,320],f16>, !torch.list<int> -> !torch.vtensor<[2,77,320],f16>
  %238 = torch.aten._reshape_alias %226, %176, %177 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16>
  %239 = torch.aten.permute %238, %179 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16>
  %240 = torch.aten.clone %239, %int0 : !torch.vtensor<[2,8,4096,40],f16>, !torch.int -> !torch.vtensor<[2,8,4096,40],f16>
  %241 = torch.aten._unsafe_view %240, %182 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
  %242 = torch.prim.ListConstruct %int2, %int77, %int8, %int40 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
  %243 = torch.prim.ListConstruct %int24640, %int320, %int40, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
  %244 = torch.aten._reshape_alias %233, %242, %243 : !torch.vtensor<[2,77,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,77,8,40],f16>
  %245 = torch.aten.permute %244, %179 : !torch.vtensor<[2,77,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,40],f16>
  %246 = torch.aten.clone %245, %int0 : !torch.vtensor<[2,8,77,40],f16>, !torch.int -> !torch.vtensor<[2,8,77,40],f16>
  %247 = torch.prim.ListConstruct %int16, %int77, %int40 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
  %248 = torch.aten._unsafe_view %246, %247 : !torch.vtensor<[2,8,77,40],f16>, !torch.list<int> -> !torch.vtensor<[16,77,40],f16>
  %249 = torch.aten._reshape_alias %237, %242, %243 : !torch.vtensor<[2,77,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,77,8,40],f16>
  %250 = torch.aten.permute %249, %179 : !torch.vtensor<[2,77,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,40],f16>
  %251 = torch.aten.clone %250, %int0 : !torch.vtensor<[2,8,77,40],f16>, !torch.int -> !torch.vtensor<[2,8,77,40],f16>
  %252 = torch.aten._unsafe_view %251, %247 : !torch.vtensor<[2,8,77,40],f16>, !torch.list<int> -> !torch.vtensor<[16,77,40],f16>
  %253 = torch.aten.transpose.int %248, %int-1, %int-2 : !torch.vtensor<[16,77,40],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,40,77],f16>
  %254 = torch.aten.expand %241, %182, %false : !torch.vtensor<[16,4096,40],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,4096,40],f16>
  %255 = torch.aten._reshape_alias %254, %182, %194 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
  %256 = torch.prim.ListConstruct %int16, %int40, %int77 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
  %257 = torch.aten.expand %253, %256, %false : !torch.vtensor<[16,40,77],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,40,77],f16>
  %258 = torch.prim.ListConstruct %int3080, %int1, %int40 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
  %259 = torch.aten._reshape_alias %257, %256, %258 : !torch.vtensor<[16,40,77],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,40,77],f16>
  %260 = torch.aten.bmm %255, %259 : !torch.vtensor<[16,4096,40],f16>, !torch.vtensor<[16,40,77],f16> -> !torch.vtensor<[16,4096,77],f16>
  %261 = torch.prim.ListConstruct %int16, %int4096, %int77 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
  %262 = torch.aten._unsafe_view %260, %261 : !torch.vtensor<[16,4096,77],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,77],f16>
  %263 = torch.aten.mul.Tensor %262, %2 : !torch.vtensor<[16,4096,77],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,4096,77],f16>
  %264 = torch.aten._softmax %263, %int-1, %false : !torch.vtensor<[16,4096,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,4096,77],f16>
  %265 = torch.aten.expand %264, %261, %false : !torch.vtensor<[16,4096,77],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,4096,77],f16>
  %266 = torch.prim.ListConstruct %int315392, %int77, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
  %267 = torch.aten._reshape_alias %265, %261, %266 : !torch.vtensor<[16,4096,77],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,4096,77],f16>
  %268 = torch.aten.expand %252, %247, %false : !torch.vtensor<[16,77,40],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,77,40],f16>
  %269 = torch.prim.ListConstruct %int3080, %int40, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
  %270 = torch.aten._reshape_alias %268, %247, %269 : !torch.vtensor<[16,77,40],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,77,40],f16>
  %271 = torch.aten.bmm %267, %270 : !torch.vtensor<[16,4096,77],f16>, !torch.vtensor<[16,77,40],f16> -> !torch.vtensor<[16,4096,40],f16>
  %272 = torch.aten._unsafe_view %271, %182 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
  %273 = torch.aten._reshape_alias %272, %212, %213 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16>
  %274 = torch.aten.permute %273, %179 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16>
  %275 = torch.aten.clone %274, %int0 : !torch.vtensor<[2,4096,8,40],f16>, !torch.int -> !torch.vtensor<[2,4096,8,40],f16>
  %276 = torch.aten._unsafe_view %275, %158 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
  %277 = torch.aten.t %50 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
  %278 = torch.aten.view %276, %163 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
  %279 = torch.aten.addmm %55, %278, %277, %int1, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[8192,320],f16>
  %280 = torch.aten.view %279, %158 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
  %281 = torch.aten.add.Tensor %280, %222, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16>
  %result0_4, %result1_5, %result2_6 = torch.aten.native_layer_norm %281, %161, %55, %55, %float1.000000e-05 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.vtensor<[320],f16>, !torch.vtensor<[320],f16>, !torch.float -> !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,1],f32>, !torch.vtensor<[2,4096,1],f32>
  %282 = torch.aten.t %51 : !torch.vtensor<[2560,320],f16> -> !torch.vtensor<[320,2560],f16>
  %283 = torch.aten.view %result0_4, %163 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
  %284 = torch.aten.addmm %52, %283, %282, %int1, %int1 : !torch.vtensor<[2560],f16>, !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,2560],f16>, !torch.int, !torch.int -> !torch.vtensor<[8192,2560],f16>
  %285 = torch.prim.ListConstruct %int2, %int4096, %int2560 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
  %286 = torch.aten.view %284, %285 : !torch.vtensor<[8192,2560],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,2560],f16>
  %287 = torch.aten.slice.Tensor %286, %int-1, %int0, %int1280, %int1 : !torch.vtensor<[2,4096,2560],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,4096,1280],f16>
  %288 = torch.aten.slice.Tensor %286, %int-1, %int1280, %int2560, %int1 : !torch.vtensor<[2,4096,2560],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,4096,1280],f16>
  %289 = torch.aten.gelu %288, %str_0 : !torch.vtensor<[2,4096,1280],f16>, !torch.str -> !torch.vtensor<[2,4096,1280],f16>
  %290 = torch.aten.mul.Tensor %287, %289 : !torch.vtensor<[2,4096,1280],f16>, !torch.vtensor<[2,4096,1280],f16> -> !torch.vtensor<[2,4096,1280],f16>
  %291 = torch.aten.t %53 : !torch.vtensor<[320,1280],f16> -> !torch.vtensor<[1280,320],f16>
  %292 = torch.prim.ListConstruct %int8192, %int1280 : (!torch.int, !torch.int) -> !torch.list<int>
  %293 = torch.aten.view %290, %292 : !torch.vtensor<[2,4096,1280],f16>, !torch.list<int> -> !torch.vtensor<[8192,1280],f16>
  %294 = torch.aten.addmm %55, %293, %291, %int1, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[8192,1280],f16>, !torch.vtensor<[1280,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[8192,320],f16>
  %295 = torch.aten.view %294, %158 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
  %296 = torch.aten.add.Tensor %295, %281, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16>
  %297 = torch.prim.ListConstruct %int2, %int64, %int64, %int320 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
  %298 = torch.prim.ListConstruct %int1310720, %int20480, %int320, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
  %299 = torch.aten._reshape_alias %296, %297, %298 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,64,64,320],f16>
  %300 = torch.prim.ListConstruct %int0, %int3, %int1, %int2 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
  %301 = torch.aten.permute %299, %300 : !torch.vtensor<[2,64,64,320],f16>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f16>
  %302 = torch.aten._convolution %301, %54, %55, %87, %88, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[320,320,1,1],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,320,64,64],f16>
  %303 = torch.aten.add.Tensor %302, %138, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f16>
  %304 = torch.aten.clone %303, %int0 : !torch.vtensor<[2,320,64,64],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f16>
  %305 = torch.aten.view %304, %90 : !torch.vtensor<[2,320,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,32,10,4096],f16>
  %306 = torch.aten._to_copy %305, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,32,10,4096],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,32,10,4096],f32>
  %307 = torch.aten.var.correction %306, %93, %int0, %true : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>
  %308 = torch.aten.mean.dim %306, %93, %true, %none : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
  %309 = torch.aten.add.Tensor %307, %5, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
  %310 = torch.aten.rsqrt %309 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
  %311 = torch.aten.sub.Tensor %305, %308, %int1 : !torch.vtensor<[2,32,10,4096],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,10,4096],f32>
  %312 = torch.aten.mul.Tensor %311, %310 : !torch.vtensor<[2,32,10,4096],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,10,4096],f32>
  %313 = torch.aten.view %312, %100 : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f32>
  %314 = torch.aten.unsqueeze %55, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16>
  %315 = torch.aten.unsqueeze %314, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16>
  %316 = torch.aten.mul.Tensor %313, %315 : !torch.vtensor<[2,320,64,64],f32>, !torch.vtensor<[320,1,1],f16> -> !torch.vtensor<[2,320,64,64],f32>
  %317 = torch.aten.unsqueeze %55, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16>
  %318 = torch.aten.unsqueeze %317, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16>
  %319 = torch.aten.add.Tensor %316, %318, %int1 : !torch.vtensor<[2,320,64,64],f32>, !torch.vtensor<[320,1,1],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f32>
  %320 = torch.aten._to_copy %319, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,320,64,64],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,320,64,64],f16>
  %321 = torch.aten.silu %320 : !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16>
  %322 = torch.aten._convolution %321, %47, %55, %87, %87, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[320,320,3,3],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,320,64,64],f16>
  %323 = torch.aten.silu %86 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
  %324 = torch.aten.t %53 : !torch.vtensor<[320,1280],f16> -> !torch.vtensor<[1280,320],f16>
  %325 = torch.aten.addmm %55, %323, %324, %int1, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[2,320],f16>
  %326 = torch.aten.slice.Tensor %325, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,320],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,320],f16>
  %327 = torch.aten.slice.Tensor %326, %int1, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,320],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,320],f16>
  %328 = torch.aten.unsqueeze %327, %int2 : !torch.vtensor<[2,320],f16>, !torch.int -> !torch.vtensor<[2,320,1],f16>
  %329 = torch.aten.unsqueeze %328, %int3 : !torch.vtensor<[2,320,1],f16>, !torch.int -> !torch.vtensor<[2,320,1,1],f16>
  %330 = torch.aten.add.Tensor %322, %329, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,1,1],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f16>
  %331 = torch.aten.view %330, %90 : !torch.vtensor<[2,320,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,32,10,4096],f16>
  %332 = torch.aten._to_copy %331, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,32,10,4096],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,32,10,4096],f32>
  %333 = torch.aten.var.correction %332, %93, %int0, %true : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>
  %334 = torch.aten.mean.dim %332, %93, %true, %none : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
  %335 = torch.aten.add.Tensor %333, %5, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
  %336 = torch.aten.rsqrt %335 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
  %337 = torch.aten.sub.Tensor %331, %334, %int1 : !torch.vtensor<[2,32,10,4096],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,10,4096],f32>
  %338 = torch.aten.mul.Tensor %337, %336 : !torch.vtensor<[2,32,10,4096],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,10,4096],f32>
  %339 = torch.aten.view %338, %100 : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f32>
  %340 = torch.aten.unsqueeze %55, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16>
  %341 = torch.aten.unsqueeze %340, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16>
  %342 = torch.aten.mul.Tensor %339, %341 : !torch.vtensor<[2,320,64,64],f32>, !torch.vtensor<[320,1,1],f16> -> !torch.vtensor<[2,320,64,64],f32>
  %343 = torch.aten.unsqueeze %55, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16>
  %344 = torch.aten.unsqueeze %343, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16>
  %345 = torch.aten.add.Tensor %342, %344, %int1 : !torch.vtensor<[2,320,64,64],f32>, !torch.vtensor<[320,1,1],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f32>
  %346 = torch.aten._to_copy %345, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,320,64,64],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,320,64,64],f16>
  %347 = torch.aten.silu %346 : !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16>
  %348 = torch.aten._convolution %347, %47, %55, %87, %87, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[320,320,3,3],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,320,64,64],f16>
  %349 = torch.aten.add.Tensor %303, %348, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f16>
  %350 = torch.aten.div.Tensor %349, %4 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[2,320,64,64],f16>
  %351 = torch.aten.clone %350, %int0 : !torch.vtensor<[2,320,64,64],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f16>
  %352 = torch.aten.view %351, %90 : !torch.vtensor<[2,320,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,32,10,4096],f16>
  %353 = torch.aten._to_copy %352, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,32,10,4096],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,32,10,4096],f32>
  %354 = torch.aten.var.correction %353, %93, %int0, %true : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>
  %355 = torch.aten.mean.dim %353, %93, %true, %none : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
  %356 = torch.aten.add.Tensor %354, %3, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
  %357 = torch.aten.rsqrt %356 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
  %358 = torch.aten.sub.Tensor %352, %355, %int1 : !torch.vtensor<[2,32,10,4096],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,10,4096],f32>
  %359 = torch.aten.mul.Tensor %358, %357 : !torch.vtensor<[2,32,10,4096],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,10,4096],f32>
  %360 = torch.aten.view %359, %100 : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f32>
  %361 = torch.aten.unsqueeze %55, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16>
  %362 = torch.aten.unsqueeze %361, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16>
  %363 = torch.aten.mul.Tensor %360, %362 : !torch.vtensor<[2,320,64,64],f32>, !torch.vtensor<[320,1,1],f16> -> !torch.vtensor<[2,320,64,64],f32>
  %364 = torch.aten.unsqueeze %55, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16>
  %365 = torch.aten.unsqueeze %364, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16>
  %366 = torch.aten.add.Tensor %363, %365, %int1 : !torch.vtensor<[2,320,64,64],f32>, !torch.vtensor<[320,1,1],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f32>
  %367 = torch.aten._to_copy %366, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,320,64,64],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,320,64,64],f16>
  %368 = torch.aten._convolution %367, %54, %55, %87, %88, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[320,320,1,1],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,320,64,64],f16>
  %369 = torch.aten.permute %368, %156 : !torch.vtensor<[2,320,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,64,64,320],f16>
  %370 = torch.aten._reshape_alias %369, %158, %159 : !torch.vtensor<[2,64,64,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
  %result0_7, %result1_8, %result2_9 = torch.aten.native_layer_norm %370, %161, %55, %55, %float1.000000e-05 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.vtensor<[320],f16>, !torch.vtensor<[320],f16>, !torch.float -> !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,1],f32>, !torch.vtensor<[2,4096,1],f32>
  %371 = torch.aten.t %50 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
  %372 = torch.aten._reshape_alias %result0_7, %163, %164 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
  %373 = torch.aten.mm %372, %371 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16>
  %374 = torch.aten._unsafe_view %373, %158 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
  %375 = torch.aten.t %50 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
  %376 = torch.aten._reshape_alias %result0_7, %163, %164 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
  %377 = torch.aten.mm %376, %375 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16>
  %378 = torch.aten._unsafe_view %377, %158 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
  %379 = torch.aten.t %50 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
  %380 = torch.aten._reshape_alias %result0_7, %163, %164 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
  %381 = torch.aten.mm %380, %379 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16>
  %382 = torch.aten._unsafe_view %381, %158 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
  %383 = torch.aten._reshape_alias %374, %176, %177 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16>
  %384 = torch.aten.permute %383, %179 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16>
  %385 = torch.aten.clone %384, %int0 : !torch.vtensor<[2,8,4096,40],f16>, !torch.int -> !torch.vtensor<[2,8,4096,40],f16>
  %386 = torch.aten._unsafe_view %385, %182 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
  %387 = torch.aten._reshape_alias %378, %176, %177 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16>
  %388 = torch.aten.permute %387, %179 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16>
  %389 = torch.aten.clone %388, %int0 : !torch.vtensor<[2,8,4096,40],f16>, !torch.int -> !torch.vtensor<[2,8,4096,40],f16>
  %390 = torch.aten._unsafe_view %389, %182 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
  %391 = torch.aten._reshape_alias %382, %176, %177 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16>
  %392 = torch.aten.permute %391, %179 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16>
  %393 = torch.aten.clone %392, %int0 : !torch.vtensor<[2,8,4096,40],f16>, !torch.int -> !torch.vtensor<[2,8,4096,40],f16>
  %394 = torch.aten._unsafe_view %393, %182 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
  %395 = torch.aten.transpose.int %390, %int-1, %int-2 : !torch.vtensor<[16,4096,40],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,40,4096],f16>
  %396 = torch.aten.expand %386, %182, %false : !torch.vtensor<[16,4096,40],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,4096,40],f16>
  %397 = torch.aten._reshape_alias %396, %182, %194 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
  %398 = torch.aten.expand %395, %196, %false : !torch.vtensor<[16,40,4096],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,40,4096],f16>
  %399 = torch.aten._reshape_alias %398, %196, %198 : !torch.vtensor<[16,40,4096],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,40,4096],f16>
  %400 = torch.aten.bmm %397, %399 : !torch.vtensor<[16,4096,40],f16>, !torch.vtensor<[16,40,4096],f16> -> !torch.vtensor<[16,4096,4096],f16>
  %401 = torch.aten._unsafe_view %400, %201 : !torch.vtensor<[16,4096,4096],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,4096],f16>
  %402 = torch.aten.mul.Tensor %401, %2 : !torch.vtensor<[16,4096,4096],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,4096,4096],f16>
  %403 = torch.aten._softmax %402, %int-1, %false : !torch.vtensor<[16,4096,4096],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,4096,4096],f16>
  %404 = torch.aten.expand %403, %201, %false : !torch.vtensor<[16,4096,4096],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,4096,4096],f16>
  %405 = torch.aten._reshape_alias %404, %201, %206 : !torch.vtensor<[16,4096,4096],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,4096,4096],f16>
  %406 = torch.aten.expand %394, %182, %false : !torch.vtensor<[16,4096,40],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,4096,40],f16>
  %407 = torch.aten._reshape_alias %406, %182, %194 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
  %408 = torch.aten.bmm %405, %407 : !torch.vtensor<[16,4096,4096],f16>, !torch.vtensor<[16,4096,40],f16> -> !torch.vtensor<[16,4096,40],f16>
  %409 = torch.aten._unsafe_view %408, %182 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
  %410 = torch.aten._reshape_alias %409, %212, %213 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16>
  %411 = torch.aten.permute %410, %179 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16>
  %412 = torch.aten.clone %411, %int0 : !torch.vtensor<[2,4096,8,40],f16>, !torch.int -> !torch.vtensor<[2,4096,8,40],f16>
  %413 = torch.aten._unsafe_view %412, %158 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
  %414 = torch.aten.t %50 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
  %415 = torch.aten.view %413, %163 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
  %416 = torch.aten.addmm %55, %415, %414, %int1, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[8192,320],f16>
  %417 = torch.aten.view %416, %158 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
  %418 = torch.aten.add.Tensor %417, %370, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16>
  %result0_10, %result1_11, %result2_12 = torch.aten.native_layer_norm %418, %161, %55, %55, %float1.000000e-05 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.vtensor<[320],f16>, !torch.vtensor<[320],f16>, !torch.float -> !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,1],f32>, !torch.vtensor<[2,4096,1],f32>
  %419 = torch.aten.t %50 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
  %420 = torch.aten._reshape_alias %result0_10, %163, %164 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
  %421 = torch.aten.mm %420, %419 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16>
  %422 = torch.aten._unsafe_view %421, %158 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
  %423 = torch.aten.t %49 : !torch.vtensor<[320,768],f16> -> !torch.vtensor<[768,320],f16>
  %424 = torch.aten._reshape_alias %arg2, %228, %229 : !torch.vtensor<[2,77,768],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[154,768],f16>
  %425 = torch.aten.mm %424, %423 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,320],f16> -> !torch.vtensor<[154,320],f16>
  %426 = torch.aten._unsafe_view %425, %232 : !torch.vtensor<[154,320],f16>, !torch.list<int> -> !torch.vtensor<[2,77,320],f16>
  %427 = torch.aten.t %49 : !torch.vtensor<[320,768],f16> -> !torch.vtensor<[768,320],f16>
  %428 = torch.aten._reshape_alias %arg2, %228, %229 : !torch.vtensor<[2,77,768],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[154,768],f16>
  %429 = torch.aten.mm %428, %427 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,320],f16> -> !torch.vtensor<[154,320],f16>
  %430 = torch.aten._unsafe_view %429, %232 : !torch.vtensor<[154,320],f16>, !torch.list<int> -> !torch.vtensor<[2,77,320],f16>
  %431 = torch.aten._reshape_alias %422, %176, %177 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16>
  %432 = torch.aten.permute %431, %179 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16>
  %433 = torch.aten.clone %432, %int0 : !torch.vtensor<[2,8,4096,40],f16>, !torch.int -> !torch.vtensor<[2,8,4096,40],f16>
  %434 = torch.aten._unsafe_view %433, %182 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
  %435 = torch.aten._reshape_alias %426, %242, %243 : !torch.vtensor<[2,77,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,77,8,40],f16>
  %436 = torch.aten.permute %435, %179 : !torch.vtensor<[2,77,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,40],f16>
  %437 = torch.aten.clone %436, %int0 : !torch.vtensor<[2,8,77,40],f16>, !torch.int -> !torch.vtensor<[2,8,77,40],f16>
  %438 = torch.aten._unsafe_view %437, %247 : !torch.vtensor<[2,8,77,40],f16>, !torch.list<int> -> !torch.vtensor<[16,77,40],f16>
  %439 = torch.aten._reshape_alias %430, %242, %243 : !torch.vtensor<[2,77,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,77,8,40],f16>
  %440 = torch.aten.permute %439, %179 : !torch.vtensor<[2,77,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,40],f16>
  %441 = torch.aten.clone %440, %int0 : !torch.vtensor<[2,8,77,40],f16>, !torch.int -> !torch.vtensor<[2,8,77,40],f16>
  %442 = torch.aten._unsafe_view %441, %247 : !torch.vtensor<[2,8,77,40],f16>, !torch.list<int> -> !torch.vtensor<[16,77,40],f16>
  %443 = torch.aten.transpose.int %438, %int-1, %int-2 : !torch.vtensor<[16,77,40],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,40,77],f16>
  %444 = torch.aten.expand %434, %182, %false : !torch.vtensor<[16,4096,40],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,4096,40],f16>
  %445 = torch.aten._reshape_alias %444, %182, %194 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
  %446 = torch.aten.expand %443, %256, %false : !torch.vtensor<[16,40,77],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,40,77],f16>
  %447 = torch.aten._reshape_alias %446, %256, %258 : !torch.vtensor<[16,40,77],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,40,77],f16>
  %448 = torch.aten.bmm %445, %447 : !torch.vtensor<[16,4096,40],f16>, !torch.vtensor<[16,40,77],f16> -> !torch.vtensor<[16,4096,77],f16>
  %449 = torch.aten._unsafe_view %448, %261 : !torch.vtensor<[16,4096,77],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,77],f16>
  %450 = torch.aten.mul.Tensor %449, %2 : !torch.vtensor<[16,4096,77],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,4096,77],f16>
  %451 = torch.aten._softmax %450, %int-1, %false : !torch.vtensor<[16,4096,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,4096,77],f16>
  %452 = torch.aten.expand %451, %261, %false : !torch.vtensor<[16,4096,77],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,4096,77],f16>
  %453 = torch.aten._reshape_alias %452, %261, %266 : !torch.vtensor<[16,4096,77],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,4096,77],f16>
  %454 = torch.aten.expand %442, %247, %false : !torch.vtensor<[16,77,40],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,77,40],f16>
  %455 = torch.aten._reshape_alias %454, %247, %269 : !torch.vtensor<[16,77,40],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,77,40],f16>
  %456 = torch.aten.bmm %453, %455 : !torch.vtensor<[16,4096,77],f16>, !torch.vtensor<[16,77,40],f16> -> !torch.vtensor<[16,4096,40],f16>
  %457 = torch.aten._unsafe_view %456, %182 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
  %458 = torch.aten._reshape_alias %457, %212, %213 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16>
  %459 = torch.aten.permute %458, %179 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16>
  %460 = torch.aten.clone %459, %int0 : !torch.vtensor<[2,4096,8,40],f16>, !torch.int -> !torch.vtensor<[2,4096,8,40],f16>
  %461 = torch.aten._unsafe_view %460, %158 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
  %462 = torch.aten.t %50 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
  %463 = torch.aten.view %461, %163 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
  %464 = torch.aten.addmm %55, %463, %462, %int1, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[8192,320],f16>
  %465 = torch.aten.view %464, %158 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
  %466 = torch.aten.add.Tensor %465, %418, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16>
  %result0_13, %result1_14, %result2_15 = torch.aten.native_layer_norm %466, %161, %55, %55, %float1.000000e-05 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.vtensor<[320],f16>, !torch.vtensor<[320],f16>, !torch.float -> !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,1],f32>, !torch.vtensor<[2,4096,1],f32>
  %467 = torch.aten.t %51 : !torch.vtensor<[2560,320],f16> -> !torch.vtensor<[320,2560],f16>
  %468 = torch.aten.view %result0_13, %163 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
  %469 = torch.aten.addmm %52, %468, %467, %int1, %int1 : !torch.vtensor<[2560],f16>, !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,2560],f16>, !torch.int, !torch.int -> !torch.vtensor<[8192,2560],f16>
  %470 = torch.aten.view %469, %285 : !torch.vtensor<[8192,2560],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,2560],f16>
  %471 = torch.aten.slice.Tensor %470, %int-1, %int0, %int1280, %int1 : !torch.vtensor<[2,4096,2560],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,4096,1280],f16>
  %472 = torch.aten.slice.Tensor %470, %int-1, %int1280, %int2560, %int1 : !torch.vtensor<[2,4096,2560],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,4096,1280],f16>
  %473 = torch.aten.gelu %472, %str_0 : !torch.vtensor<[2,4096,1280],f16>, !torch.str -> !torch.vtensor<[2,4096,1280],f16>
  %474 = torch.aten.mul.Tensor %471, %473 : !torch.vtensor<[2,4096,1280],f16>, !torch.vtensor<[2,4096,1280],f16> -> !torch.vtensor<[2,4096,1280],f16>
  %475 = torch.aten.t %53 : !torch.vtensor<[320,1280],f16> -> !torch.vtensor<[1280,320],f16>
  %476 = torch.aten.view %474, %292 : !torch.vtensor<[2,4096,1280],f16>, !torch.list<int> -> !torch.vtensor<[8192,1280],f16>
  %477 = torch.aten.addmm %55, %476, %475, %int1, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[8192,1280],f16>, !torch.vtensor<[1280,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[8192,320],f16>
  %478 = torch.aten.view %477, %158 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
  %479 = torch.aten.add.Tensor %478, %466, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16>
  %480 = torch.aten._reshape_alias %479, %297, %298 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,64,64,320],f16>
  %481 = torch.aten.permute %480, %300 : !torch.vtensor<[2,64,64,320],f16>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f16>
  %482 = torch.aten._convolution %481, %54, %55, %87, %88, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[320,320,1,1],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,320,64,64],f16>
  %483 = torch.aten.add.Tensor %482, %350, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f16>
  %484 = torch.prim.ListConstruct %int2, %int2 : (!torch.int, !torch.int) -> !torch.list<int>
  %485 = torch.aten._convolution %483, %47, %55, %484, %87, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[320,320,3,3],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,320,32,32],f16>
  %486 = torch.aten.clone %485, %int0 : !torch.vtensor<[2,320,32,32],f16>, !torch.int -> !torch.vtensor<[2,320,32,32],f16>
  %487 = torch.prim.ListConstruct %int2, %int32, %int10, %int1024 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
  %488 = torch.aten.view %486, %487 : !torch.vtensor<[2,320,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,32,10,1024],f16>
  %489 = torch.aten._to_copy %488, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,32,10,1024],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,32,10,1024],f32>
  %490 = torch.aten.var.correction %489, %93, %int0, %true : !torch.vtensor<[2,32,10,1024],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>
  %491 = torch.aten.mean.dim %489, %93, %true, %none : !torch.vtensor<[2,32,10,1024],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
  %492 = torch.aten.add.Tensor %490, %5, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
  %493 = torch.aten.rsqrt %492 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
  %494 = torch.aten.sub.Tensor %488, %491, %int1 : !torch.vtensor<[2,32,10,1024],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,10,1024],f32>
  %495 = torch.aten.mul.Tensor %494, %493 : !torch.vtensor<[2,32,10,1024],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,10,1024],f32>
  %496 = torch.prim.ListConstruct %int2, %int320, %int32, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
  %497 = torch.aten.view %495, %496 : !torch.vtensor<[2,32,10,1024],f32>, !torch.list<int> -> !torch.vtensor<[2,320,32,32],f32>
  %498 = torch.aten.unsqueeze %55, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16>
  %499 = torch.aten.unsqueeze %498, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16>
  %500 = torch.aten.mul.Tensor %497, %499 : !torch.vtensor<[2,320,32,32],f32>, !torch.vtensor<[320,1,1],f16> -> !torch.vtensor<[2,320,32,32],f32>
  %501 = torch.aten.unsqueeze %55, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16>
  %502 = torch.aten.unsqueeze %501, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16>
  %503 = torch.aten.add.Tensor %500, %502, %int1 : !torch.vtensor<[2,320,32,32],f32>, !torch.vtensor<[320,1,1],f16>, !torch.int -> !torch.vtensor<[2,320,32,32],f32>
  %504 = torch.aten._to_copy %503, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,320,32,32],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,320,32,32],f16>
  %505 = torch.aten.silu %504 : !torch.vtensor<[2,320,32,32],f16> -> !torch.vtensor<[2,320,32,32],f16>
  %506 = torch.aten._convolution %505, %11, %45, %87, %87, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,320,32,32],f16>, !torch.vtensor<[640,320,3,3],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,640,32,32],f16>
  %507 = torch.aten.silu %86 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
  %508 = torch.aten.t %33 : !torch.vtensor<[640,1280],f16> -> !torch.vtensor<[1280,640],f16>
  %509 = torch.aten.addmm %45, %507, %508, %int1, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[2,640],f16>
  %510 = torch.aten.slice.Tensor %509, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,640],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,640],f16>
  %511 = torch.aten.slice.Tensor %510, %int1, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,640],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,640],f16>
  %512 = torch.aten.unsqueeze %511, %int2 : !torch.vtensor<[2,640],f16>, !torch.int -> !torch.vtensor<[2,640,1],f16>
  %513 = torch.aten.unsqueeze %512, %int3 : !torch.vtensor<[2,640,1],f16>, !torch.int -> !torch.vtensor<[2,640,1,1],f16>
  %514 = torch.aten.add.Tensor %506, %513, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,1,1],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
  %515 = torch.prim.ListConstruct %int2, %int32, %int20, %int1024 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
  %516 = torch.aten.view %514, %515 : !torch.vtensor<[2,640,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,32,20,1024],f16>
  %517 = torch.aten._to_copy %516, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,32,20,1024],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,32,20,1024],f32>
  %518 = torch.aten.var.correction %517, %93, %int0, %true : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>
  %519 = torch.aten.mean.dim %517, %93, %true, %none : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
  %520 = torch.aten.add.Tensor %518, %5, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
  %521 = torch.aten.rsqrt %520 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
  %522 = torch.aten.sub.Tensor %516, %519, %int1 : !torch.vtensor<[2,32,20,1024],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,20,1024],f32>
  %523 = torch.aten.mul.Tensor %522, %521 : !torch.vtensor<[2,32,20,1024],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,20,1024],f32>
  %524 = torch.prim.ListConstruct %int2, %int640, %int32, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
  %525 = torch.aten.view %523, %524 : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f32>
  %526 = torch.aten.unsqueeze %45, %int-1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640,1],f16>
  %527 = torch.aten.unsqueeze %526, %int-1 : !torch.vtensor<[640,1],f16>, !torch.int -> !torch.vtensor<[640,1,1],f16>
  %528 = torch.aten.mul.Tensor %525, %527 : !torch.vtensor<[2,640,32,32],f32>, !torch.vtensor<[640,1,1],f16> -> !torch.vtensor<[2,640,32,32],f32>
  %529 = torch.aten.unsqueeze %45, %int-1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640,1],f16>
  %530 = torch.aten.unsqueeze %529, %int-1 : !torch.vtensor<[640,1],f16>, !torch.int -> !torch.vtensor<[640,1,1],f16>
  %531 = torch.aten.add.Tensor %528, %530, %int1 : !torch.vtensor<[2,640,32,32],f32>, !torch.vtensor<[640,1,1],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f32>
  %532 = torch.aten._to_copy %531, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,640,32,32],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,640,32,32],f16>
  %533 = torch.aten.silu %532 : !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16>
  %534 = torch.aten._convolution %533, %41, %45, %87, %87, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[640,640,3,3],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,640,32,32],f16>
  %535 = torch.aten._convolution %485, %12, %45, %87, %88, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,320,32,32],f16>, !torch.vtensor<[640,320,1,1],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,640,32,32],f16>
  %536 = torch.aten.add.Tensor %535, %534, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
  %537 = torch.aten.div.Tensor %536, %4 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[2,640,32,32],f16>
  %538 = torch.aten.clone %537, %int0 : !torch.vtensor<[2,640,32,32],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
  %539 = torch.aten.view %538, %515 : !torch.vtensor<[2,640,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,32,20,1024],f16>
  %540 = torch.aten._to_copy %539, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,32,20,1024],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,32,20,1024],f32>
  %541 = torch.aten.var.correction %540, %93, %int0, %true : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>
  %542 = torch.aten.mean.dim %540, %93, %true, %none : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
  %543 = torch.aten.add.Tensor %541, %3, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
  %544 = torch.aten.rsqrt %543 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
  %545 = torch.aten.sub.Tensor %539, %542, %int1 : !torch.vtensor<[2,32,20,1024],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,20,1024],f32>
  %546 = torch.aten.mul.Tensor %545, %544 : !torch.vtensor<[2,32,20,1024],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,20,1024],f32>
  %547 = torch.aten.view %546, %524 : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f32>
  %548 = torch.aten.unsqueeze %45, %int-1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640,1],f16>
  %549 = torch.aten.unsqueeze %548, %int-1 : !torch.vtensor<[640,1],f16>, !torch.int -> !torch.vtensor<[640,1,1],f16>
  %550 = torch.aten.mul.Tensor %547, %549 : !torch.vtensor<[2,640,32,32],f32>, !torch.vtensor<[640,1,1],f16> -> !torch.vtensor<[2,640,32,32],f32>
  %551 = torch.aten.unsqueeze %45, %int-1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640,1],f16>
  %552 = torch.aten.unsqueeze %551, %int-1 : !torch.vtensor<[640,1],f16>, !torch.int -> !torch.vtensor<[640,1,1],f16>
  %553 = torch.aten.add.Tensor %550, %552, %int1 : !torch.vtensor<[2,640,32,32],f32>, !torch.vtensor<[640,1,1],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f32>
  %554 = torch.aten._to_copy %553, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,640,32,32],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,640,32,32],f16>
  %555 = torch.aten._convolution %554, %40, %45, %87, %88, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[640,640,1,1],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,640,32,32],f16>
  %556 = torch.aten.permute %555, %156 : !torch.vtensor<[2,640,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,32,32,640],f16>
  %557 = torch.prim.ListConstruct %int2, %int1024, %int640 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
  %558 = torch.prim.ListConstruct %int655360, %int1, %int1024 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
  %559 = torch.aten._reshape_alias %556, %557, %558 : !torch.vtensor<[2,32,32,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
  %560 = torch.prim.ListConstruct %int640 : (!torch.int) -> !torch.list<int>
  %result0_16, %result1_17, %result2_18 = torch.aten.native_layer_norm %559, %560, %45, %45, %float1.000000e-05 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.vtensor<[640],f16>, !torch.vtensor<[640],f16>, !torch.float -> !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,1],f32>, !torch.vtensor<[2,1024,1],f32>
  %561 = torch.aten.t %36 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
  %562 = torch.prim.ListConstruct %int2048, %int640 : (!torch.int, !torch.int) -> !torch.list<int>
  %563 = torch.prim.ListConstruct %int640, %int1 : (!torch.int, !torch.int) -> !torch.list<int>
  %564 = torch.aten._reshape_alias %result0_16, %562, %563 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
  %565 = torch.aten.mm %564, %561 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16>
  %566 = torch.aten._unsafe_view %565, %557 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
  %567 = torch.aten.t %36 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
  %568 = torch.aten._reshape_alias %result0_16, %562, %563 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
  %569 = torch.aten.mm %568, %567 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16>
  %570 = torch.aten._unsafe_view %569, %557 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
  %571 = torch.aten.t %36 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
  %572 = torch.aten._reshape_alias %result0_16, %562, %563 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
  %573 = torch.aten.mm %572, %571 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16>
  %574 = torch.aten._unsafe_view %573, %557 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
  %575 = torch.prim.ListConstruct %int2, %int1024, %int8, %int80 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
  %576 = torch.prim.ListConstruct %int655360, %int640, %int80, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
  %577 = torch.aten._reshape_alias %566, %575, %576 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16>
  %578 = torch.aten.permute %577, %179 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16>
  %579 = torch.aten.clone %578, %int0 : !torch.vtensor<[2,8,1024,80],f16>, !torch.int -> !torch.vtensor<[2,8,1024,80],f16>
  %580 = torch.prim.ListConstruct %int16, %int1024, %int80 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
  %581 = torch.aten._unsafe_view %579, %580 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
  %582 = torch.aten._reshape_alias %570, %575, %576 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16>
  %583 = torch.aten.permute %582, %179 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16>
  %584 = torch.aten.clone %583, %int0 : !torch.vtensor<[2,8,1024,80],f16>, !torch.int -> !torch.vtensor<[2,8,1024,80],f16>
  %585 = torch.aten._unsafe_view %584, %580 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
  %586 = torch.aten._reshape_alias %574, %575, %576 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16>
  %587 = torch.aten.permute %586, %179 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16>
  %588 = torch.aten.clone %587, %int0 : !torch.vtensor<[2,8,1024,80],f16>, !torch.int -> !torch.vtensor<[2,8,1024,80],f16>
  %589 = torch.aten._unsafe_view %588, %580 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
  %590 = torch.aten.transpose.int %585, %int-1, %int-2 : !torch.vtensor<[16,1024,80],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,80,1024],f16>
  %591 = torch.aten.expand %581, %580, %false : !torch.vtensor<[16,1024,80],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,1024,80],f16>
  %592 = torch.prim.ListConstruct %int81920, %int80, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
  %593 = torch.aten._reshape_alias %591, %580, %592 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
  %594 = torch.prim.ListConstruct %int16, %int80, %int1024 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
  %595 = torch.aten.expand %590, %594, %false : !torch.vtensor<[16,80,1024],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,80,1024],f16>
  %596 = torch.prim.ListConstruct %int81920, %int1, %int80 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
  %597 = torch.aten._reshape_alias %595, %594, %596 : !torch.vtensor<[16,80,1024],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,80,1024],f16>
  %598 = torch.aten.bmm %593, %597 : !torch.vtensor<[16,1024,80],f16>, !torch.vtensor<[16,80,1024],f16> -> !torch.vtensor<[16,1024,1024],f16>
  %599 = torch.prim.ListConstruct %int16, %int1024, %int1024 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
  %600 = torch.aten._unsafe_view %598, %599 : !torch.vtensor<[16,1024,1024],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,1024],f16>
  %601 = torch.aten.mul.Tensor %600, %1 : !torch.vtensor<[16,1024,1024],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,1024,1024],f16>
  %602 = torch.aten._softmax %601, %int-1, %false : !torch.vtensor<[16,1024,1024],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,1024,1024],f16>
  %603 = torch.aten.expand %602, %599, %false : !torch.vtensor<[16,1024,1024],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,1024,1024],f16>
  %604 = torch.prim.ListConstruct %int1048576, %int1024, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
  %605 = torch.aten._reshape_alias %603, %599, %604 : !torch.vtensor<[16,1024,1024],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,1024,1024],f16>
  %606 = torch.aten.expand %589, %580, %false : !torch.vtensor<[16,1024,80],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,1024,80],f16>
  %607 = torch.aten._reshape_alias %606, %580, %592 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
  %608 = torch.aten.bmm %605, %607 : !torch.vtensor<[16,1024,1024],f16>, !torch.vtensor<[16,1024,80],f16> -> !torch.vtensor<[16,1024,80],f16>
  %609 = torch.aten._unsafe_view %608, %580 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
  %610 = torch.prim.ListConstruct %int2, %int8, %int1024, %int80 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
  %611 = torch.prim.ListConstruct %int655360, %int81920, %int80, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
  %612 = torch.aten._reshape_alias %609, %610, %611 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16>
  %613 = torch.aten.permute %612, %179 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16>
  %614 = torch.aten.clone %613, %int0 : !torch.vtensor<[2,1024,8,80],f16>, !torch.int -> !torch.vtensor<[2,1024,8,80],f16>
  %615 = torch.aten._unsafe_view %614, %557 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
  %616 = torch.aten.t %36 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
  %617 = torch.aten.view %615, %562 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
  %618 = torch.aten.addmm %45, %617, %616, %int1, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[2048,640],f16>
  %619 = torch.aten.view %618, %557 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
  %620 = torch.aten.add.Tensor %619, %559, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16>
  %result0_19, %result1_20, %result2_21 = torch.aten.native_layer_norm %620, %560, %45, %45, %float1.000000e-05 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.vtensor<[640],f16>, !torch.vtensor<[640],f16>, !torch.float -> !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,1],f32>, !torch.vtensor<[2,1024,1],f32>
  %621 = torch.aten.t %36 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
  %622 = torch.aten._reshape_alias %result0_19, %562, %563 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
  %623 = torch.aten.mm %622, %621 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16>
  %624 = torch.aten._unsafe_view %623, %557 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
  %625 = torch.aten.t %35 : !torch.vtensor<[640,768],f16> -> !torch.vtensor<[768,640],f16>
  %626 = torch.aten._reshape_alias %arg2, %228, %229 : !torch.vtensor<[2,77,768],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[154,768],f16>
  %627 = torch.aten.mm %626, %625 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,640],f16> -> !torch.vtensor<[154,640],f16>
  %628 = torch.prim.ListConstruct %int2, %int77, %int640 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
  %629 = torch.aten._unsafe_view %627, %628 : !torch.vtensor<[154,640],f16>, !torch.list<int> -> !torch.vtensor<[2,77,640],f16>
  %630 = torch.aten.t %35 : !torch.vtensor<[640,768],f16> -> !torch.vtensor<[768,640],f16>
  %631 = torch.aten._reshape_alias %arg2, %228, %229 : !torch.vtensor<[2,77,768],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[154,768],f16>
  %632 = torch.aten.mm %631, %630 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,640],f16> -> !torch.vtensor<[154,640],f16>
  %633 = torch.aten._unsafe_view %632, %628 : !torch.vtensor<[154,640],f16>, !torch.list<int> -> !torch.vtensor<[2,77,640],f16>
  %634 = torch.aten._reshape_alias %624, %575, %576 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16>
  %635 = torch.aten.permute %634, %179 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16>
  %636 = torch.aten.clone %635, %int0 : !torch.vtensor<[2,8,1024,80],f16>, !torch.int -> !torch.vtensor<[2,8,1024,80],f16>
  %637 = torch.aten._unsafe_view %636, %580 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
  %638 = torch.prim.ListConstruct %int2, %int77, %int8, %int80 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
  %639 = torch.prim.ListConstruct %int49280, %int640, %int80, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
  %640 = torch.aten._reshape_alias %629, %638, %639 : !torch.vtensor<[2,77,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,77,8,80],f16>
  %641 = torch.aten.permute %640, %179 : !torch.vtensor<[2,77,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,80],f16>
  %642 = torch.aten.clone %641, %int0 : !torch.vtensor<[2,8,77,80],f16>, !torch.int -> !torch.vtensor<[2,8,77,80],f16>
  %643 = torch.prim.ListConstruct %int16, %int77, %int80 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
  %644 = torch.aten._unsafe_view %642, %643 : !torch.vtensor<[2,8,77,80],f16>, !torch.list<int> -> !torch.vtensor<[16,77,80],f16>
  %645 = torch.aten._reshape_alias %633, %638, %639 : !torch.vtensor<[2,77,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,77,8,80],f16>
  %646 = torch.aten.permute %645, %179 : !torch.vtensor<[2,77,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,80],f16>
  %647 = torch.aten.clone %646, %int0 : !torch.vtensor<[2,8,77,80],f16>, !torch.int -> !torch.vtensor<[2,8,77,80],f16>
  %648 = torch.aten._unsafe_view %647, %643 : !torch.vtensor<[2,8,77,80],f16>, !torch.list<int> -> !torch.vtensor<[16,77,80],f16>
  %649 = torch.aten.transpose.int %644, %int-1, %int-2 : !torch.vtensor<[16,77,80],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,80,77],f16>
  %650 = torch.aten.expand %637, %580, %false : !torch.vtensor<[16,1024,80],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,1024,80],f16>
  %651 = torch.aten._reshape_alias %650, %580, %592 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
  %652 = torch.prim.ListConstruct %int16, %int80, %int77 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
  %653 = torch.aten.expand %649, %652, %false : !torch.vtensor<[16,80,77],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,80,77],f16>
  %654 = torch.prim.ListConstruct %int6160, %int1, %int80 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
  %655 = torch.aten._reshape_alias %653, %652, %654 : !torch.vtensor<[16,80,77],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,80,77],f16>
  %656 = torch.aten.bmm %651, %655 : !torch.vtensor<[16,1024,80],f16>, !torch.vtensor<[16,80,77],f16> -> !torch.vtensor<[16,1024,77],f16>
  %657 = torch.prim.ListConstruct %int16, %int1024, %int77 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
  %658 = torch.aten._unsafe_view %656, %657 : !torch.vtensor<[16,1024,77],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,77],f16>
  %659 = torch.aten.mul.Tensor %658, %1 : !torch.vtensor<[16,1024,77],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,1024,77],f16>
  %660 = torch.aten._softmax %659, %int-1, %false : !torch.vtensor<[16,1024,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,1024,77],f16>
  %661 = torch.aten.expand %660, %657, %false : !torch.vtensor<[16,1024,77],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,1024,77],f16>
  %662 = torch.prim.ListConstruct %int78848, %int77, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
  %663 = torch.aten._reshape_alias %661, %657, %662 : !torch.vtensor<[16,1024,77],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,1024,77],f16>
  %664 = torch.aten.expand %648, %643, %false : !torch.vtensor<[16,77,80],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,77,80],f16>
  %665 = torch.prim.ListConstruct %int6160, %int80, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
  %666 = torch.aten._reshape_alias %664, %643, %665 : !torch.vtensor<[16,77,80],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,77,80],f16>
  %667 = torch.aten.bmm %663, %666 : !torch.vtensor<[16,1024,77],f16>, !torch.vtensor<[16,77,80],f16> -> !torch.vtensor<[16,1024,80],f16>
  %668 = torch.aten._unsafe_view %667, %580 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
  %669 = torch.aten._reshape_alias %668, %610, %611 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16>
  %670 = torch.aten.permute %669, %179 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16>
  %671 = torch.aten.clone %670, %int0 : !torch.vtensor<[2,1024,8,80],f16>, !torch.int -> !torch.vtensor<[2,1024,8,80],f16>
  %672 = torch.aten._unsafe_view %671, %557 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
  %673 = torch.aten.t %36 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
  %674 = torch.aten.view %672, %562 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
  %675 = torch.aten.addmm %45, %674, %673, %int1, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[2048,640],f16>
  %676 = torch.aten.view %675, %557 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
  %677 = torch.aten.add.Tensor %676, %620, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16>
  %result0_22, %result1_23, %result2_24 = torch.aten.native_layer_norm %677, %560, %45, %45, %float1.000000e-05 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.vtensor<[640],f16>, !torch.vtensor<[640],f16>, !torch.float -> !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,1],f32>, !torch.vtensor<[2,1024,1],f32>
  %678 = torch.aten.t %37 : !torch.vtensor<[5120,640],f16> -> !torch.vtensor<[640,5120],f16>
  %679 = torch.aten.view %result0_22, %562 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
  %680 = torch.aten.addmm %38, %679, %678, %int1, %int1 : !torch.vtensor<[5120],f16>, !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,5120],f16>, !torch.int, !torch.int -> !torch.vtensor<[2048,5120],f16>
  %681 = torch.prim.ListConstruct %int2, %int1024, %int5120 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
  %682 = torch.aten.view %680, %681 : !torch.vtensor<[2048,5120],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,5120],f16>
  %683 = torch.aten.slice.Tensor %682, %int-1, %int0, %int2560, %int1 : !torch.vtensor<[2,1024,5120],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1024,2560],f16>
  %684 = torch.aten.slice.Tensor %682, %int-1, %int2560, %int5120, %int1 : !torch.vtensor<[2,1024,5120],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1024,2560],f16>
  %685 = torch.aten.gelu %684, %str_0 : !torch.vtensor<[2,1024,2560],f16>, !torch.str -> !torch.vtensor<[2,1024,2560],f16>
  %686 = torch.aten.mul.Tensor %683, %685 : !torch.vtensor<[2,1024,2560],f16>, !torch.vtensor<[2,1024,2560],f16> -> !torch.vtensor<[2,1024,2560],f16>
  %687 = torch.aten.t %39 : !torch.vtensor<[640,2560],f16> -> !torch.vtensor<[2560,640],f16>
  %688 = torch.prim.ListConstruct %int2048, %int2560 : (!torch.int, !torch.int) -> !torch.list<int>
  %689 = torch.aten.view %686, %688 : !torch.vtensor<[2,1024,2560],f16>, !torch.list<int> -> !torch.vtensor<[2048,2560],f16>
  %690 = torch.aten.addmm %45, %689, %687, %int1, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2048,2560],f16>, !torch.vtensor<[2560,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[2048,640],f16>
  %691 = torch.aten.view %690, %557 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
  %692 = torch.aten.add.Tensor %691, %677, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16>
  %693 = torch.prim.ListConstruct %int2, %int32, %int32, %int640 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
  %694 = torch.prim.ListConstruct %int655360, %int20480, %int640, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
  %695 = torch.aten._reshape_alias %692, %693, %694 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,32,640],f16>
  %696 = torch.aten.permute %695, %300 : !torch.vtensor<[2,32,32,640],f16>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f16>
  %697 = torch.aten._convolution %696, %40, %45, %87, %88, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[640,640,1,1],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,640,32,32],f16>
  %698 = torch.aten.add.Tensor %697, %537, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
  %699 = torch.aten.clone %698, %int0 : !torch.vtensor<[2,640,32,32],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
  %700 = torch.aten.view %699, %515 : !torch.vtensor<[2,640,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,32,20,1024],f16>
  %701 = torch.aten._to_copy %700, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,32,20,1024],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,32,20,1024],f32>
  %702 = torch.aten.var.correction %701, %93, %int0, %true : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>
  %703 = torch.aten.mean.dim %701, %93, %true, %none : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
  %704 = torch.aten.add.Tensor %702, %5, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
  %705 = torch.aten.rsqrt %704 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
  %706 = torch.aten.sub.Tensor %700, %703, %int1 : !torch.vtensor<[2,32,20,1024],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,20,1024],f32>
  %707 = torch.aten.mul.Tensor %706, %705 : !torch.vtensor<[2,32,20,1024],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,20,1024],f32>
  %708 = torch.aten.view %707, %524 : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f32>
  %709 = torch.aten.unsqueeze %45, %int-1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640,1],f16>
  %710 = torch.aten.unsqueeze %709, %int-1 : !torch.vtensor<[640,1],f16>, !torch.int -> !torch.vtensor<[640,1,1],f16>
  %711 = torch.aten.mul.Tensor %708, %710 : !torch.vtensor<[2,640,32,32],f32>, !torch.vtensor<[640,1,1],f16> -> !torch.vtensor<[2,640,32,32],f32>
  %712 = torch.aten.unsqueeze %45, %int-1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640,1],f16>
  %713 = torch.aten.unsqueeze %712, %int-1 : !torch.vtensor<[640,1],f16>, !torch.int -> !torch.vtensor<[640,1,1],f16>
  %714 = torch.aten.add.Tensor %711, %713, %int1 : !torch.vtensor<[2,640,32,32],f32>, !torch.vtensor<[640,1,1],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f32>
  %715 = torch.aten._to_copy %714, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,640,32,32],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,640,32,32],f16>
  %716 = torch.aten.silu %715 : !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16>
  %717 = torch.aten._convolution %716, %41, %45, %87, %87, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[640,640,3,3],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,640,32,32],f16>
  %718 = torch.aten.silu %86 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
  %719 = torch.aten.t %33 : !torch.vtensor<[640,1280],f16> -> !torch.vtensor<[1280,640],f16>
  %720 = torch.aten.addmm %45, %718, %719, %int1, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[2,640],f16>
  %721 = torch.aten.slice.Tensor %720, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,640],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,640],f16>
  %722 = torch.aten.slice.Tensor %721, %int1, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,640],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,640],f16>
  %723 = torch.aten.unsqueeze %722, %int2 : !torch.vtensor<[2,640],f16>, !torch.int -> !torch.vtensor<[2,640,1],f16>
  %724 = torch.aten.unsqueeze %723, %int3 : !torch.vtensor<[2,640,1],f16>, !torch.int -> !torch.vtensor<[2,640,1,1],f16>
  %725 = torch.aten.add.Tensor %717, %724, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,1,1],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
  %726 = torch.aten.view %725, %515 : !torch.vtensor<[2,640,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,32,20,1024],f16>
  %727 = torch.aten._to_copy %726, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,32,20,1024],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,32,20,1024],f32>
  %728 = torch.aten.var.correction %727, %93, %int0, %true : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>
  %729 = torch.aten.mean.dim %727, %93, %true, %none : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
  %730 = torch.aten.add.Tensor %728, %5, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
  %731 = torch.aten.rsqrt %730 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
  %732 = torch.aten.sub.Tensor %726, %729, %int1 : !torch.vtensor<[2,32,20,1024],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,20,1024],f32>
  %733 = torch.aten.mul.Tensor %732, %731 : !torch.vtensor<[2,32,20,1024],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,20,1024],f32>
  %734 = torch.aten.view %733, %524 : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f32>
  %735 = torch.aten.unsqueeze %45, %int-1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640,1],f16>
  %736 = torch.aten.unsqueeze %735, %int-1 : !torch.vtensor<[640,1],f16>, !torch.int -> !torch.vtensor<[640,1,1],f16>
  %737 = torch.aten.mul.Tensor %734, %736 : !torch.vtensor<[2,640,32,32],f32>, !torch.vtensor<[640,1,1],f16> -> !torch.vtensor<[2,640,32,32],f32>
  %738 = torch.aten.unsqueeze %45, %int-1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640,1],f16>
  %739 = torch.aten.unsqueeze %738, %int-1 : !torch.vtensor<[640,1],f16>, !torch.int -> !torch.vtensor<[640,1,1],f16>
  %740 = torch.aten.add.Tensor %737, %739, %int1 : !torch.vtensor<[2,640,32,32],f32>, !torch.vtensor<[640,1,1],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f32>
  %741 = torch.aten._to_copy %740, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,640,32,32],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,640,32,32],f16>
  %742 = torch.aten.silu %741 : !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16>
  %743 = torch.aten._convolution %742, %41, %45, %87, %87, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[640,640,3,3],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,640,32,32],f16>
  %744 = torch.aten.add.Tensor %698, %743, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
  %745 = torch.aten.div.Tensor %744, %4 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[2,640,32,32],f16>
  %746 = torch.aten.clone %745, %int0 : !torch.vtensor<[2,640,32,32],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
  %747 = torch.aten.view %746, %515 : !torch.vtensor<[2,640,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,32,20,1024],f16>
  %748 = torch.aten._to_copy %747, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,32,20,1024],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,32,20,1024],f32>
  %749 = torch.aten.var.correction %748, %93, %int0, %true : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>
  %750 = torch.aten.mean.dim %748, %93, %true, %none : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
  %751 = torch.aten.add.Tensor %749, %3, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
  %752 = torch.aten.rsqrt %751 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
  %753 = torch.aten.sub.Tensor %747, %750, %int1 : !torch.vtensor<[2,32,20,1024],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,20,1024],f32>
  %754 = torch.aten.mul.Tensor %753, %752 : !torch.vtensor<[2,32,20,1024],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,20,1024],f32>
  %755 = torch.aten.view %754, %524 : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f32>
  %756 = torch.aten.unsqueeze %45, %int-1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640,1],f16>
  %757 = torch.aten.unsqueeze %756, %int-1 : !torch.vtensor<[640,1],f16>, !torch.int -> !torch.vtensor<[640,1,1],f16>
  %758 = torch.aten.mul.Tensor %755, %757 : !torch.vtensor<[2,640,32,32],f32>, !torch.vtensor<[640,1,1],f16> -> !torch.vtensor<[2,640,32,32],f32>
  %759 = torch.aten.unsqueeze %45, %int-1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640,1],f16>
  %760 = torch.aten.unsqueeze %759, %int-1 : !torch.vtensor<[640,1],f16>, !torch.int -> !torch.vtensor<[640,1,1],f16>
  %761 = torch.aten.add.Tensor %758, %760, %int1 : !torch.vtensor<[2,640,32,32],f32>, !torch.vtensor<[640,1,1],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f32>
  %762 = torch.aten._to_copy %761, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,640,32,32],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,640,32,32],f16>
  %763 = torch.aten._convolution %762, %40, %45, %87, %88, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[640,640,1,1],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,640,32,32],f16>
  %764 = torch.aten.permute %763, %156 : !torch.vtensor<[2,640,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,32,32,640],f16>
  %765 = torch.aten._reshape_alias %764, %557, %558 : !torch.vtensor<[2,32,32,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
  %result0_25, %result1_26, %result2_27 = torch.aten.native_layer_norm %765, %560, %45, %45, %float1.000000e-05 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.vtensor<[640],f16>, !torch.vtensor<[640],f16>, !torch.float -> !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,1],f32>, !torch.vtensor<[2,1024,1],f32>
  %766 = torch.aten.t %36 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
  %767 = torch.aten._reshape_alias %result0_25, %562, %563 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
  %768 = torch.aten.mm %767, %766 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16>
  %769 = torch.aten._unsafe_view %768, %557 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
  %770 = torch.aten.t %36 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
  %771 = torch.aten._reshape_alias %result0_25, %562, %563 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
  %772 = torch.aten.mm %771, %770 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16>
  %773 = torch.aten._unsafe_view %772, %557 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
  %774 = torch.aten.t %36 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
  %775 = torch.aten._reshape_alias %result0_25, %562, %563 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
  %776 = torch.aten.mm %775, %774 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16>
  %777 = torch.aten._unsafe_view %776, %557 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
  %778 = torch.aten._reshape_alias %769, %575, %576 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16>
  %779 = torch.aten.permute %778, %179 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16>
  %780 = torch.aten.clone %779, %int0 : !torch.vtensor<[2,8,1024,80],f16>, !torch.int -> !torch.vtensor<[2,8,1024,80],f16>
  %781 = torch.aten._unsafe_view %780, %580 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
  %782 = torch.aten._reshape_alias %773, %575, %576 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16>
  %783 = torch.aten.permute %782, %179 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16>
  %784 = torch.aten.clone %783, %int0 : !torch.vtensor<[2,8,1024,80],f16>, !torch.int -> !torch.vtensor<[2,8,1024,80],f16>
  %785 = torch.aten._unsafe_view %784, %580 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
  %786 = torch.aten._reshape_alias %777, %575, %576 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16>
  %787 = torch.aten.permute %786, %179 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16>
  %788 = torch.aten.clone %787, %int0 : !torch.vtensor<[2,8,1024,80],f16>, !torch.int -> !torch.vtensor<[2,8,1024,80],f16>
  %789 = torch.aten._unsafe_view %788, %580 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
  %790 = torch.aten.transpose.int %785, %int-1, %int-2 : !torch.vtensor<[16,1024,80],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,80,1024],f16>
  %791 = torch.aten.expand %781, %580, %false : !torch.vtensor<[16,1024,80],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,1024,80],f16>
  %792 = torch.aten._reshape_alias %791, %580, %592 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
  %793 = torch.aten.expand %790, %594, %false : !torch.vtensor<[16,80,1024],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,80,1024],f16>
  %794 = torch.aten._reshape_alias %793, %594, %596 : !torch.vtensor<[16,80,1024],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,80,1024],f16>
  %795 = torch.aten.bmm %792, %794 : !torch.vtensor<[16,1024,80],f16>, !torch.vtensor<[16,80,1024],f16> -> !torch.vtensor<[16,1024,1024],f16>
  %796 = torch.aten._unsafe_view %795, %599 : !torch.vtensor<[16,1024,1024],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,1024],f16>
  %797 = torch.aten.mul.Tensor %796, %1 : !torch.vtensor<[16,1024,1024],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,1024,1024],f16>
  %798 = torch.aten._softmax %797, %int-1, %false : !torch.vtensor<[16,1024,1024],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,1024,1024],f16>
  %799 = torch.aten.expand %798, %599, %false : !torch.vtensor<[16,1024,1024],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,1024,1024],f16>
  %800 = torch.aten._reshape_alias %799, %599, %604 : !torch.vtensor<[16,1024,1024],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,1024,1024],f16>
  %801 = torch.aten.expand %789, %580, %false : !torch.vtensor<[16,1024,80],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,1024,80],f16>
  %802 = torch.aten._reshape_alias %801, %580, %592 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
  %803 = torch.aten.bmm %800, %802 : !torch.vtensor<[16,1024,1024],f16>, !torch.vtensor<[16,1024,80],f16> -> !torch.vtensor<[16,1024,80],f16>
  %804 = torch.aten._unsafe_view %803, %580 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
  %805 = torch.aten._reshape_alias %804, %610, %611 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16>
  %806 = torch.aten.permute %805, %179 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16>
  %807 = torch.aten.clone %806, %int0 : !torch.vtensor<[2,1024,8,80],f16>, !torch.int -> !torch.vtensor<[2,1024,8,80],f16>
  %808 = torch.aten._unsafe_view %807, %557 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
  %809 = torch.aten.t %36 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
  %810 = torch.aten.view %808, %562 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
  %811 = torch.aten.addmm %45, %810, %809, %int1, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[2048,640],f16>
  %812 = torch.aten.view %811, %557 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
  %813 = torch.aten.add.Tensor %812, %765, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16>
  %result0_28, %result1_29, %result2_30 = torch.aten.native_layer_norm %813, %560, %45, %45, %float1.000000e-05 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.vtensor<[640],f16>, !torch.vtensor<[640],f16>, !torch.float -> !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,1],f32>, !torch.vtensor<[2,1024,1],f32>
  %814 = torch.aten.t %36 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
  %815 = torch.aten._reshape_alias %result0_28, %562, %563 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
  %816 = torch.aten.mm %815, %814 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16>
  %817 = torch.aten._unsafe_view %816, %557 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
  %818 = torch.aten.t %35 : !torch.vtensor<[640,768],f16> -> !torch.vtensor<[768,640],f16>
  %819 = torch.aten._reshape_alias %arg2, %228, %229 : !torch.vtensor<[2,77,768],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[154,768],f16>
  %820 = torch.aten.mm %819, %818 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,640],f16> -> !torch.vtensor<[154,640],f16>
  %821 = torch.aten._unsafe_view %820, %628 : !torch.vtensor<[154,640],f16>, !torch.list<int> -> !torch.vtensor<[2,77,640],f16>
  %822 = torch.aten.t %35 : !torch.vtensor<[640,768],f16> -> !torch.vtensor<[768,640],f16>
  %823 = torch.aten._reshape_alias %arg2, %228, %229 : !torch.vtensor<[2,77,768],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[154,768],f16>
  %824 = torch.aten.mm %823, %822 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,640],f16> -> !torch.vtensor<[154,640],f16>
  %825 = torch.aten._unsafe_view %824, %628 : !torch.vtensor<[154,640],f16>, !torch.list<int> -> !torch.vtensor<[2,77,640],f16>
  %826 = torch.aten._reshape_alias %817, %575, %576 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16>
  %827 = torch.aten.permute %826, %179 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16>
  %828 = torch.aten.clone %827, %int0 : !torch.vtensor<[2,8,1024,80],f16>, !torch.int -> !torch.vtensor<[2,8,1024,80],f16>
  %829 = torch.aten._unsafe_view %828, %580 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
  %830 = torch.aten._reshape_alias %821, %638, %639 : !torch.vtensor<[2,77,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,77,8,80],f16>
  %831 = torch.aten.permute %830, %179 : !torch.vtensor<[2,77,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,80],f16>
  %832 = torch.aten.clone %831, %int0 : !torch.vtensor<[2,8,77,80],f16>, !torch.int -> !torch.vtensor<[2,8,77,80],f16>
  %833 = torch.aten._unsafe_view %832, %643 : !torch.vtensor<[2,8,77,80],f16>, !torch.list<int> -> !torch.vtensor<[16,77,80],f16>
  %834 = torch.aten._reshape_alias %825, %638, %639 : !torch.vtensor<[2,77,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,77,8,80],f16>
  %835 = torch.aten.permute %834, %179 : !torch.vtensor<[2,77,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,80],f16>
  %836 = torch.aten.clone %835, %int0 : !torch.vtensor<[2,8,77,80],f16>, !torch.int -> !torch.vtensor<[2,8,77,80],f16>
  %837 = torch.aten._unsafe_view %836, %643 : !torch.vtensor<[2,8,77,80],f16>, !torch.list<int> -> !torch.vtensor<[16,77,80],f16>
  %838 = torch.aten.transpose.int %833, %int-1, %int-2 : !torch.vtensor<[16,77,80],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,80,77],f16>
  %839 = torch.aten.expand %829, %580, %false : !torch.vtensor<[16,1024,80],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,1024,80],f16>
  %840 = torch.aten._reshape_alias %839, %580, %592 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
  %841 = torch.aten.expand %838, %652, %false : !torch.vtensor<[16,80,77],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,80,77],f16>
  %842 = torch.aten._reshape_alias %841, %652, %654 : !torch.vtensor<[16,80,77],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,80,77],f16>
  %843 = torch.aten.bmm %840, %842 : !torch.vtensor<[16,1024,80],f16>, !torch.vtensor<[16,80,77],f16> -> !torch.vtensor<[16,1024,77],f16>
  %844 = torch.aten._unsafe_view %843, %657 : !torch.vtensor<[16,1024,77],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,77],f16>
  %845 = torch.aten.mul.Tensor %844, %1 : !torch.vtensor<[16,1024,77],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,1024,77],f16>
  %846 = torch.aten._softmax %845, %int-1, %false : !torch.vtensor<[16,1024,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,1024,77],f16>
  %847 = torch.aten.expand %846, %657, %false : !torch.vtensor<[16,1024,77],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,1024,77],f16>
  %848 = torch.aten._reshape_alias %847, %657, %662 : !torch.vtensor<[16,1024,77],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,1024,77],f16>
  %849 = torch.aten.expand %837, %643, %false : !torch.vtensor<[16,77,80],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,77,80],f16>
  %850 = torch.aten._reshape_alias %849, %643, %665 : !torch.vtensor<[16,77,80],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,77,80],f16>
  %851 = torch.aten.bmm %848, %850 : !torch.vtensor<[16,1024,77],f16>, !torch.vtensor<[16,77,80],f16> -> !torch.vtensor<[16,1024,80],f16>
  %852 = torch.aten._unsafe_view %851, %580 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
  %853 = torch.aten._reshape_alias %852, %610, %611 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16>
  %854 = torch.aten.permute %853, %179 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16>
  %855 = torch.aten.clone %854, %int0 : !torch.vtensor<[2,1024,8,80],f16>, !torch.int -> !torch.vtensor<[2,1024,8,80],f16>
  %856 = torch.aten._unsafe_view %855, %557 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
  %857 = torch.aten.t %36 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
  %858 = torch.aten.view %856, %562 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
  %859 = torch.aten.addmm %45, %858, %857, %int1, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[2048,640],f16>
  %860 = torch.aten.view %859, %557 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
  %861 = torch.aten.add.Tensor %860, %813, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16>
  %result0_31, %result1_32, %result2_33 = torch.aten.native_layer_norm %861, %560, %45, %45, %float1.000000e-05 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.vtensor<[640],f16>, !torch.vtensor<[640],f16>, !torch.float -> !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,1],f32>, !torch.vtensor<[2,1024,1],f32>
  %862 = torch.aten.t %37 : !torch.vtensor<[5120,640],f16> -> !torch.vtensor<[640,5120],f16>
  %863 = torch.aten.view %result0_31, %562 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
  %864 = torch.aten.addmm %38, %863, %862, %int1, %int1 : !torch.vtensor<[5120],f16>, !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,5120],f16>, !torch.int, !torch.int -> !torch.vtensor<[2048,5120],f16>
  %865 = torch.aten.view %864, %681 : !torch.vtensor<[2048,5120],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,5120],f16>
  %866 = torch.aten.slice.Tensor %865, %int-1, %int0, %int2560, %int1 : !torch.vtensor<[2,1024,5120],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1024,2560],f16>
  %867 = torch.aten.slice.Tensor %865, %int-1, %int2560, %int5120, %int1 : !torch.vtensor<[2,1024,5120],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1024,2560],f16>
  %868 = torch.aten.gelu %867, %str_0 : !torch.vtensor<[2,1024,2560],f16>, !torch.str -> !torch.vtensor<[2,1024,2560],f16>
  %869 = torch.aten.mul.Tensor %866, %868 : !torch.vtensor<[2,1024,2560],f16>, !torch.vtensor<[2,1024,2560],f16> -> !torch.vtensor<[2,1024,2560],f16>
  %870 = torch.aten.t %39 : !torch.vtensor<[640,2560],f16> -> !torch.vtensor<[2560,640],f16>
  %871 = torch.aten.view %869, %688 : !torch.vtensor<[2,1024,2560],f16>, !torch.list<int> -> !torch.vtensor<[2048,2560],f16>
  %872 = torch.aten.addmm %45, %871, %870, %int1, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2048,2560],f16>, !torch.vtensor<[2560,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[2048,640],f16>
  %873 = torch.aten.view %872, %557 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
  %874 = torch.aten.add.Tensor %873, %861, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16>
  %875 = torch.aten._reshape_alias %874, %693, %694 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,32,640],f16>
  %876 = torch.aten.permute %875, %300 : !torch.vtensor<[2,32,32,640],f16>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f16>
  %877 = torch.aten._convolution %876, %40, %45, %87, %88, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[640,640,1,1],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,640,32,32],f16>
  %878 = torch.aten.add.Tensor %877, %745, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
  %879 = torch.aten._convolution %878, %41, %45, %484, %87, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[640,640,3,3],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,640,16,16],f16>
  %880 = torch.aten.clone %879, %int0 : !torch.vtensor<[2,640,16,16],f16>, !torch.int -> !torch.vtensor<[2,640,16,16],f16>
  %881 = torch.prim.ListConstruct %int2, %int32, %int20, %int256 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
  %882 = torch.aten.view %880, %881 : !torch.vtensor<[2,640,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,32,20,256],f16>
  %883 = torch.aten._to_copy %882, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,32,20,256],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,32,20,256],f32>
  %884 = torch.aten.var.correction %883, %93, %int0, %true : !torch.vtensor<[2,32,20,256],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>
  %885 = torch.aten.mean.dim %883, %93, %true, %none : !torch.vtensor<[2,32,20,256],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
  %886 = torch.aten.add.Tensor %884, %5, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
  %887 = torch.aten.rsqrt %886 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
  %888 = torch.aten.sub.Tensor %882, %885, %int1 : !torch.vtensor<[2,32,20,256],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,20,256],f32>
  %889 = torch.aten.mul.Tensor %888, %887 : !torch.vtensor<[2,32,20,256],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,20,256],f32>
  %890 = torch.prim.ListConstruct %int2, %int640, %int16, %int16 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
  %891 = torch.aten.view %889, %890 : !torch.vtensor<[2,32,20,256],f32>, !torch.list<int> -> !torch.vtensor<[2,640,16,16],f32>
  %892 = torch.aten.unsqueeze %45, %int-1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640,1],f16>
  %893 = torch.aten.unsqueeze %892, %int-1 : !torch.vtensor<[640,1],f16>, !torch.int -> !torch.vtensor<[640,1,1],f16>
  %894 = torch.aten.mul.Tensor %891, %893 : !torch.vtensor<[2,640,16,16],f32>, !torch.vtensor<[640,1,1],f16> -> !torch.vtensor<[2,640,16,16],f32>
  %895 = torch.aten.unsqueeze %45, %int-1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640,1],f16>
  %896 = torch.aten.unsqueeze %895, %int-1 : !torch.vtensor<[640,1],f16>, !torch.int -> !torch.vtensor<[640,1,1],f16>
  %897 = torch.aten.add.Tensor %894, %896, %int1 : !torch.vtensor<[2,640,16,16],f32>, !torch.vtensor<[640,1,1],f16>, !torch.int -> !torch.vtensor<[2,640,16,16],f32>
  %898 = torch.aten._to_copy %897, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,640,16,16],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,640,16,16],f16>
  %899 = torch.aten.silu %898 : !torch.vtensor<[2,640,16,16],f16> -> !torch.vtensor<[2,640,16,16],f16>
  %900 = torch.aten._convolution %899, %13, %29, %87, %87, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,640,16,16],f16>, !torch.vtensor<[1280,640,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,16,16],f16>
  %901 = torch.aten.silu %86 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
  %902 = torch.aten.t %20 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
  %903 = torch.aten.addmm %29, %901, %902, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
  %904 = torch.aten.slice.Tensor %903, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,1280],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
  %905 = torch.aten.slice.Tensor %904, %int1, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,1280],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
  %906 = torch.aten.unsqueeze %905, %int2 : !torch.vtensor<[2,1280],f16>, !torch.int -> !torch.vtensor<[2,1280,1],f16>
  %907 = torch.aten.unsqueeze %906, %int3 : !torch.vtensor<[2,1280,1],f16>, !torch.int -> !torch.vtensor<[2,1280,1,1],f16>
  %908 = torch.aten.add.Tensor %900, %907, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
  %909 = torch.prim.ListConstruct %int2, %int32, %int40, %int256 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
  %910 = torch.aten.view %908, %909 : !torch.vtensor<[2,1280,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,32,40,256],f16>
  %911 = torch.aten._to_copy %910, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,32,40,256],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,32,40,256],f32>
  %912 = torch.aten.var.correction %911, %93, %int0, %true : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>
  %913 = torch.aten.mean.dim %911, %93, %true, %none : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
  %914 = torch.aten.add.Tensor %912, %5, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
  %915 = torch.aten.rsqrt %914 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
  %916 = torch.aten.sub.Tensor %910, %913, %int1 : !torch.vtensor<[2,32,40,256],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,256],f32>
  %917 = torch.aten.mul.Tensor %916, %915 : !torch.vtensor<[2,32,40,256],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,256],f32>
  %918 = torch.prim.ListConstruct %int2, %int1280, %int16, %int16 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
  %919 = torch.aten.view %917, %918 : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f32>
  %920 = torch.aten.unsqueeze %29, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
  %921 = torch.aten.unsqueeze %920, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
  %922 = torch.aten.mul.Tensor %919, %921 : !torch.vtensor<[2,1280,16,16],f32>, !torch.vtensor<[1280,1,1],f16> -> !torch.vtensor<[2,1280,16,16],f32>
  %923 = torch.aten.unsqueeze %29, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
  %924 = torch.aten.unsqueeze %923, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
  %925 = torch.aten.add.Tensor %922, %924, %int1 : !torch.vtensor<[2,1280,16,16],f32>, !torch.vtensor<[1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f32>
  %926 = torch.aten._to_copy %925, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,1280,16,16],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,16,16],f16>
  %927 = torch.aten.silu %926 : !torch.vtensor<[2,1280,16,16],f16> -> !torch.vtensor<[2,1280,16,16],f16>
  %928 = torch.aten._convolution %927, %25, %29, %87, %87, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,16,16],f16>
  %929 = torch.aten._convolution %879, %14, %29, %87, %88, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,640,16,16],f16>, !torch.vtensor<[1280,640,1,1],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,16,16],f16>
  %930 = torch.aten.add.Tensor %929, %928, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
  %931 = torch.aten.div.Tensor %930, %4 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[2,1280,16,16],f16>
  %932 = torch.aten.clone %931, %int0 : !torch.vtensor<[2,1280,16,16],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
  %933 = torch.aten.view %932, %909 : !torch.vtensor<[2,1280,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,32,40,256],f16>
  %934 = torch.aten._to_copy %933, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,32,40,256],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,32,40,256],f32>
  %935 = torch.aten.var.correction %934, %93, %int0, %true : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>
  %936 = torch.aten.mean.dim %934, %93, %true, %none : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
  %937 = torch.aten.add.Tensor %935, %3, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
  %938 = torch.aten.rsqrt %937 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
  %939 = torch.aten.sub.Tensor %933, %936, %int1 : !torch.vtensor<[2,32,40,256],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,256],f32>
  %940 = torch.aten.mul.Tensor %939, %938 : !torch.vtensor<[2,32,40,256],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,256],f32>
  %941 = torch.aten.view %940, %918 : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f32>
  %942 = torch.aten.unsqueeze %29, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
  %943 = torch.aten.unsqueeze %942, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
  %944 = torch.aten.mul.Tensor %941, %943 : !torch.vtensor<[2,1280,16,16],f32>, !torch.vtensor<[1280,1,1],f16> -> !torch.vtensor<[2,1280,16,16],f32>
  %945 = torch.aten.unsqueeze %29, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
  %946 = torch.aten.unsqueeze %945, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
  %947 = torch.aten.add.Tensor %944, %946, %int1 : !torch.vtensor<[2,1280,16,16],f32>, !torch.vtensor<[1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f32>
  %948 = torch.aten._to_copy %947, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,1280,16,16],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,16,16],f16>
  %949 = torch.aten._convolution %948, %24, %29, %87, %88, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[1280,1280,1,1],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,16,16],f16>
  %950 = torch.aten.permute %949, %156 : !torch.vtensor<[2,1280,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,16,16,1280],f16>
  %951 = torch.prim.ListConstruct %int2, %int256, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
  %952 = torch.prim.ListConstruct %int327680, %int1, %int256 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
  %953 = torch.aten._reshape_alias %950, %951, %952 : !torch.vtensor<[2,16,16,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
  %954 = torch.prim.ListConstruct %int1280 : (!torch.int) -> !torch.list<int>
  %result0_34, %result1_35, %result2_36 = torch.aten.native_layer_norm %953, %954, %29, %29, %float1.000000e-05 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.vtensor<[1280],f16>, !torch.vtensor<[1280],f16>, !torch.float -> !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1],f32>, !torch.vtensor<[2,256,1],f32>
  %955 = torch.aten.t %20 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
  %956 = torch.prim.ListConstruct %int512, %int1280 : (!torch.int, !torch.int) -> !torch.list<int>
  %957 = torch.prim.ListConstruct %int1280, %int1 : (!torch.int, !torch.int) -> !torch.list<int>
  %958 = torch.aten._reshape_alias %result0_34, %956, %957 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
  %959 = torch.aten.mm %958, %955 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16>
  %960 = torch.aten._unsafe_view %959, %951 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
  %961 = torch.aten.t %20 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
  %962 = torch.aten._reshape_alias %result0_34, %956, %957 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
  %963 = torch.aten.mm %962, %961 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16>
  %964 = torch.aten._unsafe_view %963, %951 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
  %965 = torch.aten.t %20 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
  %966 = torch.aten._reshape_alias %result0_34, %956, %957 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
  %967 = torch.aten.mm %966, %965 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16>
  %968 = torch.aten._unsafe_view %967, %951 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
  %969 = torch.prim.ListConstruct %int2, %int256, %int8, %int160 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
  %970 = torch.prim.ListConstruct %int327680, %int1280, %int160, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
  %971 = torch.aten._reshape_alias %960, %969, %970 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16>
  %972 = torch.aten.permute %971, %179 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16>
  %973 = torch.aten.clone %972, %int0 : !torch.vtensor<[2,8,256,160],f16>, !torch.int -> !torch.vtensor<[2,8,256,160],f16>
  %974 = torch.prim.ListConstruct %int16, %int256, %int160 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
  %975 = torch.aten._unsafe_view %973, %974 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
  %976 = torch.aten._reshape_alias %964, %969, %970 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16>
  %977 = torch.aten.permute %976, %179 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16>
  %978 = torch.aten.clone %977, %int0 : !torch.vtensor<[2,8,256,160],f16>, !torch.int -> !torch.vtensor<[2,8,256,160],f16>
  %979 = torch.aten._unsafe_view %978, %974 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
  %980 = torch.aten._reshape_alias %968, %969, %970 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16>
  %981 = torch.aten.permute %980, %179 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16>
  %982 = torch.aten.clone %981, %int0 : !torch.vtensor<[2,8,256,160],f16>, !torch.int -> !torch.vtensor<[2,8,256,160],f16>
  %983 = torch.aten._unsafe_view %982, %974 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
  %984 = torch.aten.transpose.int %979, %int-1, %int-2 : !torch.vtensor<[16,256,160],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,160,256],f16>
  %985 = torch.aten.expand %975, %974, %false : !torch.vtensor<[16,256,160],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,256,160],f16>
  %986 = torch.prim.ListConstruct %int40960, %int160, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
  %987 = torch.aten._reshape_alias %985, %974, %986 : !torch.vtensor<[16,256,160],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
  %988 = torch.prim.ListConstruct %int16, %int160, %int256 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
  %989 = torch.aten.expand %984, %988, %false : !torch.vtensor<[16,160,256],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,160,256],f16>
  %990 = torch.prim.ListConstruct %int40960, %int1, %int160 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
  %991 = torch.aten._reshape_alias %989, %988, %990 : !torch.vtensor<[16,160,256],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,160,256],f16>
  %992 = torch.aten.bmm %987, %991 : !torch.vtensor<[16,256,160],f16>, !torch.vtensor<[16,160,256],f16> -> !torch.vtensor<[16,256,256],f16>
  %993 = torch.prim.ListConstruct %int16, %int256, %int256 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
  %994 = torch.aten._unsafe_view %992, %993 : !torch.vtensor<[16,256,256],f16>, !torch.list<int> -> !torch.vtensor<[16,256,256],f16>
  %995 = torch.aten.mul.Tensor %994, %0 : !torch.vtensor<[16,256,256],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,256,256],f16>
  %996 = torch.aten._softmax %995, %int-1, %false : !torch.vtensor<[16,256,256],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,256,256],f16>
  %997 = torch.aten.expand %996, %993, %false : !torch.vtensor<[16,256,256],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,256,256],f16>
  %998 = torch.prim.ListConstruct %int65536, %int256, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
  %999 = torch.aten._reshape_alias %997, %993, %998 : !torch.vtensor<[16,256,256],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,256,256],f16>
  %1000 = torch.aten.expand %983, %974, %false : !torch.vtensor<[16,256,160],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,256,160],f16>
  %1001 = torch.aten._reshape_alias %1000, %974, %986 : !torch.vtensor<[16,256,160],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
  %1002 = torch.aten.bmm %999, %1001 : !torch.vtensor<[16,256,256],f16>, !torch.vtensor<[16,256,160],f16> -> !torch.vtensor<[16,256,160],f16>
  %1003 = torch.aten._unsafe_view %1002, %974 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
  %1004 = torch.prim.ListConstruct %int2, %int8, %int256, %int160 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
  %1005 = torch.prim.ListConstruct %int327680, %int40960, %int160, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
  %1006 = torch.aten._reshape_alias %1003, %1004, %1005 : !torch.vtensor<[16,256,160],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16>
  %1007 = torch.aten.permute %1006, %179 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16>
  %1008 = torch.aten.clone %1007, %int0 : !torch.vtensor<[2,256,8,160],f16>, !torch.int -> !torch.vtensor<[2,256,8,160],f16>
  %1009 = torch.aten._unsafe_view %1008, %951 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
  %1010 = torch.aten.t %20 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
  %1011 = torch.aten.view %1009, %956 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
  %1012 = torch.aten.addmm %29, %1011, %1010, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[512,1280],f16>
  %1013 = torch.aten.view %1012, %951 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
  %1014 = torch.aten.add.Tensor %1013, %953, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16>
  %result0_37, %result1_38, %result2_39 = torch.aten.native_layer_norm %1014, %954, %29, %29, %float1.000000e-05 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.vtensor<[1280],f16>, !torch.vtensor<[1280],f16>, !torch.float -> !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1],f32>, !torch.vtensor<[2,256,1],f32>
  %1015 = torch.aten.t %20 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
  %1016 = torch.aten._reshape_alias %result0_37, %956, %957 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
  %1017 = torch.aten.mm %1016, %1015 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16>
  %1018 = torch.aten._unsafe_view %1017, %951 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
  %1019 = torch.aten.t %19 : !torch.vtensor<[1280,768],f16> -> !torch.vtensor<[768,1280],f16>
  %1020 = torch.aten._reshape_alias %arg2, %228, %229 : !torch.vtensor<[2,77,768],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[154,768],f16>
  %1021 = torch.aten.mm %1020, %1019 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,1280],f16> -> !torch.vtensor<[154,1280],f16>
  %1022 = torch.prim.ListConstruct %int2, %int77, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
  %1023 = torch.aten._unsafe_view %1021, %1022 : !torch.vtensor<[154,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,77,1280],f16>
  %1024 = torch.aten.t %19 : !torch.vtensor<[1280,768],f16> -> !torch.vtensor<[768,1280],f16>
  %1025 = torch.aten._reshape_alias %arg2, %228, %229 : !torch.vtensor<[2,77,768],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[154,768],f16>
  %1026 = torch.aten.mm %1025, %1024 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,1280],f16> -> !torch.vtensor<[154,1280],f16>
  %1027 = torch.aten._unsafe_view %1026, %1022 : !torch.vtensor<[154,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,77,1280],f16>
  %1028 = torch.aten._reshape_alias %1018, %969, %970 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16>
  %1029 = torch.aten.permute %1028, %179 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16>
  %1030 = torch.aten.clone %1029, %int0 : !torch.vtensor<[2,8,256,160],f16>, !torch.int -> !torch.vtensor<[2,8,256,160],f16>
  %1031 = torch.aten._unsafe_view %1030, %974 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
  %1032 = torch.prim.ListConstruct %int2, %int77, %int8, %int160 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
  %1033 = torch.prim.ListConstruct %int98560, %int1280, %int160, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
  %1034 = torch.aten._reshape_alias %1023, %1032, %1033 : !torch.vtensor<[2,77,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,77,8,160],f16>
  %1035 = torch.aten.permute %1034, %179 : !torch.vtensor<[2,77,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,160],f16>
  %1036 = torch.aten.clone %1035, %int0 : !torch.vtensor<[2,8,77,160],f16>, !torch.int -> !torch.vtensor<[2,8,77,160],f16>
  %1037 = torch.prim.ListConstruct %int16, %int77, %int160 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
  %1038 = torch.aten._unsafe_view %1036, %1037 : !torch.vtensor<[2,8,77,160],f16>, !torch.list<int> -> !torch.vtensor<[16,77,160],f16>
  %1039 = torch.aten._reshape_alias %1027, %1032, %1033 : !torch.vtensor<[2,77,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,77,8,160],f16>
  %1040 = torch.aten.permute %1039, %179 : !torch.vtensor<[2,77,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,160],f16>
  %1041 = torch.aten.clone %1040, %int0 : !torch.vtensor<[2,8,77,160],f16>, !torch.int -> !torch.vtensor<[2,8,77,160],f16>
  %1042 = torch.aten._unsafe_view %1041, %1037 : !torch.vtensor<[2,8,77,160],f16>, !torch.list<int> -> !torch.vtensor<[16,77,160],f16>
  %1043 = torch.aten.transpose.int %1038, %int-1, %int-2 : !torch.vtensor<[16,77,160],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,160,77],f16>
  %1044 = torch.aten.expand %1031, %974, %false : !torch.vtensor<[16,256,160],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,256,160],f16>
  %1045 = torch.aten._reshape_alias %1044, %974, %986 : !torch.vtensor<[16,256,160],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
  %1046 = torch.prim.ListConstruct %int16, %int160, %int77 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
  %1047 = torch.aten.expand %1043, %1046, %false : !torch.vtensor<[16,160,77],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,160,77],f16>
  %1048 = torch.prim.ListConstruct %int12320, %int1, %int160 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
  %1049 = torch.aten._reshape_alias %1047, %1046, %1048 : !torch.vtensor<[16,160,77],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,160,77],f16>
  %1050 = torch.aten.bmm %1045, %1049 : !torch.vtensor<[16,256,160],f16>, !torch.vtensor<[16,160,77],f16> -> !torch.vtensor<[16,256,77],f16>
  %1051 = torch.prim.ListConstruct %int16, %int256, %int77 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
  %1052 = torch.aten._unsafe_view %1050, %1051 : !torch.vtensor<[16,256,77],f16>, !torch.list<int> -> !torch.vtensor<[16,256,77],f16>
  %1053 = torch.aten.mul.Tensor %1052, %0 : !torch.vtensor<[16,256,77],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,256,77],f16>
  %1054 = torch.aten._softmax %1053, %int-1, %false : !torch.vtensor<[16,256,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,256,77],f16>
  %1055 = torch.aten.expand %1054, %1051, %false : !torch.vtensor<[16,256,77],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,256,77],f16>
  %1056 = torch.prim.ListConstruct %int19712, %int77, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
  %1057 = torch.aten._reshape_alias %1055, %1051, %1056 : !torch.vtensor<[16,256,77],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,256,77],f16>
  %1058 = torch.aten.expand %1042, %1037, %false : !torch.vtensor<[16,77,160],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,77,160],f16>
  %1059 = torch.prim.ListConstruct %int12320, %int160, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
  %1060 = torch.aten._reshape_alias %1058, %1037, %1059 : !torch.vtensor<[16,77,160],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,77,160],f16>
  %1061 = torch.aten.bmm %1057, %1060 : !torch.vtensor<[16,256,77],f16>, !torch.vtensor<[16,77,160],f16> -> !torch.vtensor<[16,256,160],f16>
  %1062 = torch.aten._unsafe_view %1061, %974 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
  %1063 = torch.aten._reshape_alias %1062, %1004, %1005 : !torch.vtensor<[16,256,160],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16>
  %1064 = torch.aten.permute %1063, %179 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16>
  %1065 = torch.aten.clone %1064, %int0 : !torch.vtensor<[2,256,8,160],f16>, !torch.int -> !torch.vtensor<[2,256,8,160],f16>
  %1066 = torch.aten._unsafe_view %1065, %951 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
  %1067 = torch.aten.t %20 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
  %1068 = torch.aten.view %1066, %956 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
  %1069 = torch.aten.addmm %29, %1068, %1067, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[512,1280],f16>
  %1070 = torch.aten.view %1069, %951 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
  %1071 = torch.aten.add.Tensor %1070, %1014, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16>
  %result0_40, %result1_41, %result2_42 = torch.aten.native_layer_norm %1071, %954, %29, %29, %float1.000000e-05 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.vtensor<[1280],f16>, !torch.vtensor<[1280],f16>, !torch.float -> !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1],f32>, !torch.vtensor<[2,256,1],f32>
  %1072 = torch.aten.t %21 : !torch.vtensor<[10240,1280],f16> -> !torch.vtensor<[1280,10240],f16>
  %1073 = torch.aten.view %result0_40, %956 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
  %1074 = torch.aten.addmm %22, %1073, %1072, %int1, %int1 : !torch.vtensor<[10240],f16>, !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,10240],f16>, !torch.int, !torch.int -> !torch.vtensor<[512,10240],f16>
  %1075 = torch.prim.ListConstruct %int2, %int256, %int10240 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
  %1076 = torch.aten.view %1074, %1075 : !torch.vtensor<[512,10240],f16>, !torch.list<int> -> !torch.vtensor<[2,256,10240],f16>
  %1077 = torch.aten.slice.Tensor %1076, %int-1, %int0, %int5120, %int1 : !torch.vtensor<[2,256,10240],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,256,5120],f16>
  %1078 = torch.aten.slice.Tensor %1076, %int-1, %int5120, %int10240, %int1 : !torch.vtensor<[2,256,10240],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,256,5120],f16>
  %1079 = torch.aten.gelu %1078, %str_0 : !torch.vtensor<[2,256,5120],f16>, !torch.str -> !torch.vtensor<[2,256,5120],f16>
  %1080 = torch.aten.mul.Tensor %1077, %1079 : !torch.vtensor<[2,256,5120],f16>, !torch.vtensor<[2,256,5120],f16> -> !torch.vtensor<[2,256,5120],f16>
  %1081 = torch.aten.t %23 : !torch.vtensor<[1280,5120],f16> -> !torch.vtensor<[5120,1280],f16>
  %1082 = torch.prim.ListConstruct %int512, %int5120 : (!torch.int, !torch.int) -> !torch.list<int>
  %1083 = torch.aten.view %1080, %1082 : !torch.vtensor<[2,256,5120],f16>, !torch.list<int> -> !torch.vtensor<[512,5120],f16>
  %1084 = torch.aten.addmm %29, %1083, %1081, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[512,5120],f16>, !torch.vtensor<[5120,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[512,1280],f16>
  %1085 = torch.aten.view %1084, %951 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
  %1086 = torch.aten.add.Tensor %1085, %1071, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16>
  %1087 = torch.prim.ListConstruct %int2, %int16, %int16, %int1280 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
  %1088 = torch.prim.ListConstruct %int327680, %int20480, %int1280, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
  %1089 = torch.aten._reshape_alias %1086, %1087, %1088 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,16,16,1280],f16>
  %1090 = torch.aten.permute %1089, %300 : !torch.vtensor<[2,16,16,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f16>
  %1091 = torch.aten._convolution %1090, %24, %29, %87, %88, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[1280,1280,1,1],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,16,16],f16>
  %1092 = torch.aten.add.Tensor %1091, %931, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
  %1093 = torch.aten.clone %1092, %int0 : !torch.vtensor<[2,1280,16,16],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
  %1094 = torch.aten.view %1093, %909 : !torch.vtensor<[2,1280,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,32,40,256],f16>
  %1095 = torch.aten._to_copy %1094, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,32,40,256],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,32,40,256],f32>
  %1096 = torch.aten.var.correction %1095, %93, %int0, %true : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>
  %1097 = torch.aten.mean.dim %1095, %93, %true, %none : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
  %1098 = torch.aten.add.Tensor %1096, %5, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
  %1099 = torch.aten.rsqrt %1098 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
  %1100 = torch.aten.sub.Tensor %1094, %1097, %int1 : !torch.vtensor<[2,32,40,256],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,256],f32>
  %1101 = torch.aten.mul.Tensor %1100, %1099 : !torch.vtensor<[2,32,40,256],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,256],f32>
  %1102 = torch.aten.view %1101, %918 : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f32>
  %1103 = torch.aten.unsqueeze %29, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
  %1104 = torch.aten.unsqueeze %1103, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
  %1105 = torch.aten.mul.Tensor %1102, %1104 : !torch.vtensor<[2,1280,16,16],f32>, !torch.vtensor<[1280,1,1],f16> -> !torch.vtensor<[2,1280,16,16],f32>
  %1106 = torch.aten.unsqueeze %29, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
  %1107 = torch.aten.unsqueeze %1106, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
  %1108 = torch.aten.add.Tensor %1105, %1107, %int1 : !torch.vtensor<[2,1280,16,16],f32>, !torch.vtensor<[1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f32>
  %1109 = torch.aten._to_copy %1108, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,1280,16,16],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,16,16],f16>
  %1110 = torch.aten.silu %1109 : !torch.vtensor<[2,1280,16,16],f16> -> !torch.vtensor<[2,1280,16,16],f16>
  %1111 = torch.aten._convolution %1110, %25, %29, %87, %87, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,16,16],f16>
  %1112 = torch.aten.silu %86 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
  %1113 = torch.aten.t %20 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
  %1114 = torch.aten.addmm %29, %1112, %1113, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
  %1115 = torch.aten.slice.Tensor %1114, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,1280],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
  %1116 = torch.aten.slice.Tensor %1115, %int1, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,1280],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
  %1117 = torch.aten.unsqueeze %1116, %int2 : !torch.vtensor<[2,1280],f16>, !torch.int -> !torch.vtensor<[2,1280,1],f16>
  %1118 = torch.aten.unsqueeze %1117, %int3 : !torch.vtensor<[2,1280,1],f16>, !torch.int -> !torch.vtensor<[2,1280,1,1],f16>
  %1119 = torch.aten.add.Tensor %1111, %1118, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
  %1120 = torch.aten.view %1119, %909 : !torch.vtensor<[2,1280,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,32,40,256],f16>
  %1121 = torch.aten._to_copy %1120, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,32,40,256],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,32,40,256],f32>
  %1122 = torch.aten.var.correction %1121, %93, %int0, %true : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>
  %1123 = torch.aten.mean.dim %1121, %93, %true, %none : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
  %1124 = torch.aten.add.Tensor %1122, %5, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
  %1125 = torch.aten.rsqrt %1124 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
  %1126 = torch.aten.sub.Tensor %1120, %1123, %int1 : !torch.vtensor<[2,32,40,256],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,256],f32>
  %1127 = torch.aten.mul.Tensor %1126, %1125 : !torch.vtensor<[2,32,40,256],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,256],f32>
  %1128 = torch.aten.view %1127, %918 : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f32>
  %1129 = torch.aten.unsqueeze %29, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
  %1130 = torch.aten.unsqueeze %1129, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
  %1131 = torch.aten.mul.Tensor %1128, %1130 : !torch.vtensor<[2,1280,16,16],f32>, !torch.vtensor<[1280,1,1],f16> -> !torch.vtensor<[2,1280,16,16],f32>
  %1132 = torch.aten.unsqueeze %29, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
  %1133 = torch.aten.unsqueeze %1132, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
  %1134 = torch.aten.add.Tensor %1131, %1133, %int1 : !torch.vtensor<[2,1280,16,16],f32>, !torch.vtensor<[1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f32>
  %1135 = torch.aten._to_copy %1134, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,1280,16,16],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,16,16],f16>
  %1136 = torch.aten.silu %1135 : !torch.vtensor<[2,1280,16,16],f16> -> !torch.vtensor<[2,1280,16,16],f16>
  %1137 = torch.aten._convolution %1136, %25, %29, %87, %87, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,16,16],f16>
  %1138 = torch.aten.add.Tensor %1092, %1137, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
  %1139 = torch.aten.div.Tensor %1138, %4 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[2,1280,16,16],f16>
  %1140 = torch.aten.clone %1139, %int0 : !torch.vtensor<[2,1280,16,16],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
  %1141 = torch.aten.view %1140, %909 : !torch.vtensor<[2,1280,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,32,40,256],f16>
  %1142 = torch.aten._to_copy %1141, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,32,40,256],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,32,40,256],f32>
  %1143 = torch.aten.var.correction %1142, %93, %int0, %true : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>
  %1144 = torch.aten.mean.dim %1142, %93, %true, %none : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
  %1145 = torch.aten.add.Tensor %1143, %3, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
  %1146 = torch.aten.rsqrt %1145 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
  %1147 = torch.aten.sub.Tensor %1141, %1144, %int1 : !torch.vtensor<[2,32,40,256],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,256],f32>
  %1148 = torch.aten.mul.Tensor %1147, %1146 : !torch.vtensor<[2,32,40,256],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,256],f32>
  %1149 = torch.aten.view %1148, %918 : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f32>
  %1150 = torch.aten.unsqueeze %29, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
  %1151 = torch.aten.unsqueeze %1150, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
  %1152 = torch.aten.mul.Tensor %1149, %1151 : !torch.vtensor<[2,1280,16,16],f32>, !torch.vtensor<[1280,1,1],f16> -> !torch.vtensor<[2,1280,16,16],f32>
  %1153 = torch.aten.unsqueeze %29, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
  %1154 = torch.aten.unsqueeze %1153, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
  %1155 = torch.aten.add.Tensor %1152, %1154, %int1 : !torch.vtensor<[2,1280,16,16],f32>, !torch.vtensor<[1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f32>
  %1156 = torch.aten._to_copy %1155, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,1280,16,16],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,16,16],f16>
  %1157 = torch.aten._convolution %1156, %24, %29, %87, %88, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[1280,1280,1,1],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,16,16],f16>
  %1158 = torch.aten.permute %1157, %156 : !torch.vtensor<[2,1280,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,16,16,1280],f16>
  %1159 = torch.aten._reshape_alias %1158, %951, %952 : !torch.vtensor<[2,16,16,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
  %result0_43, %result1_44, %result2_45 = torch.aten.native_layer_norm %1159, %954, %29, %29, %float1.000000e-05 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.vtensor<[1280],f16>, !torch.vtensor<[1280],f16>, !torch.float -> !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1],f32>, !torch.vtensor<[2,256,1],f32>
  %1160 = torch.aten.t %20 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
  %1161 = torch.aten._reshape_alias %result0_43, %956, %957 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
  %1162 = torch.aten.mm %1161, %1160 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16>
  %1163 = torch.aten._unsafe_view %1162, %951 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
  %1164 = torch.aten.t %20 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
  %1165 = torch.aten._reshape_alias %result0_43, %956, %957 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
  %1166 = torch.aten.mm %1165, %1164 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16>
  %1167 = torch.aten._unsafe_view %1166, %951 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
  %1168 = torch.aten.t %20 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
  %1169 = torch.aten._reshape_alias %result0_43, %956, %957 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
  %1170 = torch.aten.mm %1169, %1168 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16>
  %1171 = torch.aten._unsafe_view %1170, %951 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
  %1172 = torch.aten._reshape_alias %1163, %969, %970 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16>
  %1173 = torch.aten.permute %1172, %179 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16>
  %1174 = torch.aten.clone %1173, %int0 : !torch.vtensor<[2,8,256,160],f16>, !torch.int -> !torch.vtensor<[2,8,256,160],f16>
  %1175 = torch.aten._unsafe_view %1174, %974 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
  %1176 = torch.aten._reshape_alias %1167, %969, %970 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16>
  %1177 = torch.aten.permute %1176, %179 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16>
  %1178 = torch.aten.clone %1177, %int0 : !torch.vtensor<[2,8,256,160],f16>, !torch.int -> !torch.vtensor<[2,8,256,160],f16>
  %1179 = torch.aten._unsafe_view %1178, %974 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
  %1180 = torch.aten._reshape_alias %1171, %969, %970 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16>
  %1181 = torch.aten.permute %1180, %179 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16>
  %1182 = torch.aten.clone %1181, %int0 : !torch.vtensor<[2,8,256,160],f16>, !torch.int -> !torch.vtensor<[2,8,256,160],f16>
  %1183 = torch.aten._unsafe_view %1182, %974 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
  %1184 = torch.aten.transpose.int %1179, %int-1, %int-2 : !torch.vtensor<[16,256,160],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,160,256],f16>
  %1185 = torch.aten.expand %1175, %974, %false : !torch.vtensor<[16,256,160],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,256,160],f16>
  %1186 = torch.aten._reshape_alias %1185, %974, %986 : !torch.vtensor<[16,256,160],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
  %1187 = torch.aten.expand %1184, %988, %false : !torch.vtensor<[16,160,256],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,160,256],f16>
  %1188 = torch.aten._reshape_alias %1187, %988, %990 : !torch.vtensor<[16,160,256],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,160,256],f16>
  %1189 = torch.aten.bmm %1186, %1188 : !torch.vtensor<[16,256,160],f16>, !torch.vtensor<[16,160,256],f16> -> !torch.vtensor<[16,256,256],f16>
  %1190 = torch.aten._unsafe_view %1189, %993 : !torch.vtensor<[16,256,256],f16>, !torch.list<int> -> !torch.vtensor<[16,256,256],f16>
  %1191 = torch.aten.mul.Tensor %1190, %0 : !torch.vtensor<[16,256,256],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,256,256],f16>
  %1192 = torch.aten._softmax %1191, %int-1, %false : !torch.vtensor<[16,256,256],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,256,256],f16>
  %1193 = torch.aten.expand %1192, %993, %false : !torch.vtensor<[16,256,256],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,256,256],f16>
  %1194 = torch.aten._reshape_alias %1193, %993, %998 : !torch.vtensor<[16,256,256],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,256,256],f16>
  %1195 = torch.aten.expand %1183, %974, %false : !torch.vtensor<[16,256,160],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,256,160],f16>
  %1196 = torch.aten._reshape_alias %1195, %974, %986 : !torch.vtensor<[16,256,160],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
  %1197 = torch.aten.bmm %1194, %1196 : !torch.vtensor<[16,256,256],f16>, !torch.vtensor<[16,256,160],f16> -> !torch.vtensor<[16,256,160],f16>
  %1198 = torch.aten._unsafe_view %1197, %974 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
  %1199 = torch.aten._reshape_alias %1198, %1004, %1005 : !torch.vtensor<[16,256,160],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16>
  %1200 = torch.aten.permute %1199, %179 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16>
  %1201 = torch.aten.clone %1200, %int0 : !torch.vtensor<[2,256,8,160],f16>, !torch.int -> !torch.vtensor<[2,256,8,160],f16>
  %1202 = torch.aten._unsafe_view %1201, %951 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
  %1203 = torch.aten.t %20 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
  %1204 = torch.aten.view %1202, %956 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
  %1205 = torch.aten.addmm %29, %1204, %1203, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[512,1280],f16>
  %1206 = torch.aten.view %1205, %951 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
  %1207 = torch.aten.add.Tensor %1206, %1159, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16>
  %result0_46, %result1_47, %result2_48 = torch.aten.native_layer_norm %1207, %954, %29, %29, %float1.000000e-05 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.vtensor<[1280],f16>, !torch.vtensor<[1280],f16>, !torch.float -> !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1],f32>, !torch.vtensor<[2,256,1],f32>
  %1208 = torch.aten.t %20 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
  %1209 = torch.aten._reshape_alias %result0_46, %956, %957 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
  %1210 = torch.aten.mm %1209, %1208 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16>
  %1211 = torch.aten._unsafe_view %1210, %951 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
  %1212 = torch.aten.t %19 : !torch.vtensor<[1280,768],f16> -> !torch.vtensor<[768,1280],f16>
  %1213 = torch.aten._reshape_alias %arg2, %228, %229 : !torch.vtensor<[2,77,768],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[154,768],f16>
  %1214 = torch.aten.mm %1213, %1212 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,1280],f16> -> !torch.vtensor<[154,1280],f16>
  %1215 = torch.aten._unsafe_view %1214, %1022 : !torch.vtensor<[154,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,77,1280],f16>
  %1216 = torch.aten.t %19 : !torch.vtensor<[1280,768],f16> -> !torch.vtensor<[768,1280],f16>
  %1217 = torch.aten._reshape_alias %arg2, %228, %229 : !torch.vtensor<[2,77,768],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[154,768],f16>
  %1218 = torch.aten.mm %1217, %1216 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,1280],f16> -> !torch.vtensor<[154,1280],f16>
  %1219 = torch.aten._unsafe_view %1218, %1022 : !torch.vtensor<[154,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,77,1280],f16>
  %1220 = torch.aten._reshape_alias %1211, %969, %970 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16>
  %1221 = torch.aten.permute %1220, %179 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16>
  %1222 = torch.aten.clone %1221, %int0 : !torch.vtensor<[2,8,256,160],f16>, !torch.int -> !torch.vtensor<[2,8,256,160],f16>
  %1223 = torch.aten._unsafe_view %1222, %974 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
  %1224 = torch.aten._reshape_alias %1215, %1032, %1033 : !torch.vtensor<[2,77,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,77,8,160],f16>
  %1225 = torch.aten.permute %1224, %179 : !torch.vtensor<[2,77,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,160],f16>
  %1226 = torch.aten.clone %1225, %int0 : !torch.vtensor<[2,8,77,160],f16>, !torch.int -> !torch.vtensor<[2,8,77,160],f16>
  %1227 = torch.aten._unsafe_view %1226, %1037 : !torch.vtensor<[2,8,77,160],f16>, !torch.list<int> -> !torch.vtensor<[16,77,160],f16>
  %1228 = torch.aten._reshape_alias %1219, %1032, %1033 : !torch.vtensor<[2,77,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,77,8,160],f16>
  %1229 = torch.aten.permute %1228, %179 : !torch.vtensor<[2,77,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,160],f16>
  %1230 = torch.aten.clone %1229, %int0 : !torch.vtensor<[2,8,77,160],f16>, !torch.int -> !torch.vtensor<[2,8,77,160],f16>
  %1231 = torch.aten._unsafe_view %1230, %1037 : !torch.vtensor<[2,8,77,160],f16>, !torch.list<int> -> !torch.vtensor<[16,77,160],f16>
  %1232 = torch.aten.transpose.int %1227, %int-1, %int-2 : !torch.vtensor<[16,77,160],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,160,77],f16>
  %1233 = torch.aten.expand %1223, %974, %false : !torch.vtensor<[16,256,160],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,256,160],f16>
  %1234 = torch.aten._reshape_alias %1233, %974, %986 : !torch.vtensor<[16,256,160],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
  %1235 = torch.aten.expand %1232, %1046, %false : !torch.vtensor<[16,160,77],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,160,77],f16>
  %1236 = torch.aten._reshape_alias %1235, %1046, %1048 : !torch.vtensor<[16,160,77],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,160,77],f16>
  %1237 = torch.aten.bmm %1234, %1236 : !torch.vtensor<[16,256,160],f16>, !torch.vtensor<[16,160,77],f16> -> !torch.vtensor<[16,256,77],f16>
  %1238 = torch.aten._unsafe_view %1237, %1051 : !torch.vtensor<[16,256,77],f16>, !torch.list<int> -> !torch.vtensor<[16,256,77],f16>
  %1239 = torch.aten.mul.Tensor %1238, %0 : !torch.vtensor<[16,256,77],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,256,77],f16>
  %1240 = torch.aten._softmax %1239, %int-1, %false : !torch.vtensor<[16,256,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,256,77],f16>
  %1241 = torch.aten.expand %1240, %1051, %false : !torch.vtensor<[16,256,77],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,256,77],f16>
  %1242 = torch.aten._reshape_alias %1241, %1051, %1056 : !torch.vtensor<[16,256,77],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,256,77],f16>
  %1243 = torch.aten.expand %1231, %1037, %false : !torch.vtensor<[16,77,160],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,77,160],f16>
  %1244 = torch.aten._reshape_alias %1243, %1037, %1059 : !torch.vtensor<[16,77,160],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,77,160],f16>
  %1245 = torch.aten.bmm %1242, %1244 : !torch.vtensor<[16,256,77],f16>, !torch.vtensor<[16,77,160],f16> -> !torch.vtensor<[16,256,160],f16>
  %1246 = torch.aten._unsafe_view %1245, %974 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
  %1247 = torch.aten._reshape_alias %1246, %1004, %1005 : !torch.vtensor<[16,256,160],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16>
  %1248 = torch.aten.permute %1247, %179 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16>
  %1249 = torch.aten.clone %1248, %int0 : !torch.vtensor<[2,256,8,160],f16>, !torch.int -> !torch.vtensor<[2,256,8,160],f16>
  %1250 = torch.aten._unsafe_view %1249, %951 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
  %1251 = torch.aten.t %20 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
  %1252 = torch.aten.view %1250, %956 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
  %1253 = torch.aten.addmm %29, %1252, %1251, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[512,1280],f16>
  %1254 = torch.aten.view %1253, %951 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
  %1255 = torch.aten.add.Tensor %1254, %1207, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16>
  %result0_49, %result1_50, %result2_51 = torch.aten.native_layer_norm %1255, %954, %29, %29, %float1.000000e-05 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.vtensor<[1280],f16>, !torch.vtensor<[1280],f16>, !torch.float -> !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1],f32>, !torch.vtensor<[2,256,1],f32>
  %1256 = torch.aten.t %21 : !torch.vtensor<[10240,1280],f16> -> !torch.vtensor<[1280,10240],f16>
  %1257 = torch.aten.view %result0_49, %956 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
  %1258 = torch.aten.addmm %22, %1257, %1256, %int1, %int1 : !torch.vtensor<[10240],f16>, !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,10240],f16>, !torch.int, !torch.int -> !torch.vtensor<[512,10240],f16>
  %1259 = torch.aten.view %1258, %1075 : !torch.vtensor<[512,10240],f16>, !torch.list<int> -> !torch.vtensor<[2,256,10240],f16>
  %1260 = torch.aten.slice.Tensor %1259, %int-1, %int0, %int5120, %int1 : !torch.vtensor<[2,256,10240],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,256,5120],f16>
  %1261 = torch.aten.slice.Tensor %1259, %int-1, %int5120, %int10240, %int1 : !torch.vtensor<[2,256,10240],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,256,5120],f16>
  %1262 = torch.aten.gelu %1261, %str_0 : !torch.vtensor<[2,256,5120],f16>, !torch.str -> !torch.vtensor<[2,256,5120],f16>
  %1263 = torch.aten.mul.Tensor %1260, %1262 : !torch.vtensor<[2,256,5120],f16>, !torch.vtensor<[2,256,5120],f16> -> !torch.vtensor<[2,256,5120],f16>
  %1264 = torch.aten.t %23 : !torch.vtensor<[1280,5120],f16> -> !torch.vtensor<[5120,1280],f16>
  %1265 = torch.aten.view %1263, %1082 : !torch.vtensor<[2,256,5120],f16>, !torch.list<int> -> !torch.vtensor<[512,5120],f16>
  %1266 = torch.aten.addmm %29, %1265, %1264, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[512,5120],f16>, !torch.vtensor<[5120,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[512,1280],f16>
  %1267 = torch.aten.view %1266, %951 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
  %1268 = torch.aten.add.Tensor %1267, %1255, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16>
  %1269 = torch.aten._reshape_alias %1268, %1087, %1088 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,16,16,1280],f16>
  %1270 = torch.aten.permute %1269, %300 : !torch.vtensor<[2,16,16,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f16>
  %1271 = torch.aten._convolution %1270, %24, %29, %87, %88, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[1280,1280,1,1],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,16,16],f16>
  %1272 = torch.aten.add.Tensor %1271, %1139, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
  %1273 = torch.aten._convolution %1272, %25, %29, %484, %87, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,8,8],f16>
  %1274 = torch.aten.clone %1273, %int0 : !torch.vtensor<[2,1280,8,8],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16>
  %1275 = torch.prim.ListConstruct %int2, %int32, %int40, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
  %1276 = torch.aten.view %1274, %1275 : !torch.vtensor<[2,1280,8,8],f16>, !torch.list<int> -> !torch.vtensor<[2,32,40,64],f16>
  %1277 = torch.aten._to_copy %1276, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,32,40,64],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,32,40,64],f32>
  %1278 = torch.aten.var.correction %1277, %93, %int0, %true : !torch.vtensor<[2,32,40,64],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>
  %1279 = torch.aten.mean.dim %1277, %93, %true, %none : !torch.vtensor<[2,32,40,64],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
  %1280 = torch.aten.add.Tensor %1278, %5, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
  %1281 = torch.aten.rsqrt %1280 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
  %1282 = torch.aten.sub.Tensor %1276, %1279, %int1 : !torch.vtensor<[2,32,40,64],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,64],f32>
  %1283 = torch.aten.mul.Tensor %1282, %1281 : !torch.vtensor<[2,32,40,64],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,64],f32>
  %1284 = torch.prim.ListConstruct %int2, %int1280, %int8, %int8 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
  %1285 = torch.aten.view %1283, %1284 : !torch.vtensor<[2,32,40,64],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,8,8],f32>
  %1286 = torch.aten.unsqueeze %29, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
  %1287 = torch.aten.unsqueeze %1286, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
  %1288 = torch.aten.mul.Tensor %1285, %1287 : !torch.vtensor<[2,1280,8,8],f32>, !torch.vtensor<[1280,1,1],f16> -> !torch.vtensor<[2,1280,8,8],f32>
  %1289 = torch.aten.unsqueeze %29, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
  %1290 = torch.aten.unsqueeze %1289, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
  %1291 = torch.aten.add.Tensor %1288, %1290, %int1 : !torch.vtensor<[2,1280,8,8],f32>, !torch.vtensor<[1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f32>
  %1292 = torch.aten._to_copy %1291, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,1280,8,8],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,8,8],f16>
  %1293 = torch.aten.silu %1292 : !torch.vtensor<[2,1280,8,8],f16> -> !torch.vtensor<[2,1280,8,8],f16>
  %1294 = torch.aten._convolution %1293, %25, %29, %87, %87, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,8,8],f16>
  %1295 = torch.aten.silu %86 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
  %1296 = torch.aten.t %20 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
  %1297 = torch.aten.addmm %29, %1295, %1296, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
  %1298 = torch.aten.slice.Tensor %1297, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,1280],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
  %1299 = torch.aten.slice.Tensor %1298, %int1, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,1280],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
  %1300 = torch.aten.unsqueeze %1299, %int2 : !torch.vtensor<[2,1280],f16>, !torch.int -> !torch.vtensor<[2,1280,1],f16>
  %1301 = torch.aten.unsqueeze %1300, %int3 : !torch.vtensor<[2,1280,1],f16>, !torch.int -> !torch.vtensor<[2,1280,1,1],f16>
  %1302 = torch.aten.add.Tensor %1294, %1301, %int1 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16>
  %1303 = torch.aten.view %1302, %1275 : !torch.vtensor<[2,1280,8,8],f16>, !torch.list<int> -> !torch.vtensor<[2,32,40,64],f16>
  %1304 = torch.aten._to_copy %1303, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,32,40,64],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,32,40,64],f32>
  %1305 = torch.aten.var.correction %1304, %93, %int0, %true : !torch.vtensor<[2,32,40,64],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>
  %1306 = torch.aten.mean.dim %1304, %93, %true, %none : !torch.vtensor<[2,32,40,64],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
  %1307 = torch.aten.add.Tensor %1305, %5, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
  %1308 = torch.aten.rsqrt %1307 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
  %1309 = torch.aten.sub.Tensor %1303, %1306, %int1 : !torch.vtensor<[2,32,40,64],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,64],f32>
  %1310 = torch.aten.mul.Tensor %1309, %1308 : !torch.vtensor<[2,32,40,64],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,64],f32>
  %1311 = torch.aten.view %1310, %1284 : !torch.vtensor<[2,32,40,64],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,8,8],f32>
  %1312 = torch.aten.unsqueeze %29, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
  %1313 = torch.aten.unsqueeze %1312, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
  %1314 = torch.aten.mul.Tensor %1311, %1313 : !torch.vtensor<[2,1280,8,8],f32>, !torch.vtensor<[1280,1,1],f16> -> !torch.vtensor<[2,1280,8,8],f32>
  %1315 = torch.aten.unsqueeze %29, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
  %1316 = torch.aten.unsqueeze %1315, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
  %1317 = torch.aten.add.Tensor %1314, %1316, %int1 : !torch.vtensor<[2,1280,8,8],f32>, !torch.vtensor<[1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f32>
  %1318 = torch.aten._to_copy %1317, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,1280,8,8],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,8,8],f16>
  %1319 = torch.aten.silu %1318 : !torch.vtensor<[2,1280,8,8],f16> -> !torch.vtensor<[2,1280,8,8],f16>
  %1320 = torch.aten._convolution %1319, %25, %29, %87, %87, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,8,8],f16>
  %1321 = torch.aten.add.Tensor %1273, %1320, %int1 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,8,8],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16>
  %1322 = torch.aten.div.Tensor %1321, %4 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[2,1280,8,8],f16>
  %1323 = torch.aten.clone %1322, %int0 : !torch.vtensor<[2,1280,8,8],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16>
  %1324 = torch.aten.view %1323, %1275 : !torch.vtensor<[2,1280,8,8],f16>, !torch.list<int> -> !torch.vtensor<[2,32,40,64],f16>
  %1325 = torch.aten._to_copy %1324, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,32,40,64],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,32,40,64],f32>
  %1326 = torch.aten.var.correction %1325, %93, %int0, %true : !torch.vtensor<[2,32,40,64],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>
  %1327 = torch.aten.mean.dim %1325, %93, %true, %none : !torch.vtensor<[2,32,40,64],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
  %1328 = torch.aten.add.Tensor %1326, %5, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
  %1329 = torch.aten.rsqrt %1328 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
  %1330 = torch.aten.sub.Tensor %1324, %1327, %int1 : !torch.vtensor<[2,32,40,64],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,64],f32>
  %1331 = torch.aten.mul.Tensor %1330, %1329 : !torch.vtensor<[2,32,40,64],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,64],f32>
  %1332 = torch.aten.view %1331, %1284 : !torch.vtensor<[2,32,40,64],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,8,8],f32>
  %1333 = torch.aten.unsqueeze %29, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
  %1334 = torch.aten.unsqueeze %1333, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
  %1335 = torch.aten.mul.Tensor %1332, %1334 : !torch.vtensor<[2,1280,8,8],f32>, !torch.vtensor<[1280,1,1],f16> -> !torch.vtensor<[2,1280,8,8],f32>
  %1336 = torch.aten.unsqueeze %29, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
  %1337 = torch.aten.unsqueeze %1336, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
  %1338 = torch.aten.add.Tensor %1335, %1337, %int1 : !torch.vtensor<[2,1280,8,8],f32>, !torch.vtensor<[1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f32>
  %1339 = torch.aten._to_copy %1338, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,1280,8,8],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,8,8],f16>
  %1340 = torch.aten.silu %1339 : !torch.vtensor<[2,1280,8,8],f16> -> !torch.vtensor<[2,1280,8,8],f16>
  %1341 = torch.aten._convolution %1340, %25, %29, %87, %87, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,8,8],f16>
  %1342 = torch.aten.silu %86 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
  %1343 = torch.aten.t %20 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
  %1344 = torch.aten.addmm %29, %1342, %1343, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
  %1345 = torch.aten.slice.Tensor %1344, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,1280],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
  %1346 = torch.aten.slice.Tensor %1345, %int1, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,1280],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
  %1347 = torch.aten.unsqueeze %1346, %int2 : !torch.vtensor<[2,1280],f16>, !torch.int -> !torch.vtensor<[2,1280,1],f16>
  %1348 = torch.aten.unsqueeze %1347, %int3 : !torch.vtensor<[2,1280,1],f16>, !torch.int -> !torch.vtensor<[2,1280,1,1],f16>
  %1349 = torch.aten.add.Tensor %1341, %1348, %int1 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16>
  %1350 = torch.aten.view %1349, %1275 : !torch.vtensor<[2,1280,8,8],f16>, !torch.list<int> -> !torch.vtensor<[2,32,40,64],f16>
  %1351 = torch.aten._to_copy %1350, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,32,40,64],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,32,40,64],f32>
  %1352 = torch.aten.var.correction %1351, %93, %int0, %true : !torch.vtensor<[2,32,40,64],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>
  %1353 = torch.aten.mean.dim %1351, %93, %true, %none : !torch.vtensor<[2,32,40,64],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
  %1354 = torch.aten.add.Tensor %1352, %5, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
  %1355 = torch.aten.rsqrt %1354 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
  %1356 = torch.aten.sub.Tensor %1350, %1353, %int1 : !torch.vtensor<[2,32,40,64],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,64],f32>
  %1357 = torch.aten.mul.Tensor %1356, %1355 : !torch.vtensor<[2,32,40,64],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,64],f32>
  %1358 = torch.aten.view %1357, %1284 : !torch.vtensor<[2,32,40,64],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,8,8],f32>
  %1359 = torch.aten.unsqueeze %29, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
  %1360 = torch.aten.unsqueeze %1359, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
  %1361 = torch.aten.mul.Tensor %1358, %1360 : !torch.vtensor<[2,1280,8,8],f32>, !torch.vtensor<[1280,1,1],f16> -> !torch.vtensor<[2,1280,8,8],f32>
  %1362 = torch.aten.unsqueeze %29, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
  %1363 = torch.aten.unsqueeze %1362, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
  %1364 = torch.aten.add.Tensor %1361, %1363, %int1 : !torch.vtensor<[2,1280,8,8],f32>, !torch.vtensor<[1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f32>
  %1365 = torch.aten._to_copy %1364, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,1280,8,8],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,8,8],f16>
  %1366 = torch.aten.silu %1365 : !torch.vtensor<[2,1280,8,8],f16> -> !torch.vtensor<[2,1280,8,8],f16>
  %1367 = torch.aten._convolution %1366, %25, %29, %87, %87, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,8,8],f16>
  %1368 = torch.aten.add.Tensor %1322, %1367, %int1 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,8,8],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16>
  %1369 = torch.aten.div.Tensor %1368, %4 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[2,1280,8,8],f16>
  %1370 = torch.aten.clone %1369, %int0 : !torch.vtensor<[2,1280,8,8],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16>
  %1371 = torch.aten.view %1370, %1275 : !torch.vtensor<[2,1280,8,8],f16>, !torch.list<int> -> !torch.vtensor<[2,32,40,64],f16>
  %1372 = torch.aten._to_copy %1371, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,32,40,64],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,32,40,64],f32>
  %1373 = torch.aten.var.correction %1372, %93, %int0, %true : !torch.vtensor<[2,32,40,64],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>
  %1374 = torch.aten.mean.dim %1372, %93, %true, %none : !torch.vtensor<[2,32,40,64],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
  %1375 = torch.aten.add.Tensor %1373, %5, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
  %1376 = torch.aten.rsqrt %1375 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
  %1377 = torch.aten.sub.Tensor %1371, %1374, %int1 : !torch.vtensor<[2,32,40,64],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,64],f32>
  %1378 = torch.aten.mul.Tensor %1377, %1376 : !torch.vtensor<[2,32,40,64],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,64],f32>
  %1379 = torch.aten.view %1378, %1284 : !torch.vtensor<[2,32,40,64],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,8,8],f32>
  %1380 = torch.aten.unsqueeze %29, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
  %1381 = torch.aten.unsqueeze %1380, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
  %1382 = torch.aten.mul.Tensor %1379, %1381 : !torch.vtensor<[2,1280,8,8],f32>, !torch.vtensor<[1280,1,1],f16> -> !torch.vtensor<[2,1280,8,8],f32>
  %1383 = torch.aten.unsqueeze %29, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
  %1384 = torch.aten.unsqueeze %1383, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
  %1385 = torch.aten.add.Tensor %1382, %1384, %int1 : !torch.vtensor<[2,1280,8,8],f32>, !torch.vtensor<[1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f32>
  %1386 = torch.aten._to_copy %1385, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,1280,8,8],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,8,8],f16>
  %1387 = torch.aten.silu %1386 : !torch.vtensor<[2,1280,8,8],f16> -> !torch.vtensor<[2,1280,8,8],f16>
  %1388 = torch.aten._convolution %1387, %25, %29, %87, %87, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,8,8],f16>
  %1389 = torch.aten.silu %86 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
  %1390 = torch.aten.t %20 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
  %1391 = torch.aten.addmm %29, %1389, %1390, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
  %1392 = torch.aten.slice.Tensor %1391, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,1280],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
  %1393 = torch.aten.slice.Tensor %1392, %int1, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,1280],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
  %1394 = torch.aten.unsqueeze %1393, %int2 : !torch.vtensor<[2,1280],f16>, !torch.int -> !torch.vtensor<[2,1280,1],f16>
  %1395 = torch.aten.unsqueeze %1394, %int3 : !torch.vtensor<[2,1280,1],f16>, !torch.int -> !torch.vtensor<[2,1280,1,1],f16>
  %1396 = torch.aten.add.Tensor %1388, %1395, %int1 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16>
  %1397 = torch.aten.view %1396, %1275 : !torch.vtensor<[2,1280,8,8],f16>, !torch.list<int> -> !torch.vtensor<[2,32,40,64],f16>
  %1398 = torch.aten._to_copy %1397, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,32,40,64],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,32,40,64],f32>
  %1399 = torch.aten.var.correction %1398, %93, %int0, %true : !torch.vtensor<[2,32,40,64],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>
  %1400 = torch.aten.mean.dim %1398, %93, %true, %none : !torch.vtensor<[2,32,40,64],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
  %1401 = torch.aten.add.Tensor %1399, %5, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
  %1402 = torch.aten.rsqrt %1401 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
  %1403 = torch.aten.sub.Tensor %1397, %1400, %int1 : !torch.vtensor<[2,32,40,64],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,64],f32>
  %1404 = torch.aten.mul.Tensor %1403, %1402 : !torch.vtensor<[2,32,40,64],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,64],f32>
  %1405 = torch.aten.view %1404, %1284 : !torch.vtensor<[2,32,40,64],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,8,8],f32>
  %1406 = torch.aten.unsqueeze %29, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
  %1407 = torch.aten.unsqueeze %1406, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
  %1408 = torch.aten.mul.Tensor %1405, %1407 : !torch.vtensor<[2,1280,8,8],f32>, !torch.vtensor<[1280,1,1],f16> -> !torch.vtensor<[2,1280,8,8],f32>
  %1409 = torch.aten.unsqueeze %29, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
  %1410 = torch.aten.unsqueeze %1409, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
  %1411 = torch.aten.add.Tensor %1408, %1410, %int1 : !torch.vtensor<[2,1280,8,8],f32>, !torch.vtensor<[1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f32>
  %1412 = torch.aten._to_copy %1411, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,1280,8,8],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,8,8],f16>
  %1413 = torch.aten.silu %1412 : !torch.vtensor<[2,1280,8,8],f16> -> !torch.vtensor<[2,1280,8,8],f16>
  %1414 = torch.aten._convolution %1413, %25, %29, %87, %87, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,8,8],f16>
  %1415 = torch.aten.add.Tensor %1369, %1414, %int1 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,8,8],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16>
  %1416 = torch.aten.div.Tensor %1415, %6 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[],si64> -> !torch.vtensor<[2,1280,8,8],f16>
  %1417 = torch.aten.clone %1416, %int0 : !torch.vtensor<[2,1280,8,8],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16>
  %1418 = torch.aten.view %1417, %1275 : !torch.vtensor<[2,1280,8,8],f16>, !torch.list<int> -> !torch.vtensor<[2,32,40,64],f16>
  %1419 = torch.aten._to_copy %1418, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,32,40,64],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,32,40,64],f32>
  %1420 = torch.aten.var.correction %1419, %93, %int0, %true : !torch.vtensor<[2,32,40,64],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>
  %1421 = torch.aten.mean.dim %1419, %93, %true, %none : !torch.vtensor<[2,32,40,64],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
  %1422 = torch.aten.add.Tensor %1420, %3, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
  %1423 = torch.aten.rsqrt %1422 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
  %1424 = torch.aten.sub.Tensor %1418, %1421, %int1 : !torch.vtensor<[2,32,40,64],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,64],f32>
  %1425 = torch.aten.mul.Tensor %1424, %1423 : !torch.vtensor<[2,32,40,64],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,64],f32>
  %1426 = torch.aten.view %1425, %1284 : !torch.vtensor<[2,32,40,64],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,8,8],f32>
  %1427 = torch.aten.unsqueeze %29, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
  %1428 = torch.aten.unsqueeze %1427, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
  %1429 = torch.aten.mul.Tensor %1426, %1428 : !torch.vtensor<[2,1280,8,8],f32>, !torch.vtensor<[1280,1,1],f16> -> !torch.vtensor<[2,1280,8,8],f32>
  %1430 = torch.aten.unsqueeze %29, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
  %1431 = torch.aten.unsqueeze %1430, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
  %1432 = torch.aten.add.Tensor %1429, %1431, %int1 : !torch.vtensor<[2,1280,8,8],f32>, !torch.vtensor<[1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f32>
  %1433 = torch.aten._to_copy %1432, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,1280,8,8],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,8,8],f16>
  %1434 = torch.aten._convolution %1433, %24, %29, %87, %88, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[1280,1280,1,1],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,8,8],f16>
  %1435 = torch.aten.permute %1434, %156 : !torch.vtensor<[2,1280,8,8],f16>, !torch.list<int> -> !torch.vtensor<[2,8,8,1280],f16>
  %1436 = torch.prim.ListConstruct %int2, %int64, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
  %1437 = torch.prim.ListConstruct %int81920, %int1, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
  %1438 = torch.aten._reshape_alias %1435, %1436, %1437 : !torch.vtensor<[2,8,8,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,64,1280],f16>
  %result0_52, %result1_53, %result2_54 = torch.aten.native_layer_norm %1438, %954, %29, %29, %float1.000000e-05 : !torch.vtensor<[2,64,1280],f16>, !torch.list<int>, !torch.vtensor<[1280],f16>, !torch.vtensor<[1280],f16>, !torch.float -> !torch.vtensor<[2,64,1280],f16>, !torch.vtensor<[2,64,1],f32>, !torch.vtensor<[2,64,1],f32>
  %1439 = torch.aten.t %20 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
  %1440 = torch.prim.ListConstruct %int128, %int1280 : (!torch.int, !torch.int) -> !torch.list<int>
  %1441 = torch.aten._reshape_alias %result0_52, %1440, %957 : !torch.vtensor<[2,64,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[128,1280],f16>
  %1442 = torch.aten.mm %1441, %1439 : !torch.vtensor<[128,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[128,1280],f16>
  %1443 = torch.aten._unsafe_view %1442, %1436 : !torch.vtensor<[128,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,64,1280],f16>
  %1444 = torch.aten.t %20 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
  %1445 = torch.aten._reshape_alias %result0_52, %1440, %957 : !torch.vtensor<[2,64,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[128,1280],f16>
  %1446 = torch.aten.mm %1445, %1444 : !torch.vtensor<[128,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[128,1280],f16>
  %1447 = torch.aten._unsafe_view %1446, %1436 : !torch.vtensor<[128,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,64,1280],f16>
  %1448 = torch.aten.t %20 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
  %1449 = torch.aten._reshape_alias %result0_52, %1440, %957 : !torch.vtensor<[2,64,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[128,1280],f16>
  %1450 = torch.aten.mm %1449, %1448 : !torch.vtensor<[128,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[128,1280],f16>
  %1451 = torch.aten._unsafe_view %1450, %1436 : !torch.vtensor<[128,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,64,1280],f16>
  // Split three [2,64,1280] tensors (%1443, %1447, %1451) into 8 heads of width
  // 160. Each one is reshaped to [2,64,8,160], permuted with %179 to
  // [2,8,64,160], cloned, and viewed as [16,64,160] (batch and head dims fused).
  // NOTE(review): %1443 and %1447 are defined before this chunk; judging by how
  // their head-split results feed the bmm ops below they look like the query
  // and key projections, with %1451 as the value projection — confirm.
  // %1452 is the target shape list, %1453 the matching stride list.
  %1452 = torch.prim.ListConstruct %int2, %int64, %int8, %int160 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
  %1453 = torch.prim.ListConstruct %int81920, %int1280, %int160, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
  // First tensor (%1443) -> %1458.
  %1454 = torch.aten._reshape_alias %1443, %1452, %1453 : !torch.vtensor<[2,64,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,64,8,160],f16>
  %1455 = torch.aten.permute %1454, %179 : !torch.vtensor<[2,64,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,64,160],f16>
  %1456 = torch.aten.clone %1455, %int0 : !torch.vtensor<[2,8,64,160],f16>, !torch.int -> !torch.vtensor<[2,8,64,160],f16>
  // %1457 is the fused [16,64,160] shape list; it is reused by later views.
  %1457 = torch.prim.ListConstruct %int16, %int64, %int160 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
  %1458 = torch.aten._unsafe_view %1456, %1457 : !torch.vtensor<[2,8,64,160],f16>, !torch.list<int> -> !torch.vtensor<[16,64,160],f16>
  // Second tensor (%1447) -> %1462.
  %1459 = torch.aten._reshape_alias %1447, %1452, %1453 : !torch.vtensor<[2,64,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,64,8,160],f16>
  %1460 = torch.aten.permute %1459, %179 : !torch.vtensor<[2,64,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,64,160],f16>
  %1461 = torch.aten.clone %1460, %int0 : !torch.vtensor<[2,8,64,160],f16>, !torch.int -> !torch.vtensor<[2,8,64,160],f16>
  %1462 = torch.aten._unsafe_view %1461, %1457 : !torch.vtensor<[2,8,64,160],f16>, !torch.list<int> -> !torch.vtensor<[16,64,160],f16>
  // Third tensor (%1451) -> %1466.
  %1463 = torch.aten._reshape_alias %1451, %1452, %1453 : !torch.vtensor<[2,64,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,64,8,160],f16>
  %1464 = torch.aten.permute %1463, %179 : !torch.vtensor<[2,64,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,64,160],f16>
  %1465 = torch.aten.clone %1464, %int0 : !torch.vtensor<[2,8,64,160],f16>, !torch.int -> !torch.vtensor<[2,8,64,160],f16>
  %1466 = torch.aten._unsafe_view %1465, %1457 : !torch.vtensor<[2,8,64,160],f16>, !torch.list<int> -> !torch.vtensor<[16,64,160],f16>
  // Attention over the 64 positions: scores = softmax((%1458 x %1462^T) * %0),
  // output = scores x %1466. The expand/_reshape_alias pairs in this run have
  // identical input and result types, so they do not change any shape.
  // Swap the last two dims of %1462: [16,64,160] -> [16,160,64].
  %1467 = torch.aten.transpose.int %1462, %int-1, %int-2 : !torch.vtensor<[16,64,160],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,160,64],f16>
  %1468 = torch.aten.expand %1458, %1457, %false : !torch.vtensor<[16,64,160],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,64,160],f16>
  %1469 = torch.prim.ListConstruct %int10240, %int160, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
  %1470 = torch.aten._reshape_alias %1468, %1457, %1469 : !torch.vtensor<[16,64,160],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,64,160],f16>
  %1471 = torch.prim.ListConstruct %int16, %int160, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
  %1472 = torch.aten.expand %1467, %1471, %false : !torch.vtensor<[16,160,64],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,160,64],f16>
  %1473 = torch.prim.ListConstruct %int10240, %int1, %int160 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
  %1474 = torch.aten._reshape_alias %1472, %1471, %1473 : !torch.vtensor<[16,160,64],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,160,64],f16>
  // Batched matmul: [16,64,160] x [16,160,64] -> [16,64,64] raw scores.
  %1475 = torch.aten.bmm %1470, %1474 : !torch.vtensor<[16,64,160],f16>, !torch.vtensor<[16,160,64],f16> -> !torch.vtensor<[16,64,64],f16>
  %1476 = torch.prim.ListConstruct %int16, %int64, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
  %1477 = torch.aten._unsafe_view %1475, %1476 : !torch.vtensor<[16,64,64],f16>, !torch.list<int> -> !torch.vtensor<[16,64,64],f16>
  // Scale by the scalar f64 literal %0 (0.0790569415..., i.e. 1/sqrt(160), where
  // 160 is the per-head width); the result stays f16.
  %1478 = torch.aten.mul.Tensor %1477, %0 : !torch.vtensor<[16,64,64],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,64,64],f16>
  // Softmax along the last dim (%int-1).
  %1479 = torch.aten._softmax %1478, %int-1, %false : !torch.vtensor<[16,64,64],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,64,64],f16>
  %1480 = torch.aten.expand %1479, %1476, %false : !torch.vtensor<[16,64,64],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,64,64],f16>
  %1481 = torch.prim.ListConstruct %int4096, %int64, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
  %1482 = torch.aten._reshape_alias %1480, %1476, %1481 : !torch.vtensor<[16,64,64],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,64,64],f16>
  %1483 = torch.aten.expand %1466, %1457, %false : !torch.vtensor<[16,64,160],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,64,160],f16>
  %1484 = torch.aten._reshape_alias %1483, %1457, %1469 : !torch.vtensor<[16,64,160],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,64,160],f16>
  // Weighted sum: [16,64,64] x [16,64,160] -> [16,64,160].
  %1485 = torch.aten.bmm %1482, %1484 : !torch.vtensor<[16,64,64],f16>, !torch.vtensor<[16,64,160],f16> -> !torch.vtensor<[16,64,160],f16>
  %1486 = torch.aten._unsafe_view %1485, %1457 : !torch.vtensor<[16,64,160],f16>, !torch.list<int> -> !torch.vtensor<[16,64,160],f16>
  // Merge the 8 heads back: [16,64,160] -> [2,8,64,160] -> permute with %179 to
  // [2,64,8,160] -> clone -> view with %1436 to [2,64,1280].
  // %1487/%1488 are the unfused shape and stride lists.
  // NOTE(review): %179 maps [2,64,8,160] to [2,8,64,160] in earlier ops and the
  // reverse here, consistent with a swap of dims 1 and 2 — %179 is defined
  // outside this chunk, confirm.
  %1487 = torch.prim.ListConstruct %int2, %int8, %int64, %int160 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
  %1488 = torch.prim.ListConstruct %int81920, %int10240, %int160, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
  %1489 = torch.aten._reshape_alias %1486, %1487, %1488 : !torch.vtensor<[16,64,160],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,8,64,160],f16>
  %1490 = torch.aten.permute %1489, %179 : !torch.vtensor<[2,8,64,160],f16>, !torch.list<int> -> !torch.vtensor<[2,64,8,160],f16>
  %1491 = torch.aten.clone %1490, %int0 : !torch.vtensor<[2,64,8,160],f16>, !torch.int -> !torch.vtensor<[2,64,8,160],f16>
  %1492 = torch.aten._unsafe_view %1491, %1436 : !torch.vtensor<[2,64,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,64,1280],f16>
  // Output projection: flatten to [128,1280], then addmm with bias %29 and the
  // transposed weight %20 (both scale operands are %int1).
  %1493 = torch.aten.t %20 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
  %1494 = torch.aten.view %1492, %1440 : !torch.vtensor<[2,64,1280],f16>, !torch.list<int> -> !torch.vtensor<[128,1280],f16>
  %1495 = torch.aten.addmm %29, %1494, %1493, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[128,1280],f16>, !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[128,1280],f16>
  %1496 = torch.aten.view %1495, %1436 : !torch.vtensor<[128,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,64,1280],f16>
  // Residual add with %1438, which is defined before this chunk.
  %1497 = torch.aten.add.Tensor %1496, %1438, %int1 : !torch.vtensor<[2,64,1280],f16>, !torch.vtensor<[2,64,1280],f16>, !torch.int -> !torch.vtensor<[2,64,1280],f16>
  // Layer norm of %1497 with normalized-shape list %954 and epsilon operand
  // %float1.000000e-05. It yields the normalized f16 tensor plus two f32
  // [2,64,1] statistics results, which are not used in the visible chunk.
  // NOTE(review): weight and bias are the same value %29 here. The weights in
  // this dump are elided, so distinct parameters were presumably collapsed
  // into one literal — confirm before reading anything into shared operands.
  %result0_55, %result1_56, %result2_57 = torch.aten.native_layer_norm %1497, %954, %29, %29, %float1.000000e-05 : !torch.vtensor<[2,64,1280],f16>, !torch.list<int>, !torch.vtensor<[1280],f16>, !torch.vtensor<[1280],f16>, !torch.float -> !torch.vtensor<[2,64,1280],f16>, !torch.vtensor<[2,64,1],f32>, !torch.vtensor<[2,64,1],f32>
  // Bias-free projection of the normalized tensor: [128,1280] x t(%20),
  // viewed back to [2,64,1280].
  %1498 = torch.aten.t %20 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
  %1499 = torch.aten._reshape_alias %result0_55, %1440, %957 : !torch.vtensor<[2,64,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[128,1280],f16>
  %1500 = torch.aten.mm %1499, %1498 : !torch.vtensor<[128,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[128,1280],f16>
  %1501 = torch.aten._unsafe_view %1500, %1436 : !torch.vtensor<[128,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,64,1280],f16>
  // Two bias-free projections of the function argument %arg2 ([2,77,768]):
  // flatten to [154,768], multiply by t(%19) ([768,1280]), view as [2,77,1280].
  // Both use the same operands, so %1505 and %1509 are computed identically in
  // this dump.
  %1502 = torch.aten.t %19 : !torch.vtensor<[1280,768],f16> -> !torch.vtensor<[768,1280],f16>
  %1503 = torch.aten._reshape_alias %arg2, %228, %229 : !torch.vtensor<[2,77,768],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[154,768],f16>
  %1504 = torch.aten.mm %1503, %1502 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,1280],f16> -> !torch.vtensor<[154,1280],f16>
  %1505 = torch.aten._unsafe_view %1504, %1022 : !torch.vtensor<[154,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,77,1280],f16>
  %1506 = torch.aten.t %19 : !torch.vtensor<[1280,768],f16> -> !torch.vtensor<[768,1280],f16>
  %1507 = torch.aten._reshape_alias %arg2, %228, %229 : !torch.vtensor<[2,77,768],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[154,768],f16>
  %1508 = torch.aten.mm %1507, %1506 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,1280],f16> -> !torch.vtensor<[154,1280],f16>
  %1509 = torch.aten._unsafe_view %1508, %1022 : !torch.vtensor<[154,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,77,1280],f16>
  // Split into 8 heads of width 160 with the same reshape/permute/clone/view
  // sequence for each tensor.
  // %1501 ([2,64,1280]) -> %1513 ([16,64,160]).
  %1510 = torch.aten._reshape_alias %1501, %1452, %1453 : !torch.vtensor<[2,64,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,64,8,160],f16>
  %1511 = torch.aten.permute %1510, %179 : !torch.vtensor<[2,64,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,64,160],f16>
  %1512 = torch.aten.clone %1511, %int0 : !torch.vtensor<[2,8,64,160],f16>, !torch.int -> !torch.vtensor<[2,8,64,160],f16>
  %1513 = torch.aten._unsafe_view %1512, %1457 : !torch.vtensor<[2,8,64,160],f16>, !torch.list<int> -> !torch.vtensor<[16,64,160],f16>
  // %1505 ([2,77,1280]) -> %1517 ([16,77,160]); the shape/stride lists
  // %1032, %1033 and %1037 are defined before this chunk.
  %1514 = torch.aten._reshape_alias %1505, %1032, %1033 : !torch.vtensor<[2,77,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,77,8,160],f16>
  %1515 = torch.aten.permute %1514, %179 : !torch.vtensor<[2,77,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,160],f16>
  %1516 = torch.aten.clone %1515, %int0 : !torch.vtensor<[2,8,77,160],f16>, !torch.int -> !torch.vtensor<[2,8,77,160],f16>
  %1517 = torch.aten._unsafe_view %1516, %1037 : !torch.vtensor<[2,8,77,160],f16>, !torch.list<int> -> !torch.vtensor<[16,77,160],f16>
  // %1509 ([2,77,1280]) -> %1521 ([16,77,160]).
  %1518 = torch.aten._reshape_alias %1509, %1032, %1033 : !torch.vtensor<[2,77,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,77,8,160],f16>
  %1519 = torch.aten.permute %1518, %179 : !torch.vtensor<[2,77,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,160],f16>
  %1520 = torch.aten.clone %1519, %int0 : !torch.vtensor<[2,8,77,160],f16>, !torch.int -> !torch.vtensor<[2,8,77,160],f16>
  %1521 = torch.aten._unsafe_view %1520, %1037 : !torch.vtensor<[2,8,77,160],f16>, !torch.list<int> -> !torch.vtensor<[16,77,160],f16>
  // Attention of the 64 positions (%1513) over the 77 positions derived from
  // %arg2: scores = softmax((%1513 x %1517^T) * %0), output = scores x %1521.
  // The expand/_reshape_alias pairs keep their input shapes unchanged.
  // Swap the last two dims of %1517: [16,77,160] -> [16,160,77].
  %1522 = torch.aten.transpose.int %1517, %int-1, %int-2 : !torch.vtensor<[16,77,160],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,160,77],f16>
  %1523 = torch.aten.expand %1513, %1457, %false : !torch.vtensor<[16,64,160],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,64,160],f16>
  %1524 = torch.aten._reshape_alias %1523, %1457, %1469 : !torch.vtensor<[16,64,160],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,64,160],f16>
  %1525 = torch.aten.expand %1522, %1046, %false : !torch.vtensor<[16,160,77],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,160,77],f16>
  %1526 = torch.aten._reshape_alias %1525, %1046, %1048 : !torch.vtensor<[16,160,77],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,160,77],f16>
  // Batched matmul: [16,64,160] x [16,160,77] -> [16,64,77] raw scores.
  %1527 = torch.aten.bmm %1524, %1526 : !torch.vtensor<[16,64,160],f16>, !torch.vtensor<[16,160,77],f16> -> !torch.vtensor<[16,64,77],f16>
  %1528 = torch.prim.ListConstruct %int16, %int64, %int77 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
  %1529 = torch.aten._unsafe_view %1527, %1528 : !torch.vtensor<[16,64,77],f16>, !torch.list<int> -> !torch.vtensor<[16,64,77],f16>
  // Scale by the scalar f64 literal %0 (0.0790569415..., i.e. 1/sqrt(160)).
  %1530 = torch.aten.mul.Tensor %1529, %0 : !torch.vtensor<[16,64,77],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,64,77],f16>
  // Softmax along the last dim (%int-1), i.e. across the 77 positions.
  %1531 = torch.aten._softmax %1530, %int-1, %false : !torch.vtensor<[16,64,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,64,77],f16>
  %1532 = torch.aten.expand %1531, %1528, %false : !torch.vtensor<[16,64,77],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,64,77],f16>
  %1533 = torch.prim.ListConstruct %int4928, %int77, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
  %1534 = torch.aten._reshape_alias %1532, %1528, %1533 : !torch.vtensor<[16,64,77],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,64,77],f16>
  %1535 = torch.aten.expand %1521, %1037, %false : !torch.vtensor<[16,77,160],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,77,160],f16>
  %1536 = torch.aten._reshape_alias %1535, %1037, %1059 : !torch.vtensor<[16,77,160],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,77,160],f16>
  // Weighted sum: [16,64,77] x [16,77,160] -> [16,64,160].
  %1537 = torch.aten.bmm %1534, %1536 : !torch.vtensor<[16,64,77],f16>, !torch.vtensor<[16,77,160],f16> -> !torch.vtensor<[16,64,160],f16>
  %1538 = torch.aten._unsafe_view %1537, %1457 : !torch.vtensor<[16,64,160],f16>, !torch.list<int> -> !torch.vtensor<[16,64,160],f16>
  // Merge the 8 heads back: [16,64,160] -> [2,8,64,160] -> permute with %179 to
  // [2,64,8,160] -> clone -> view with %1436 to [2,64,1280].
  %1539 = torch.aten._reshape_alias %1538, %1487, %1488 : !torch.vtensor<[16,64,160],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,8,64,160],f16>
  %1540 = torch.aten.permute %1539, %179 : !torch.vtensor<[2,8,64,160],f16>, !torch.list<int> -> !torch.vtensor<[2,64,8,160],f16>
  %1541 = torch.aten.clone %1540, %int0 : !torch.vtensor<[2,64,8,160],f16>, !torch.int -> !torch.vtensor<[2,64,8,160],f16>
  %1542 = torch.aten._unsafe_view %1541, %1436 : !torch.vtensor<[2,64,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,64,1280],f16>
  // Output projection: addmm with bias %29 and the transposed weight %20 on the
  // flattened [128,1280] tensor.
  %1543 = torch.aten.t %20 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
  %1544 = torch.aten.view %1542, %1440 : !torch.vtensor<[2,64,1280],f16>, !torch.list<int> -> !torch.vtensor<[128,1280],f16>
  %1545 = torch.aten.addmm %29, %1544, %1543, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[128,1280],f16>, !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[128,1280],f16>
  %1546 = torch.aten.view %1545, %1436 : !torch.vtensor<[128,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,64,1280],f16>
  // Residual add with %1497, the tensor that was fed to the preceding layer norm.
  %1547 = torch.aten.add.Tensor %1546, %1497, %int1 : !torch.vtensor<[2,64,1280],f16>, !torch.vtensor<[2,64,1280],f16>, !torch.int -> !torch.vtensor<[2,64,1280],f16>
  // Feed-forward stage: layer norm, expand 1280 -> 10240, gate one half of the
  // expansion with GELU of the other half, project 5120 -> 1280, residual add.
  // Layer norm of %1547; weight and bias are both %29 in this dump.
  %result0_58, %result1_59, %result2_60 = torch.aten.native_layer_norm %1547, %954, %29, %29, %float1.000000e-05 : !torch.vtensor<[2,64,1280],f16>, !torch.list<int>, !torch.vtensor<[1280],f16>, !torch.vtensor<[1280],f16>, !torch.float -> !torch.vtensor<[2,64,1280],f16>, !torch.vtensor<[2,64,1],f32>, !torch.vtensor<[2,64,1],f32>
  // Expansion: [128,1280] x t(%21) + bias %22 -> [128,10240], viewed as
  // [2,64,10240].
  %1548 = torch.aten.t %21 : !torch.vtensor<[10240,1280],f16> -> !torch.vtensor<[1280,10240],f16>
  %1549 = torch.aten.view %result0_58, %1440 : !torch.vtensor<[2,64,1280],f16>, !torch.list<int> -> !torch.vtensor<[128,1280],f16>
  %1550 = torch.aten.addmm %22, %1549, %1548, %int1, %int1 : !torch.vtensor<[10240],f16>, !torch.vtensor<[128,1280],f16>, !torch.vtensor<[1280,10240],f16>, !torch.int, !torch.int -> !torch.vtensor<[128,10240],f16>
  %1551 = torch.prim.ListConstruct %int2, %int64, %int10240 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
  %1552 = torch.aten.view %1550, %1551 : !torch.vtensor<[128,10240],f16>, !torch.list<int> -> !torch.vtensor<[2,64,10240],f16>
  // Split the last dim into two 5120-wide halves: %1553 takes the range
  // %int0..%int5120 and %1554 the range %int5120..%int10240.
  %1553 = torch.aten.slice.Tensor %1552, %int-1, %int0, %int5120, %int1 : !torch.vtensor<[2,64,10240],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,64,5120],f16>
  %1554 = torch.aten.slice.Tensor %1552, %int-1, %int5120, %int10240, %int1 : !torch.vtensor<[2,64,10240],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,64,5120],f16>
  // GELU of the second half multiplies the first half elementwise. The string
  // operand %str_0 is defined outside this chunk (presumably the GELU
  // approximation mode — confirm).
  %1555 = torch.aten.gelu %1554, %str_0 : !torch.vtensor<[2,64,5120],f16>, !torch.str -> !torch.vtensor<[2,64,5120],f16>
  %1556 = torch.aten.mul.Tensor %1553, %1555 : !torch.vtensor<[2,64,5120],f16>, !torch.vtensor<[2,64,5120],f16> -> !torch.vtensor<[2,64,5120],f16>
  // Projection back: [128,5120] x t(%23) + bias %29 -> [128,1280].
  %1557 = torch.aten.t %23 : !torch.vtensor<[1280,5120],f16> -> !torch.vtensor<[5120,1280],f16>
  %1558 = torch.prim.ListConstruct %int128, %int5120 : (!torch.int, !torch.int) -> !torch.list<int>
  %1559 = torch.aten.view %1556, %1558 : !torch.vtensor<[2,64,5120],f16>, !torch.list<int> -> !torch.vtensor<[128,5120],f16>
  %1560 = torch.aten.addmm %29, %1559, %1557, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[128,5120],f16>, !torch.vtensor<[5120,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[128,1280],f16>
  %1561 = torch.aten.view %1560, %1436 : !torch.vtensor<[128,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,64,1280],f16>
  // Residual add with %1547, the input of this stage's layer norm.
  %1562 = torch.aten.add.Tensor %1561, %1547, %int1 : !torch.vtensor<[2,64,1280],f16>, !torch.vtensor<[2,64,1280],f16>, !torch.int -> !torch.vtensor<[2,64,1280],f16>
  // Turn the [2,64,1280] sequence back into a feature map: reshape to
  // [2,8,8,1280], then permute with %300 to [2,1280,8,8].
  %1563 = torch.prim.ListConstruct %int2, %int8, %int8, %int1280 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
  %1564 = torch.prim.ListConstruct %int81920, %int10240, %int1280, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
  %1565 = torch.aten._reshape_alias %1562, %1563, %1564 : !torch.vtensor<[2,64,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,8,8,1280],f16>
  %1566 = torch.aten.permute %1565, %300 : !torch.vtensor<[2,8,8,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,8,8],f16>
  // Convolution with the 1x1 kernel %24 ([1280,1280,1,1]) and bias %29; the
  // spatial size stays 8x8. The list operands %87 and %88 are defined outside
  // this chunk (presumably stride/dilation and padding values — confirm).
  %1567 = torch.aten._convolution %1566, %24, %29, %87, %88, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[1280,1280,1,1],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,8,8],f16>
  // Residual add with %1416, a [2,1280,8,8] tensor defined before this chunk.
  %1568 = torch.aten.add.Tensor %1567, %1416, %int1 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,8,8],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16>
  // Decomposed group normalization of %1568 with 32 groups, followed by SiLU
  // and a 3x3 convolution.
  // View [2,1280,8,8] as [2,32,40,64]: 32 groups, each holding 40 channels x
  // 64 spatial positions.
  %1569 = torch.aten.clone %1568, %int0 : !torch.vtensor<[2,1280,8,8],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16>
  %1570 = torch.aten.view %1569, %1275 : !torch.vtensor<[2,1280,8,8],f16>, !torch.list<int> -> !torch.vtensor<[2,32,40,64],f16>
  // Statistics are computed on an f32 copy. Variance (correction operand %int0)
  // and mean are reduced over the dims listed in %93 with keepdim %true, which
  // gives [2,32,1,1].
  %1571 = torch.aten._to_copy %1570, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,32,40,64],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,32,40,64],f32>
  %1572 = torch.aten.var.correction %1571, %93, %int0, %true : !torch.vtensor<[2,32,40,64],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>
  %1573 = torch.aten.mean.dim %1571, %93, %true, %none : !torch.vtensor<[2,32,40,64],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
  // rsqrt(var + %5), where %5 is the scalar f64 literal 1.0e-05.
  %1574 = torch.aten.add.Tensor %1572, %5, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
  %1575 = torch.aten.rsqrt %1574 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
  // The subtraction takes the f16 view %1570 (not the f32 copy %1571) and the
  // f32 mean; the result type is f32.
  %1576 = torch.aten.sub.Tensor %1570, %1573, %int1 : !torch.vtensor<[2,32,40,64],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,64],f32>
  %1577 = torch.aten.mul.Tensor %1576, %1575 : !torch.vtensor<[2,32,40,64],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,64],f32>
  // Back to [2,1280,8,8], then per-channel scale and shift. Both come from %29
  // unsqueezed to [1280,1,1] in this dump.
  %1578 = torch.aten.view %1577, %1284 : !torch.vtensor<[2,32,40,64],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,8,8],f32>
  %1579 = torch.aten.unsqueeze %29, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
  %1580 = torch.aten.unsqueeze %1579, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
  %1581 = torch.aten.mul.Tensor %1578, %1580 : !torch.vtensor<[2,1280,8,8],f32>, !torch.vtensor<[1280,1,1],f16> -> !torch.vtensor<[2,1280,8,8],f32>
  %1582 = torch.aten.unsqueeze %29, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
  %1583 = torch.aten.unsqueeze %1582, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
  %1584 = torch.aten.add.Tensor %1581, %1583, %int1 : !torch.vtensor<[2,1280,8,8],f32>, !torch.vtensor<[1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f32>
  // Cast back to f16, apply SiLU, then convolve with the 3x3 kernel %25 and
  // bias %29; the output stays [2,1280,8,8].
  %1585 = torch.aten._to_copy %1584, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,1280,8,8],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,8,8],f16>
  %1586 = torch.aten.silu %1585 : !torch.vtensor<[2,1280,8,8],f16> -> !torch.vtensor<[2,1280,8,8],f16>
  %1587 = torch.aten._convolution %1586, %25, %29, %87, %87, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,8,8],f16>
  // Project the [2,1280] tensor %86 and add it to the conv output %1587 as a
  // per-sample, per-channel offset.
  // NOTE(review): %86 is defined before this chunk; it is presumably the
  // timestep embedding — confirm.
  // SiLU, then addmm with bias %29 and the transposed weight %20.
  %1588 = torch.aten.silu %86 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
  %1589 = torch.aten.t %20 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
  %1590 = torch.aten.addmm %29, %1588, %1589, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
  // Two slices from %int0 to %int9223372036854775807 with step %int1, one per
  // dim; the result type is unchanged at [2,1280].
  %1591 = torch.aten.slice.Tensor %1590, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,1280],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
  %1592 = torch.aten.slice.Tensor %1591, %int1, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,1280],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
  // Append two unit dims to get [2,1280,1,1]; the add then broadcasts over the
  // 8x8 spatial dims.
  %1593 = torch.aten.unsqueeze %1592, %int2 : !torch.vtensor<[2,1280],f16>, !torch.int -> !torch.vtensor<[2,1280,1],f16>
  %1594 = torch.aten.unsqueeze %1593, %int3 : !torch.vtensor<[2,1280,1],f16>, !torch.int -> !torch.vtensor<[2,1280,1,1],f16>
  %1595 = torch.aten.add.Tensor %1587, %1594, %int1 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16>
  // Second decomposed group norm (32 groups) on %1595, then SiLU, a 3x3
  // convolution, and the residual connection back to %1568.
  // View as [2,32,40,64] and compute the statistics on an f32 copy.
  %1596 = torch.aten.view %1595, %1275 : !torch.vtensor<[2,1280,8,8],f16>, !torch.list<int> -> !torch.vtensor<[2,32,40,64],f16>
  %1597 = torch.aten._to_copy %1596, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,32,40,64],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,32,40,64],f32>
  %1598 = torch.aten.var.correction %1597, %93, %int0, %true : !torch.vtensor<[2,32,40,64],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>
  %1599 = torch.aten.mean.dim %1597, %93, %true, %none : !torch.vtensor<[2,32,40,64],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
  // rsqrt(var + %5), with %5 the scalar literal 1.0e-05.
  %1600 = torch.aten.add.Tensor %1598, %5, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
  %1601 = torch.aten.rsqrt %1600 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
  // (x - mean) * rsqrt, where x is the f16 view %1596; the result is f32.
  %1602 = torch.aten.sub.Tensor %1596, %1599, %int1 : !torch.vtensor<[2,32,40,64],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,64],f32>
  %1603 = torch.aten.mul.Tensor %1602, %1601 : !torch.vtensor<[2,32,40,64],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,64],f32>
  // Back to [2,1280,8,8]; per-channel scale and shift both use %29 unsqueezed
  // to [1280,1,1].
  %1604 = torch.aten.view %1603, %1284 : !torch.vtensor<[2,32,40,64],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,8,8],f32>
  %1605 = torch.aten.unsqueeze %29, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
  %1606 = torch.aten.unsqueeze %1605, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
  %1607 = torch.aten.mul.Tensor %1604, %1606 : !torch.vtensor<[2,1280,8,8],f32>, !torch.vtensor<[1280,1,1],f16> -> !torch.vtensor<[2,1280,8,8],f32>
  %1608 = torch.aten.unsqueeze %29, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
  %1609 = torch.aten.unsqueeze %1608, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
  %1610 = torch.aten.add.Tensor %1607, %1609, %int1 : !torch.vtensor<[2,1280,8,8],f32>, !torch.vtensor<[1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f32>
  // Cast to f16, SiLU, 3x3 convolution with kernel %25 and bias %29.
  %1611 = torch.aten._to_copy %1610, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,1280,8,8],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,8,8],f16>
  %1612 = torch.aten.silu %1611 : !torch.vtensor<[2,1280,8,8],f16> -> !torch.vtensor<[2,1280,8,8],f16>
  %1613 = torch.aten._convolution %1612, %25, %29, %87, %87, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,8,8],f16>
  // Residual: %1568 + conv output, then divide by %6 (the scalar si64 literal 1).
  %1614 = torch.aten.add.Tensor %1568, %1613, %int1 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,8,8],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16>
  %1615 = torch.aten.div.Tensor %1614, %6 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[],si64> -> !torch.vtensor<[2,1280,8,8],f16>
  // Concatenate %1615 with %1369 (a [2,1280,8,8] tensor defined before this
  // chunk) along dim %int1, giving 2560 channels.
  // NOTE(review): %1369 looks like a saved skip-connection activation — confirm.
  %1616 = torch.prim.ListConstruct %1615, %1369 : (!torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,8,8],f16>) -> !torch.list<vtensor>
  %1617 = torch.aten.cat %1616, %int1 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[2,2560,8,8],f16>
  // Decomposed group norm with 32 groups: view as [2,32,80,64] (80 channels x
  // 64 spatial positions per group) and compute the statistics on an f32 copy.
  %1618 = torch.aten.clone %1617, %int0 : !torch.vtensor<[2,2560,8,8],f16>, !torch.int -> !torch.vtensor<[2,2560,8,8],f16>
  %1619 = torch.prim.ListConstruct %int2, %int32, %int80, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
  %1620 = torch.aten.view %1618, %1619 : !torch.vtensor<[2,2560,8,8],f16>, !torch.list<int> -> !torch.vtensor<[2,32,80,64],f16>
  %1621 = torch.aten._to_copy %1620, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,32,80,64],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,32,80,64],f32>
  %1622 = torch.aten.var.correction %1621, %93, %int0, %true : !torch.vtensor<[2,32,80,64],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>
  %1623 = torch.aten.mean.dim %1621, %93, %true, %none : !torch.vtensor<[2,32,80,64],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
  // rsqrt(var + %5), with %5 the scalar literal 1.0e-05.
  %1624 = torch.aten.add.Tensor %1622, %5, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
  %1625 = torch.aten.rsqrt %1624 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
  // (x - mean) * rsqrt, where x is the f16 view %1620; the result is f32.
  %1626 = torch.aten.sub.Tensor %1620, %1623, %int1 : !torch.vtensor<[2,32,80,64],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,80,64],f32>
  %1627 = torch.aten.mul.Tensor %1626, %1625 : !torch.vtensor<[2,32,80,64],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,80,64],f32>
  // Back to [2,2560,8,8]; per-channel scale and shift both use the [2560]
  // tensor %52 unsqueezed to [2560,1,1].
  %1628 = torch.prim.ListConstruct %int2, %int2560, %int8, %int8 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
  %1629 = torch.aten.view %1627, %1628 : !torch.vtensor<[2,32,80,64],f32>, !torch.list<int> -> !torch.vtensor<[2,2560,8,8],f32>
  %1630 = torch.aten.unsqueeze %52, %int-1 : !torch.vtensor<[2560],f16>, !torch.int -> !torch.vtensor<[2560,1],f16>
  %1631 = torch.aten.unsqueeze %1630, %int-1 : !torch.vtensor<[2560,1],f16>, !torch.int -> !torch.vtensor<[2560,1,1],f16>
  %1632 = torch.aten.mul.Tensor %1629, %1631 : !torch.vtensor<[2,2560,8,8],f32>, !torch.vtensor<[2560,1,1],f16> -> !torch.vtensor<[2,2560,8,8],f32>
  %1633 = torch.aten.unsqueeze %52, %int-1 : !torch.vtensor<[2560],f16>, !torch.int -> !torch.vtensor<[2560,1],f16>
  %1634 = torch.aten.unsqueeze %1633, %int-1 : !torch.vtensor<[2560,1],f16>, !torch.int -> !torch.vtensor<[2560,1,1],f16>
  %1635 = torch.aten.add.Tensor %1632, %1634, %int1 : !torch.vtensor<[2,2560,8,8],f32>, !torch.vtensor<[2560,1,1],f16>, !torch.int -> !torch.vtensor<[2,2560,8,8],f32>
  // Cast to f16, SiLU, then the 3x3 convolution with kernel %15
  // ([1280,2560,3,3]) reduces 2560 channels to 1280.
  %1636 = torch.aten._to_copy %1635, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,2560,8,8],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,2560,8,8],f16>
  %1637 = torch.aten.silu %1636 : !torch.vtensor<[2,2560,8,8],f16> -> !torch.vtensor<[2,2560,8,8],f16>
  %1638 = torch.aten._convolution %1637, %15, %29, %87, %87, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,2560,8,8],f16>, !torch.vtensor<[1280,2560,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,8,8],f16>
  // Project the [2,1280] tensor %86 and add it to the conv output %1638 as a
  // per-sample, per-channel offset.
  // NOTE(review): %86 is defined before this chunk; it is presumably the
  // timestep embedding — confirm.
  // SiLU, then addmm with bias %29 and the transposed weight %20.
  %1639 = torch.aten.silu %86 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
  %1640 = torch.aten.t %20 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
  %1641 = torch.aten.addmm %29, %1639, %1640, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
  // Two slices from %int0 to %int9223372036854775807 with step %int1, one per
  // dim; the result type is unchanged at [2,1280].
  %1642 = torch.aten.slice.Tensor %1641, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,1280],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
  %1643 = torch.aten.slice.Tensor %1642, %int1, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,1280],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
  // Append two unit dims to get [2,1280,1,1]; the add then broadcasts over the
  // 8x8 spatial dims.
  %1644 = torch.aten.unsqueeze %1643, %int2 : !torch.vtensor<[2,1280],f16>, !torch.int -> !torch.vtensor<[2,1280,1],f16>
  %1645 = torch.aten.unsqueeze %1644, %int3 : !torch.vtensor<[2,1280,1],f16>, !torch.int -> !torch.vtensor<[2,1280,1,1],f16>
  %1646 = torch.aten.add.Tensor %1638, %1645, %int1 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16>
  // Decomposed group norm (32 groups) on %1646, then SiLU and a 3x3 convolution.
  // View as [2,32,40,64] and compute the statistics on an f32 copy.
  %1647 = torch.aten.view %1646, %1275 : !torch.vtensor<[2,1280,8,8],f16>, !torch.list<int> -> !torch.vtensor<[2,32,40,64],f16>
  %1648 = torch.aten._to_copy %1647, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,32,40,64],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,32,40,64],f32>
  %1649 = torch.aten.var.correction %1648, %93, %int0, %true : !torch.vtensor<[2,32,40,64],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>
  %1650 = torch.aten.mean.dim %1648, %93, %true, %none : !torch.vtensor<[2,32,40,64],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
  // rsqrt(var + %5), with %5 the scalar literal 1.0e-05.
  %1651 = torch.aten.add.Tensor %1649, %5, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
  %1652 = torch.aten.rsqrt %1651 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
  // (x - mean) * rsqrt, where x is the f16 view %1647; the result is f32.
  %1653 = torch.aten.sub.Tensor %1647, %1650, %int1 : !torch.vtensor<[2,32,40,64],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,64],f32>
  %1654 = torch.aten.mul.Tensor %1653, %1652 : !torch.vtensor<[2,32,40,64],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,64],f32>
  // Back to [2,1280,8,8]; per-channel scale and shift both use %29 unsqueezed
  // to [1280,1,1].
  %1655 = torch.aten.view %1654, %1284 : !torch.vtensor<[2,32,40,64],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,8,8],f32>
  %1656 = torch.aten.unsqueeze %29, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
  %1657 = torch.aten.unsqueeze %1656, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
  %1658 = torch.aten.mul.Tensor %1655, %1657 : !torch.vtensor<[2,1280,8,8],f32>, !torch.vtensor<[1280,1,1],f16> -> !torch.vtensor<[2,1280,8,8],f32>
  %1659 = torch.aten.unsqueeze %29, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
  %1660 = torch.aten.unsqueeze %1659, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
  %1661 = torch.aten.add.Tensor %1658, %1660, %int1 : !torch.vtensor<[2,1280,8,8],f32>, !torch.vtensor<[1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f32>
  // Cast to f16, SiLU, 3x3 convolution with kernel %25 and bias %29.
  %1662 = torch.aten._to_copy %1661, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,1280,8,8],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,8,8],f16>
  %1663 = torch.aten.silu %1662 : !torch.vtensor<[2,1280,8,8],f16> -> !torch.vtensor<[2,1280,8,8],f16>
  %1664 = torch.aten._convolution %1663, %25, %29, %87, %87, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,8,8],f16>
  // Shortcut path: convolve the 2560-channel concatenation %1617 with the 1x1
  // kernel %16 ([1280,2560,1,1]) so it matches the 1280-channel main path.
  %1665 = torch.aten._convolution %1617, %16, %29, %87, %88, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,2560,8,8],f16>, !torch.vtensor<[1280,2560,1,1],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,8,8],f16>
  // Add the main-path conv output %1664, then divide by %4 (the scalar f64
  // literal 1.0).
  %1666 = torch.aten.add.Tensor %1665, %1664, %int1 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,8,8],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16>
  %1667 = torch.aten.div.Tensor %1666, %4 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[2,1280,8,8],f16>
  // Concatenate %1667 with %1322 (defined before this chunk) along the dim given by
  // %int1, giving [2,2560,8,8], then clone.
  %1668 = torch.prim.ListConstruct %1667, %1322 : (!torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,8,8],f16>) -> !torch.list<vtensor>
  %1669 = torch.aten.cat %1668, %int1 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[2,2560,8,8],f16>
  %1670 = torch.aten.clone %1669, %int0 : !torch.vtensor<[2,2560,8,8],f16>, !torch.int -> !torch.vtensor<[2,2560,8,8],f16>
  // Normalisation written out as primitive ops: view as [2,32,80,64], take variance
  // and mean in f32 reduced to [2,32,1,1], add %5 (literal 1.0e-05 at the top of the
  // function), rsqrt, subtract the mean and scale.
  // NOTE(review): this matches the shape of a decomposed group norm with 32 groups -
  // confirm against the originating model.
  %1671 = torch.aten.view %1670, %1619 : !torch.vtensor<[2,2560,8,8],f16>, !torch.list<int> -> !torch.vtensor<[2,32,80,64],f16>
  %1672 = torch.aten._to_copy %1671, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,32,80,64],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,32,80,64],f32>
  %1673 = torch.aten.var.correction %1672, %93, %int0, %true : !torch.vtensor<[2,32,80,64],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>
  %1674 = torch.aten.mean.dim %1672, %93, %true, %none : !torch.vtensor<[2,32,80,64],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
  %1675 = torch.aten.add.Tensor %1673, %5, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
  %1676 = torch.aten.rsqrt %1675 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
  // The subtraction reads the f16 view %1671 (not the f32 copy %1672); its result is f32.
  %1677 = torch.aten.sub.Tensor %1671, %1674, %int1 : !torch.vtensor<[2,32,80,64],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,80,64],f32>
  %1678 = torch.aten.mul.Tensor %1677, %1676 : !torch.vtensor<[2,32,80,64],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,80,64],f32>
  %1679 = torch.aten.view %1678, %1628 : !torch.vtensor<[2,32,80,64],f32>, !torch.list<int> -> !torch.vtensor<[2,2560,8,8],f32>
  // Per-channel multiply and add: %52 ([2560]) is unsqueezed twice to [2560,1,1] for
  // each. The same value %52 feeds both the multiply and the add.
  // NOTE(review): presumably distinct scale/shift weights collapsed into one value
  // because the constants are elided in this dump - confirm.
  %1680 = torch.aten.unsqueeze %52, %int-1 : !torch.vtensor<[2560],f16>, !torch.int -> !torch.vtensor<[2560,1],f16>
  %1681 = torch.aten.unsqueeze %1680, %int-1 : !torch.vtensor<[2560,1],f16>, !torch.int -> !torch.vtensor<[2560,1,1],f16>
  %1682 = torch.aten.mul.Tensor %1679, %1681 : !torch.vtensor<[2,2560,8,8],f32>, !torch.vtensor<[2560,1,1],f16> -> !torch.vtensor<[2,2560,8,8],f32>
  %1683 = torch.aten.unsqueeze %52, %int-1 : !torch.vtensor<[2560],f16>, !torch.int -> !torch.vtensor<[2560,1],f16>
  %1684 = torch.aten.unsqueeze %1683, %int-1 : !torch.vtensor<[2560,1],f16>, !torch.int -> !torch.vtensor<[2560,1,1],f16>
  %1685 = torch.aten.add.Tensor %1682, %1684, %int1 : !torch.vtensor<[2,2560,8,8],f32>, !torch.vtensor<[2560,1,1],f16>, !torch.int -> !torch.vtensor<[2,2560,8,8],f32>
  // Cast back to f16, apply SiLU, then convolve with the [1280,2560,3,3] weight %15
  // (2560 -> 1280 channels, spatial size unchanged).
  %1686 = torch.aten._to_copy %1685, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,2560,8,8],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,2560,8,8],f16>
  %1687 = torch.aten.silu %1686 : !torch.vtensor<[2,2560,8,8],f16> -> !torch.vtensor<[2,2560,8,8],f16>
  %1688 = torch.aten._convolution %1687, %15, %29, %87, %87, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,2560,8,8],f16>, !torch.vtensor<[1280,2560,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,8,8],f16>
  // Project the [2,1280] tensor %86 (defined before this chunk): SiLU, then addmm of
  // %29 with the product of %1689 and the transposed %20.
  // NOTE(review): presumably the timestep-embedding projection of the block - confirm.
  %1689 = torch.aten.silu %86 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
  %1690 = torch.aten.t %20 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
  %1691 = torch.aten.addmm %29, %1689, %1690, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
  // Both slices return the same [2,1280] type as their input, i.e. the full extent.
  %1692 = torch.aten.slice.Tensor %1691, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,1280],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
  %1693 = torch.aten.slice.Tensor %1692, %int1, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,1280],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
  // Append two unit dims to reach [2,1280,1,1], then add to the conv output %1688
  // ([2,1280,8,8]).
  %1694 = torch.aten.unsqueeze %1693, %int2 : !torch.vtensor<[2,1280],f16>, !torch.int -> !torch.vtensor<[2,1280,1],f16>
  %1695 = torch.aten.unsqueeze %1694, %int3 : !torch.vtensor<[2,1280,1],f16>, !torch.int -> !torch.vtensor<[2,1280,1,1],f16>
  %1696 = torch.aten.add.Tensor %1688, %1695, %int1 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16>
  // Normalisation of %1696 written out as primitive ops: view as [2,32,40,64], f32
  // variance/mean reduced to [2,32,1,1], add %5 (literal 1.0e-05 at the top of the
  // function), rsqrt, subtract mean, scale, view back to [2,1280,8,8].
  // NOTE(review): looks like a decomposed group norm with 32 groups - confirm.
  %1697 = torch.aten.view %1696, %1275 : !torch.vtensor<[2,1280,8,8],f16>, !torch.list<int> -> !torch.vtensor<[2,32,40,64],f16>
  %1698 = torch.aten._to_copy %1697, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,32,40,64],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,32,40,64],f32>
  %1699 = torch.aten.var.correction %1698, %93, %int0, %true : !torch.vtensor<[2,32,40,64],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>
  %1700 = torch.aten.mean.dim %1698, %93, %true, %none : !torch.vtensor<[2,32,40,64],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
  %1701 = torch.aten.add.Tensor %1699, %5, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
  %1702 = torch.aten.rsqrt %1701 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
  %1703 = torch.aten.sub.Tensor %1697, %1700, %int1 : !torch.vtensor<[2,32,40,64],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,64],f32>
  %1704 = torch.aten.mul.Tensor %1703, %1702 : !torch.vtensor<[2,32,40,64],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,64],f32>
  %1705 = torch.aten.view %1704, %1284 : !torch.vtensor<[2,32,40,64],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,8,8],f32>
  // Per-channel multiply and add, both using %29 ([1280]) unsqueezed to [1280,1,1].
  %1706 = torch.aten.unsqueeze %29, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
  %1707 = torch.aten.unsqueeze %1706, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
  %1708 = torch.aten.mul.Tensor %1705, %1707 : !torch.vtensor<[2,1280,8,8],f32>, !torch.vtensor<[1280,1,1],f16> -> !torch.vtensor<[2,1280,8,8],f32>
  %1709 = torch.aten.unsqueeze %29, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
  %1710 = torch.aten.unsqueeze %1709, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
  %1711 = torch.aten.add.Tensor %1708, %1710, %int1 : !torch.vtensor<[2,1280,8,8],f32>, !torch.vtensor<[1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f32>
  // Cast to f16, SiLU, convolution with the [1280,1280,3,3] weight %25.
  %1712 = torch.aten._to_copy %1711, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,1280,8,8],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,8,8],f16>
  %1713 = torch.aten.silu %1712 : !torch.vtensor<[2,1280,8,8],f16> -> !torch.vtensor<[2,1280,8,8],f16>
  %1714 = torch.aten._convolution %1713, %25, %29, %87, %87, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,8,8],f16>
  // Second path: the concatenated input %1669 through the [1280,2560,1,1] weight %16;
  // the two paths are summed and divided by %4 (literal 1.0 at the top of the function).
  %1715 = torch.aten._convolution %1669, %16, %29, %87, %88, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,2560,8,8],f16>, !torch.vtensor<[1280,2560,1,1],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,8,8],f16>
  %1716 = torch.aten.add.Tensor %1715, %1714, %int1 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,8,8],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16>
  %1717 = torch.aten.div.Tensor %1716, %4 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[2,1280,8,8],f16>
  // Concatenate %1717 with %1273 (defined before this chunk) along the dim given by
  // %int1, giving [2,2560,8,8], then clone.
  %1718 = torch.prim.ListConstruct %1717, %1273 : (!torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,8,8],f16>) -> !torch.list<vtensor>
  %1719 = torch.aten.cat %1718, %int1 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[2,2560,8,8],f16>
  %1720 = torch.aten.clone %1719, %int0 : !torch.vtensor<[2,2560,8,8],f16>, !torch.int -> !torch.vtensor<[2,2560,8,8],f16>
  // Normalisation written out as primitive ops over the [2,32,80,64] view: f32
  // variance/mean reduced to [2,32,1,1], add %5 (literal 1.0e-05 at the top of the
  // function), rsqrt, subtract mean, scale, view back to [2,2560,8,8].
  // NOTE(review): looks like a decomposed group norm with 32 groups - confirm.
  %1721 = torch.aten.view %1720, %1619 : !torch.vtensor<[2,2560,8,8],f16>, !torch.list<int> -> !torch.vtensor<[2,32,80,64],f16>
  %1722 = torch.aten._to_copy %1721, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,32,80,64],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,32,80,64],f32>
  %1723 = torch.aten.var.correction %1722, %93, %int0, %true : !torch.vtensor<[2,32,80,64],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>
  %1724 = torch.aten.mean.dim %1722, %93, %true, %none : !torch.vtensor<[2,32,80,64],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
  %1725 = torch.aten.add.Tensor %1723, %5, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
  %1726 = torch.aten.rsqrt %1725 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
  %1727 = torch.aten.sub.Tensor %1721, %1724, %int1 : !torch.vtensor<[2,32,80,64],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,80,64],f32>
  %1728 = torch.aten.mul.Tensor %1727, %1726 : !torch.vtensor<[2,32,80,64],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,80,64],f32>
  %1729 = torch.aten.view %1728, %1628 : !torch.vtensor<[2,32,80,64],f32>, !torch.list<int> -> !torch.vtensor<[2,2560,8,8],f32>
  // Per-channel multiply and add, both using %52 ([2560]) unsqueezed to [2560,1,1].
  %1730 = torch.aten.unsqueeze %52, %int-1 : !torch.vtensor<[2560],f16>, !torch.int -> !torch.vtensor<[2560,1],f16>
  %1731 = torch.aten.unsqueeze %1730, %int-1 : !torch.vtensor<[2560,1],f16>, !torch.int -> !torch.vtensor<[2560,1,1],f16>
  %1732 = torch.aten.mul.Tensor %1729, %1731 : !torch.vtensor<[2,2560,8,8],f32>, !torch.vtensor<[2560,1,1],f16> -> !torch.vtensor<[2,2560,8,8],f32>
  %1733 = torch.aten.unsqueeze %52, %int-1 : !torch.vtensor<[2560],f16>, !torch.int -> !torch.vtensor<[2560,1],f16>
  %1734 = torch.aten.unsqueeze %1733, %int-1 : !torch.vtensor<[2560,1],f16>, !torch.int -> !torch.vtensor<[2560,1,1],f16>
  %1735 = torch.aten.add.Tensor %1732, %1734, %int1 : !torch.vtensor<[2,2560,8,8],f32>, !torch.vtensor<[2560,1,1],f16>, !torch.int -> !torch.vtensor<[2,2560,8,8],f32>
  // Cast to f16, SiLU, convolution with the [1280,2560,3,3] weight %15 (2560 -> 1280
  // channels).
  %1736 = torch.aten._to_copy %1735, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,2560,8,8],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,2560,8,8],f16>
  %1737 = torch.aten.silu %1736 : !torch.vtensor<[2,2560,8,8],f16> -> !torch.vtensor<[2,2560,8,8],f16>
  %1738 = torch.aten._convolution %1737, %15, %29, %87, %87, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,2560,8,8],f16>, !torch.vtensor<[1280,2560,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,8,8],f16>
  // Project the [2,1280] tensor %86 (defined before this chunk): SiLU, then addmm of
  // %29 with the product of %1739 and the transposed %20.
  // NOTE(review): presumably the timestep-embedding projection of the block - confirm.
  %1739 = torch.aten.silu %86 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
  %1740 = torch.aten.t %20 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
  %1741 = torch.aten.addmm %29, %1739, %1740, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
  // Both slices return the same [2,1280] type as their input, i.e. the full extent.
  %1742 = torch.aten.slice.Tensor %1741, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,1280],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
  %1743 = torch.aten.slice.Tensor %1742, %int1, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,1280],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
  // Append two unit dims to reach [2,1280,1,1], then add to the conv output %1738.
  %1744 = torch.aten.unsqueeze %1743, %int2 : !torch.vtensor<[2,1280],f16>, !torch.int -> !torch.vtensor<[2,1280,1],f16>
  %1745 = torch.aten.unsqueeze %1744, %int3 : !torch.vtensor<[2,1280,1],f16>, !torch.int -> !torch.vtensor<[2,1280,1,1],f16>
  %1746 = torch.aten.add.Tensor %1738, %1745, %int1 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16>
  // Normalisation of %1746 written out as primitive ops: view as [2,32,40,64], f32
  // variance/mean reduced to [2,32,1,1], add %5 (literal 1.0e-05 at the top of the
  // function), rsqrt, subtract mean, scale, view back to [2,1280,8,8].
  // NOTE(review): looks like a decomposed group norm with 32 groups - confirm.
  %1747 = torch.aten.view %1746, %1275 : !torch.vtensor<[2,1280,8,8],f16>, !torch.list<int> -> !torch.vtensor<[2,32,40,64],f16>
  %1748 = torch.aten._to_copy %1747, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,32,40,64],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,32,40,64],f32>
  %1749 = torch.aten.var.correction %1748, %93, %int0, %true : !torch.vtensor<[2,32,40,64],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>
  %1750 = torch.aten.mean.dim %1748, %93, %true, %none : !torch.vtensor<[2,32,40,64],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
  %1751 = torch.aten.add.Tensor %1749, %5, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
  %1752 = torch.aten.rsqrt %1751 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
  %1753 = torch.aten.sub.Tensor %1747, %1750, %int1 : !torch.vtensor<[2,32,40,64],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,64],f32>
  %1754 = torch.aten.mul.Tensor %1753, %1752 : !torch.vtensor<[2,32,40,64],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,64],f32>
  %1755 = torch.aten.view %1754, %1284 : !torch.vtensor<[2,32,40,64],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,8,8],f32>
  // Per-channel multiply and add, both using %29 ([1280]) unsqueezed to [1280,1,1].
  %1756 = torch.aten.unsqueeze %29, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
  %1757 = torch.aten.unsqueeze %1756, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
  %1758 = torch.aten.mul.Tensor %1755, %1757 : !torch.vtensor<[2,1280,8,8],f32>, !torch.vtensor<[1280,1,1],f16> -> !torch.vtensor<[2,1280,8,8],f32>
  %1759 = torch.aten.unsqueeze %29, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
  %1760 = torch.aten.unsqueeze %1759, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
  %1761 = torch.aten.add.Tensor %1758, %1760, %int1 : !torch.vtensor<[2,1280,8,8],f32>, !torch.vtensor<[1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f32>
  // Cast to f16, SiLU, convolution with the [1280,1280,3,3] weight %25.
  %1762 = torch.aten._to_copy %1761, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,1280,8,8],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,8,8],f16>
  %1763 = torch.aten.silu %1762 : !torch.vtensor<[2,1280,8,8],f16> -> !torch.vtensor<[2,1280,8,8],f16>
  %1764 = torch.aten._convolution %1763, %25, %29, %87, %87, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,8,8],f16>
  // Second path: the concatenated input %1719 through the [1280,2560,1,1] weight %16;
  // the two paths are summed and divided by %4 (literal 1.0 at the top of the function).
  %1765 = torch.aten._convolution %1719, %16, %29, %87, %88, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,2560,8,8],f16>, !torch.vtensor<[1280,2560,1,1],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,8,8],f16>
  %1766 = torch.aten.add.Tensor %1765, %1764, %int1 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,8,8],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16>
  %1767 = torch.aten.div.Tensor %1766, %4 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[2,1280,8,8],f16>
  // Nearest-neighbour upsample of %1767 using a two-element scale list built from
  // %float2.000000e00 (output size operand is %none): [2,1280,8,8] -> [2,1280,16,16].
  %1768 = torch.prim.ListConstruct %float2.000000e00, %float2.000000e00 : (!torch.float, !torch.float) -> !torch.list<float>
  %1769 = torch.aten.upsample_nearest2d.vec %1767, %none, %1768 : !torch.vtensor<[2,1280,8,8],f16>, !torch.none, !torch.list<float> -> !torch.vtensor<[2,1280,16,16],f16>
  // Convolution on the upsampled tensor with the [1280,1280,3,3] weight %25; the
  // shape stays [2,1280,16,16].
  %1770 = torch.aten._convolution %1769, %25, %29, %87, %87, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,16,16],f16>
  // Concatenate %1770 with %1272 (defined before this chunk) along the dim given by
  // %int1, giving [2,2560,16,16], then clone.
  %1771 = torch.prim.ListConstruct %1770, %1272 : (!torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f16>) -> !torch.list<vtensor>
  %1772 = torch.aten.cat %1771, %int1 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[2,2560,16,16],f16>
  %1773 = torch.aten.clone %1772, %int0 : !torch.vtensor<[2,2560,16,16],f16>, !torch.int -> !torch.vtensor<[2,2560,16,16],f16>
  // Normalisation written out as primitive ops. The target shape list is built here
  // and the view yields [2,32,80,256]; f32 variance/mean are reduced to [2,32,1,1],
  // %5 (literal 1.0e-05 at the top of the function) is added, then rsqrt, subtract
  // mean and scale.
  // NOTE(review): looks like a decomposed group norm with 32 groups - confirm.
  %1774 = torch.prim.ListConstruct %int2, %int32, %int80, %int256 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
  %1775 = torch.aten.view %1773, %1774 : !torch.vtensor<[2,2560,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,32,80,256],f16>
  %1776 = torch.aten._to_copy %1775, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,32,80,256],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,32,80,256],f32>
  %1777 = torch.aten.var.correction %1776, %93, %int0, %true : !torch.vtensor<[2,32,80,256],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>
  %1778 = torch.aten.mean.dim %1776, %93, %true, %none : !torch.vtensor<[2,32,80,256],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
  %1779 = torch.aten.add.Tensor %1777, %5, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
  %1780 = torch.aten.rsqrt %1779 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
  %1781 = torch.aten.sub.Tensor %1775, %1778, %int1 : !torch.vtensor<[2,32,80,256],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,80,256],f32>
  %1782 = torch.aten.mul.Tensor %1781, %1780 : !torch.vtensor<[2,32,80,256],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,80,256],f32>
  // View back to [2,2560,16,16] using a shape list built here.
  %1783 = torch.prim.ListConstruct %int2, %int2560, %int16, %int16 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
  %1784 = torch.aten.view %1782, %1783 : !torch.vtensor<[2,32,80,256],f32>, !torch.list<int> -> !torch.vtensor<[2,2560,16,16],f32>
  // Per-channel multiply and add, both using %52 ([2560]) unsqueezed to [2560,1,1].
  %1785 = torch.aten.unsqueeze %52, %int-1 : !torch.vtensor<[2560],f16>, !torch.int -> !torch.vtensor<[2560,1],f16>
  %1786 = torch.aten.unsqueeze %1785, %int-1 : !torch.vtensor<[2560,1],f16>, !torch.int -> !torch.vtensor<[2560,1,1],f16>
  %1787 = torch.aten.mul.Tensor %1784, %1786 : !torch.vtensor<[2,2560,16,16],f32>, !torch.vtensor<[2560,1,1],f16> -> !torch.vtensor<[2,2560,16,16],f32>
  %1788 = torch.aten.unsqueeze %52, %int-1 : !torch.vtensor<[2560],f16>, !torch.int -> !torch.vtensor<[2560,1],f16>
  %1789 = torch.aten.unsqueeze %1788, %int-1 : !torch.vtensor<[2560,1],f16>, !torch.int -> !torch.vtensor<[2560,1,1],f16>
  %1790 = torch.aten.add.Tensor %1787, %1789, %int1 : !torch.vtensor<[2,2560,16,16],f32>, !torch.vtensor<[2560,1,1],f16>, !torch.int -> !torch.vtensor<[2,2560,16,16],f32>
  // Cast to f16, SiLU, convolution with the [1280,2560,3,3] weight %15 (2560 -> 1280
  // channels).
  %1791 = torch.aten._to_copy %1790, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,2560,16,16],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,2560,16,16],f16>
  %1792 = torch.aten.silu %1791 : !torch.vtensor<[2,2560,16,16],f16> -> !torch.vtensor<[2,2560,16,16],f16>
  %1793 = torch.aten._convolution %1792, %15, %29, %87, %87, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,2560,16,16],f16>, !torch.vtensor<[1280,2560,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,16,16],f16>
  // Project the [2,1280] tensor %86 (defined before this chunk): SiLU, then addmm of
  // %29 with the product of %1794 and the transposed %20.
  // NOTE(review): presumably the timestep-embedding projection of the block - confirm.
  %1794 = torch.aten.silu %86 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
  %1795 = torch.aten.t %20 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
  %1796 = torch.aten.addmm %29, %1794, %1795, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
  // Both slices return the same [2,1280] type as their input, i.e. the full extent.
  %1797 = torch.aten.slice.Tensor %1796, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,1280],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
  %1798 = torch.aten.slice.Tensor %1797, %int1, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,1280],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
  // Append two unit dims to reach [2,1280,1,1], then add to the conv output %1793
  // ([2,1280,16,16]).
  %1799 = torch.aten.unsqueeze %1798, %int2 : !torch.vtensor<[2,1280],f16>, !torch.int -> !torch.vtensor<[2,1280,1],f16>
  %1800 = torch.aten.unsqueeze %1799, %int3 : !torch.vtensor<[2,1280,1],f16>, !torch.int -> !torch.vtensor<[2,1280,1,1],f16>
  %1801 = torch.aten.add.Tensor %1793, %1800, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
  // Normalisation of %1801 written out as primitive ops: view as [2,32,40,256], f32
  // variance/mean reduced to [2,32,1,1], add %5 (literal 1.0e-05 at the top of the
  // function), rsqrt, subtract mean, scale, view back to [2,1280,16,16].
  // NOTE(review): looks like a decomposed group norm with 32 groups - confirm.
  %1802 = torch.aten.view %1801, %909 : !torch.vtensor<[2,1280,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,32,40,256],f16>
  %1803 = torch.aten._to_copy %1802, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,32,40,256],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,32,40,256],f32>
  %1804 = torch.aten.var.correction %1803, %93, %int0, %true : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>
  %1805 = torch.aten.mean.dim %1803, %93, %true, %none : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
  %1806 = torch.aten.add.Tensor %1804, %5, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
  %1807 = torch.aten.rsqrt %1806 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
  %1808 = torch.aten.sub.Tensor %1802, %1805, %int1 : !torch.vtensor<[2,32,40,256],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,256],f32>
  %1809 = torch.aten.mul.Tensor %1808, %1807 : !torch.vtensor<[2,32,40,256],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,256],f32>
  %1810 = torch.aten.view %1809, %918 : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f32>
  // Per-channel multiply and add, both using %29 ([1280]) unsqueezed to [1280,1,1].
  %1811 = torch.aten.unsqueeze %29, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
  %1812 = torch.aten.unsqueeze %1811, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
  %1813 = torch.aten.mul.Tensor %1810, %1812 : !torch.vtensor<[2,1280,16,16],f32>, !torch.vtensor<[1280,1,1],f16> -> !torch.vtensor<[2,1280,16,16],f32>
  %1814 = torch.aten.unsqueeze %29, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
  %1815 = torch.aten.unsqueeze %1814, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
  %1816 = torch.aten.add.Tensor %1813, %1815, %int1 : !torch.vtensor<[2,1280,16,16],f32>, !torch.vtensor<[1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f32>
  // Cast to f16, SiLU, convolution with the [1280,1280,3,3] weight %25.
  %1817 = torch.aten._to_copy %1816, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,1280,16,16],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,16,16],f16>
  %1818 = torch.aten.silu %1817 : !torch.vtensor<[2,1280,16,16],f16> -> !torch.vtensor<[2,1280,16,16],f16>
  %1819 = torch.aten._convolution %1818, %25, %29, %87, %87, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,16,16],f16>
  // Second path: the concatenated input %1772 through the [1280,2560,1,1] weight %16;
  // the two paths are summed and divided by %4 (literal 1.0 at the top of the function).
  %1820 = torch.aten._convolution %1772, %16, %29, %87, %88, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,2560,16,16],f16>, !torch.vtensor<[1280,2560,1,1],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,16,16],f16>
  %1821 = torch.aten.add.Tensor %1820, %1819, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
  %1822 = torch.aten.div.Tensor %1821, %4 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[2,1280,16,16],f16>
  // Clone %1822, then normalise it with primitive ops over the [2,32,40,256] view.
  // Unlike the neighbouring normalisations, the constant added to the variance is %3
  // (literal 9.9999999999999995E-7 at the top of the function), not %5.
  // NOTE(review): looks like a decomposed group norm with 32 groups - confirm.
  %1823 = torch.aten.clone %1822, %int0 : !torch.vtensor<[2,1280,16,16],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
  %1824 = torch.aten.view %1823, %909 : !torch.vtensor<[2,1280,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,32,40,256],f16>
  %1825 = torch.aten._to_copy %1824, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,32,40,256],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,32,40,256],f32>
  %1826 = torch.aten.var.correction %1825, %93, %int0, %true : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>
  %1827 = torch.aten.mean.dim %1825, %93, %true, %none : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
  %1828 = torch.aten.add.Tensor %1826, %3, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
  %1829 = torch.aten.rsqrt %1828 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
  %1830 = torch.aten.sub.Tensor %1824, %1827, %int1 : !torch.vtensor<[2,32,40,256],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,256],f32>
  %1831 = torch.aten.mul.Tensor %1830, %1829 : !torch.vtensor<[2,32,40,256],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,256],f32>
  %1832 = torch.aten.view %1831, %918 : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f32>
  // Per-channel multiply and add, both using %29 ([1280]) unsqueezed to [1280,1,1].
  %1833 = torch.aten.unsqueeze %29, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
  %1834 = torch.aten.unsqueeze %1833, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
  %1835 = torch.aten.mul.Tensor %1832, %1834 : !torch.vtensor<[2,1280,16,16],f32>, !torch.vtensor<[1280,1,1],f16> -> !torch.vtensor<[2,1280,16,16],f32>
  %1836 = torch.aten.unsqueeze %29, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
  %1837 = torch.aten.unsqueeze %1836, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
  %1838 = torch.aten.add.Tensor %1835, %1837, %int1 : !torch.vtensor<[2,1280,16,16],f32>, !torch.vtensor<[1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f32>
  // Cast to f16 and convolve with the [1280,1280,1,1] weight %24. No SiLU is applied
  // between the cast and this convolution.
  %1839 = torch.aten._to_copy %1838, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,1280,16,16],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,16,16],f16>
  %1840 = torch.aten._convolution %1839, %24, %29, %87, %88, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[1280,1280,1,1],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,16,16],f16>
  // Move the 1280-sized dim last ([2,1280,16,16] -> [2,16,16,1280]) and flatten the
  // two 16-sized dims into one of 256, giving [2,256,1280].
  %1841 = torch.aten.permute %1840, %156 : !torch.vtensor<[2,1280,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,16,16,1280],f16>
  %1842 = torch.aten._reshape_alias %1841, %951, %952 : !torch.vtensor<[2,16,16,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
  // Layer norm with %29 passed as both the weight and the bias operand. Besides the
  // normalised f16 output it yields two [2,256,1] f32 results.
  %result0_61, %result1_62, %result2_63 = torch.aten.native_layer_norm %1842, %954, %29, %29, %float1.000000e-05 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.vtensor<[1280],f16>, !torch.vtensor<[1280],f16>, !torch.float -> !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1],f32>, !torch.vtensor<[2,256,1],f32>
  // Three matrix products, each of %result0_61 reshaped to [512,1280] with the
  // transposed %20, and each viewed back to [2,256,1280]. All three use the same
  // operands, so %1846, %1850 and %1854 compute the same value in this dump.
  // NOTE(review): presumably the query/key/value projections of a self-attention,
  // with distinct weights collapsed into %20 because constants are elided - confirm.
  %1843 = torch.aten.t %20 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
  %1844 = torch.aten._reshape_alias %result0_61, %956, %957 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
  %1845 = torch.aten.mm %1844, %1843 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16>
  %1846 = torch.aten._unsafe_view %1845, %951 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
  %1847 = torch.aten.t %20 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
  %1848 = torch.aten._reshape_alias %result0_61, %956, %957 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
  %1849 = torch.aten.mm %1848, %1847 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16>
  %1850 = torch.aten._unsafe_view %1849, %951 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
  %1851 = torch.aten.t %20 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
  %1852 = torch.aten._reshape_alias %result0_61, %956, %957 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
  %1853 = torch.aten.mm %1852, %1851 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16>
  %1854 = torch.aten._unsafe_view %1853, %951 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
  // For each of the three [2,256,1280] tensors: split the last dim into 8 x 160,
  // permute to [2,8,256,160], clone, and merge the two leading dims into [16,256,160].
  // NOTE(review): presumably an 8-head split of a multi-head attention - confirm.
  %1855 = torch.aten._reshape_alias %1846, %969, %970 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16>
  %1856 = torch.aten.permute %1855, %179 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16>
  %1857 = torch.aten.clone %1856, %int0 : !torch.vtensor<[2,8,256,160],f16>, !torch.int -> !torch.vtensor<[2,8,256,160],f16>
  %1858 = torch.aten._unsafe_view %1857, %974 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
  %1859 = torch.aten._reshape_alias %1850, %969, %970 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16>
  %1860 = torch.aten.permute %1859, %179 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16>
  %1861 = torch.aten.clone %1860, %int0 : !torch.vtensor<[2,8,256,160],f16>, !torch.int -> !torch.vtensor<[2,8,256,160],f16>
  %1862 = torch.aten._unsafe_view %1861, %974 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
  %1863 = torch.aten._reshape_alias %1854, %969, %970 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16>
  %1864 = torch.aten.permute %1863, %179 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16>
  %1865 = torch.aten.clone %1864, %int0 : !torch.vtensor<[2,8,256,160],f16>, !torch.int -> !torch.vtensor<[2,8,256,160],f16>
  %1866 = torch.aten._unsafe_view %1865, %974 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
  // Attention scores: bmm(%1858, transpose(%1862)) -> [16,256,256], multiplied
  // by the scalar %0 (literal 0.0790569..., numerically 160^-0.5, defined at the
  // top of @forward) and passed through softmax over the last dimension.
  // The expand / _reshape_alias / _unsafe_view ops in this stage have identical
  // input and result types, i.e. they do not change any shape.
  %1867 = torch.aten.transpose.int %1862, %int-1, %int-2 : !torch.vtensor<[16,256,160],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,160,256],f16>
  %1868 = torch.aten.expand %1858, %974, %false : !torch.vtensor<[16,256,160],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,256,160],f16>
  %1869 = torch.aten._reshape_alias %1868, %974, %986 : !torch.vtensor<[16,256,160],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
  %1870 = torch.aten.expand %1867, %988, %false : !torch.vtensor<[16,160,256],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,160,256],f16>
  %1871 = torch.aten._reshape_alias %1870, %988, %990 : !torch.vtensor<[16,160,256],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,160,256],f16>
  %1872 = torch.aten.bmm %1869, %1871 : !torch.vtensor<[16,256,160],f16>, !torch.vtensor<[16,160,256],f16> -> !torch.vtensor<[16,256,256],f16>
  %1873 = torch.aten._unsafe_view %1872, %993 : !torch.vtensor<[16,256,256],f16>, !torch.list<int> -> !torch.vtensor<[16,256,256],f16>
  // Scale in f16 by the f64 scalar %0, then softmax along dim -1.
  %1874 = torch.aten.mul.Tensor %1873, %0 : !torch.vtensor<[16,256,256],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,256,256],f16>
  %1875 = torch.aten._softmax %1874, %int-1, %false : !torch.vtensor<[16,256,256],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,256,256],f16>
  // Weighted sum: bmm(softmax output %1875, %1866) -> [16,256,160], then the
  // head split is undone: [16,256,160] -> [2,8,256,160] -> permute (%179) ->
  // [2,256,8,160] -> clone -> [2,256,1280].
  // The expand / _reshape_alias ops before the bmm and the _unsafe_view right
  // after it keep their operand shapes unchanged.
  %1876 = torch.aten.expand %1875, %993, %false : !torch.vtensor<[16,256,256],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,256,256],f16>
  %1877 = torch.aten._reshape_alias %1876, %993, %998 : !torch.vtensor<[16,256,256],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,256,256],f16>
  %1878 = torch.aten.expand %1866, %974, %false : !torch.vtensor<[16,256,160],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,256,160],f16>
  %1879 = torch.aten._reshape_alias %1878, %974, %986 : !torch.vtensor<[16,256,160],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
  %1880 = torch.aten.bmm %1877, %1879 : !torch.vtensor<[16,256,256],f16>, !torch.vtensor<[16,256,160],f16> -> !torch.vtensor<[16,256,160],f16>
  %1881 = torch.aten._unsafe_view %1880, %974 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
  // Merge the 8 heads back into a single 1280-wide feature axis.
  %1882 = torch.aten._reshape_alias %1881, %1004, %1005 : !torch.vtensor<[16,256,160],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16>
  %1883 = torch.aten.permute %1882, %179 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16>
  %1884 = torch.aten.clone %1883, %int0 : !torch.vtensor<[2,256,8,160],f16>, !torch.int -> !torch.vtensor<[2,256,8,160],f16>
  %1885 = torch.aten._unsafe_view %1884, %951 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
  // Output projection and residual: %1885 is flattened to [512,1280], passed
  // through addmm(%29, x, t(%20)) with both scalar factors %int1, reshaped back
  // to [2,256,1280] and added to %1842 (defined before this excerpt).
  %1886 = torch.aten.t %20 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
  %1887 = torch.aten.view %1885, %956 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
  %1888 = torch.aten.addmm %29, %1887, %1886, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[512,1280],f16>
  %1889 = torch.aten.view %1888, %951 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
  %1890 = torch.aten.add.Tensor %1889, %1842, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16>
  // Layer norm of %1890 (weight and bias operands are both %29, eps operand
  // %float1.000000e-05; the statistics results are [2,256,1] in f32), followed by
  // three bias-free projections done as 2-D matmuls on flattened inputs:
  //   %1894 = %result0_64 x t(%20)  -> [2,256,1280]
  //   %1898 = %arg2       x t(%19)  -> [2,77,1280]
  //   %1902 = %arg2       x t(%19)  -> [2,77,1280]
  // NOTE(review): %1898 and %1902 are computed from identical operands, and %29
  // serves as both norm weight and norm bias. Weight literals in this dump are
  // elided (dense_resource<__elided__>), so distinct constants were presumably
  // merged into one value -- confirm before reading this as real weight sharing.
  %result0_64, %result1_65, %result2_66 = torch.aten.native_layer_norm %1890, %954, %29, %29, %float1.000000e-05 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.vtensor<[1280],f16>, !torch.vtensor<[1280],f16>, !torch.float -> !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1],f32>, !torch.vtensor<[2,256,1],f32>
  // Projection of the normalized activations.
  %1891 = torch.aten.t %20 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
  %1892 = torch.aten._reshape_alias %result0_64, %956, %957 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
  %1893 = torch.aten.mm %1892, %1891 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16>
  %1894 = torch.aten._unsafe_view %1893, %951 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
  // First projection of the function argument %arg2 (768 -> 1280 features).
  %1895 = torch.aten.t %19 : !torch.vtensor<[1280,768],f16> -> !torch.vtensor<[768,1280],f16>
  %1896 = torch.aten._reshape_alias %arg2, %228, %229 : !torch.vtensor<[2,77,768],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[154,768],f16>
  %1897 = torch.aten.mm %1896, %1895 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,1280],f16> -> !torch.vtensor<[154,1280],f16>
  %1898 = torch.aten._unsafe_view %1897, %1022 : !torch.vtensor<[154,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,77,1280],f16>
  // Second projection of %arg2 (same operands as the first one).
  %1899 = torch.aten.t %19 : !torch.vtensor<[1280,768],f16> -> !torch.vtensor<[768,1280],f16>
  %1900 = torch.aten._reshape_alias %arg2, %228, %229 : !torch.vtensor<[2,77,768],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[154,768],f16>
  %1901 = torch.aten.mm %1900, %1899 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,1280],f16> -> !torch.vtensor<[154,1280],f16>
  %1902 = torch.aten._unsafe_view %1901, %1022 : !torch.vtensor<[154,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,77,1280],f16>
  // Split %1894, %1898 and %1902 into 8 heads of width 160 and fold the head
  // axis into the batch axis. Per tensor: _reshape_alias -> [2,S,8,160],
  // permute (%179) -> [2,8,S,160], clone, _unsafe_view -> [16,S,160], where S is
  // 256 for %1894 and 77 for the two tensors derived from %arg2.
  // Head split of %1894 -> [16,256,160].
  %1903 = torch.aten._reshape_alias %1894, %969, %970 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16>
  %1904 = torch.aten.permute %1903, %179 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16>
  %1905 = torch.aten.clone %1904, %int0 : !torch.vtensor<[2,8,256,160],f16>, !torch.int -> !torch.vtensor<[2,8,256,160],f16>
  %1906 = torch.aten._unsafe_view %1905, %974 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
  // Head split of %1898 -> [16,77,160].
  %1907 = torch.aten._reshape_alias %1898, %1032, %1033 : !torch.vtensor<[2,77,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,77,8,160],f16>
  %1908 = torch.aten.permute %1907, %179 : !torch.vtensor<[2,77,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,160],f16>
  %1909 = torch.aten.clone %1908, %int0 : !torch.vtensor<[2,8,77,160],f16>, !torch.int -> !torch.vtensor<[2,8,77,160],f16>
  %1910 = torch.aten._unsafe_view %1909, %1037 : !torch.vtensor<[2,8,77,160],f16>, !torch.list<int> -> !torch.vtensor<[16,77,160],f16>
  // Head split of %1902 -> [16,77,160].
  %1911 = torch.aten._reshape_alias %1902, %1032, %1033 : !torch.vtensor<[2,77,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,77,8,160],f16>
  %1912 = torch.aten.permute %1911, %179 : !torch.vtensor<[2,77,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,160],f16>
  %1913 = torch.aten.clone %1912, %int0 : !torch.vtensor<[2,8,77,160],f16>, !torch.int -> !torch.vtensor<[2,8,77,160],f16>
  %1914 = torch.aten._unsafe_view %1913, %1037 : !torch.vtensor<[2,8,77,160],f16>, !torch.list<int> -> !torch.vtensor<[16,77,160],f16>
  // Attention scores against the %arg2-derived tensor: bmm(%1906,
  // transpose(%1910)) -> [16,256,77], multiplied by the scalar %0 (literal
  // 0.0790569..., numerically 160^-0.5, defined at the top of @forward) and
  // passed through softmax over the last dimension (length 77).
  // The expand / _reshape_alias / _unsafe_view ops in this stage have identical
  // input and result types, i.e. they do not change any shape.
  %1915 = torch.aten.transpose.int %1910, %int-1, %int-2 : !torch.vtensor<[16,77,160],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,160,77],f16>
  %1916 = torch.aten.expand %1906, %974, %false : !torch.vtensor<[16,256,160],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,256,160],f16>
  %1917 = torch.aten._reshape_alias %1916, %974, %986 : !torch.vtensor<[16,256,160],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
  %1918 = torch.aten.expand %1915, %1046, %false : !torch.vtensor<[16,160,77],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,160,77],f16>
  %1919 = torch.aten._reshape_alias %1918, %1046, %1048 : !torch.vtensor<[16,160,77],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,160,77],f16>
  %1920 = torch.aten.bmm %1917, %1919 : !torch.vtensor<[16,256,160],f16>, !torch.vtensor<[16,160,77],f16> -> !torch.vtensor<[16,256,77],f16>
  %1921 = torch.aten._unsafe_view %1920, %1051 : !torch.vtensor<[16,256,77],f16>, !torch.list<int> -> !torch.vtensor<[16,256,77],f16>
  // Scale in f16 by the f64 scalar %0, then softmax along dim -1.
  %1922 = torch.aten.mul.Tensor %1921, %0 : !torch.vtensor<[16,256,77],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,256,77],f16>
  %1923 = torch.aten._softmax %1922, %int-1, %false : !torch.vtensor<[16,256,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,256,77],f16>
  // Weighted sum: bmm(softmax output %1923, %1914) -> [16,256,160], then the
  // head split is undone: [16,256,160] -> [2,8,256,160] -> permute (%179) ->
  // [2,256,8,160] -> clone -> [2,256,1280].
  // The expand / _reshape_alias ops before the bmm and the _unsafe_view right
  // after it keep their operand shapes unchanged.
  %1924 = torch.aten.expand %1923, %1051, %false : !torch.vtensor<[16,256,77],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,256,77],f16>
  %1925 = torch.aten._reshape_alias %1924, %1051, %1056 : !torch.vtensor<[16,256,77],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,256,77],f16>
  %1926 = torch.aten.expand %1914, %1037, %false : !torch.vtensor<[16,77,160],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,77,160],f16>
  %1927 = torch.aten._reshape_alias %1926, %1037, %1059 : !torch.vtensor<[16,77,160],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,77,160],f16>
  %1928 = torch.aten.bmm %1925, %1927 : !torch.vtensor<[16,256,77],f16>, !torch.vtensor<[16,77,160],f16> -> !torch.vtensor<[16,256,160],f16>
  %1929 = torch.aten._unsafe_view %1928, %974 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
  // Merge the 8 heads back into a single 1280-wide feature axis.
  %1930 = torch.aten._reshape_alias %1929, %1004, %1005 : !torch.vtensor<[16,256,160],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16>
  %1931 = torch.aten.permute %1930, %179 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16>
  %1932 = torch.aten.clone %1931, %int0 : !torch.vtensor<[2,256,8,160],f16>, !torch.int -> !torch.vtensor<[2,256,8,160],f16>
  %1933 = torch.aten._unsafe_view %1932, %951 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
  // Output projection and residual: %1933 is flattened to [512,1280], passed
  // through addmm(%29, x, t(%20)) with both scalar factors %int1, reshaped back
  // to [2,256,1280] and added to %1890 (the input of the preceding layer norm).
  %1934 = torch.aten.t %20 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
  %1935 = torch.aten.view %1933, %956 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
  %1936 = torch.aten.addmm %29, %1935, %1934, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[512,1280],f16>
  %1937 = torch.aten.view %1936, %951 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
  %1938 = torch.aten.add.Tensor %1937, %1890, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16>
  // Gated feed-forward stage with residual:
  //   1. layer norm of %1938 (weight and bias operands are both %29);
  //   2. linear 1280 -> 10240 via addmm(%22, x, t(%21)) on the [512,1280] view;
  //   3. the last dimension is sliced into [0,5120) and [5120,10240); the first
  //      half is multiplied element-wise by gelu(second half);
  //   4. linear 5120 -> 1280 via addmm(%29, x, t(%23));
  //   5. the result is added to %1938.
  // %str_0 (the string argument of gelu) is defined outside this excerpt.
  %result0_67, %result1_68, %result2_69 = torch.aten.native_layer_norm %1938, %954, %29, %29, %float1.000000e-05 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.vtensor<[1280],f16>, !torch.vtensor<[1280],f16>, !torch.float -> !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1],f32>, !torch.vtensor<[2,256,1],f32>
  // Expansion to 10240 features.
  %1939 = torch.aten.t %21 : !torch.vtensor<[10240,1280],f16> -> !torch.vtensor<[1280,10240],f16>
  %1940 = torch.aten.view %result0_67, %956 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
  %1941 = torch.aten.addmm %22, %1940, %1939, %int1, %int1 : !torch.vtensor<[10240],f16>, !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,10240],f16>, !torch.int, !torch.int -> !torch.vtensor<[512,10240],f16>
  %1942 = torch.aten.view %1941, %1075 : !torch.vtensor<[512,10240],f16>, !torch.list<int> -> !torch.vtensor<[2,256,10240],f16>
  // Split into two 5120-wide halves; the second half gates the first via gelu.
  %1943 = torch.aten.slice.Tensor %1942, %int-1, %int0, %int5120, %int1 : !torch.vtensor<[2,256,10240],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,256,5120],f16>
  %1944 = torch.aten.slice.Tensor %1942, %int-1, %int5120, %int10240, %int1 : !torch.vtensor<[2,256,10240],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,256,5120],f16>
  %1945 = torch.aten.gelu %1944, %str_0 : !torch.vtensor<[2,256,5120],f16>, !torch.str -> !torch.vtensor<[2,256,5120],f16>
  %1946 = torch.aten.mul.Tensor %1943, %1945 : !torch.vtensor<[2,256,5120],f16>, !torch.vtensor<[2,256,5120],f16> -> !torch.vtensor<[2,256,5120],f16>
  // Projection back to 1280 features and residual add.
  %1947 = torch.aten.t %23 : !torch.vtensor<[1280,5120],f16> -> !torch.vtensor<[5120,1280],f16>
  %1948 = torch.aten.view %1946, %1082 : !torch.vtensor<[2,256,5120],f16>, !torch.list<int> -> !torch.vtensor<[512,5120],f16>
  %1949 = torch.aten.addmm %29, %1948, %1947, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[512,5120],f16>, !torch.vtensor<[5120,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[512,1280],f16>
  %1950 = torch.aten.view %1949, %951 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
  %1951 = torch.aten.add.Tensor %1950, %1938, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16>
  // Back to image layout: [2,256,1280] -> [2,16,16,1280] -> permute (%300,
  // defined outside this excerpt) -> [2,1280,16,16]. Then a convolution with a
  // 1x1 kernel (%24, bias %29) and a residual add with %1822 (defined before
  // this excerpt). The stride/padding/dilation lists %87 and %88 are also
  // defined outside this excerpt.
  %1952 = torch.aten._reshape_alias %1951, %1087, %1088 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,16,16,1280],f16>
  %1953 = torch.aten.permute %1952, %300 : !torch.vtensor<[2,16,16,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f16>
  %1954 = torch.aten._convolution %1953, %24, %29, %87, %88, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[1280,1280,1,1],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,16,16],f16>
  %1955 = torch.aten.add.Tensor %1954, %1822, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
  // Concatenate %1955 with %1092 (defined before this excerpt) along dim
  // %int1: two [2,1280,16,16] tensors -> one [2,2560,16,16] tensor.
  // NOTE(review): %1092 is presumably a skip connection saved earlier in the
  // network -- its definition is not visible here, confirm.
  %1956 = torch.prim.ListConstruct %1955, %1092 : (!torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f16>) -> !torch.list<vtensor>
  %1957 = torch.aten.cat %1956, %int1 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[2,2560,16,16],f16>
  // Decomposed normalization over 32 channel groups, followed by SiLU.
  //   - %1957 is cloned and viewed as [2,32,80,256] (32 groups of 80 channels,
  //     16*16 positions flattened to 256);
  //   - variance (var.correction with correction operand %int0) and mean are
  //     computed in f32 over the dims listed in %93 (defined outside this
  //     excerpt), each giving [2,32,1,1];
  //   - normalized = (x - mean) * rsqrt(var + %5), where %5 is the scalar
  //     literal 1e-05 defined at the top of @forward;
  //   - the result is viewed back as [2,2560,16,16], multiplied by and then
  //     offset by %52 broadcast as [2560,1,1], cast to f16 and passed to silu.
  // NOTE(review): %52 is used as both the scale and the offset. Weight literals
  // in this dump are elided, so this is presumably a merged-constant artifact
  // rather than real sharing -- confirm.
  %1958 = torch.aten.clone %1957, %int0 : !torch.vtensor<[2,2560,16,16],f16>, !torch.int -> !torch.vtensor<[2,2560,16,16],f16>
  %1959 = torch.aten.view %1958, %1774 : !torch.vtensor<[2,2560,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,32,80,256],f16>
  // Statistics are taken from an f32 copy of the grouped view.
  %1960 = torch.aten._to_copy %1959, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,32,80,256],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,32,80,256],f32>
  %1961 = torch.aten.var.correction %1960, %93, %int0, %true : !torch.vtensor<[2,32,80,256],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>
  %1962 = torch.aten.mean.dim %1960, %93, %true, %none : !torch.vtensor<[2,32,80,256],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
  %1963 = torch.aten.add.Tensor %1961, %5, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
  %1964 = torch.aten.rsqrt %1963 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
  // The subtraction takes the f16 view %1959 (not the f32 copy) and yields f32.
  %1965 = torch.aten.sub.Tensor %1959, %1962, %int1 : !torch.vtensor<[2,32,80,256],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,80,256],f32>
  %1966 = torch.aten.mul.Tensor %1965, %1964 : !torch.vtensor<[2,32,80,256],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,80,256],f32>
  %1967 = torch.aten.view %1966, %1783 : !torch.vtensor<[2,32,80,256],f32>, !torch.list<int> -> !torch.vtensor<[2,2560,16,16],f32>
  // Per-channel scale.
  %1968 = torch.aten.unsqueeze %52, %int-1 : !torch.vtensor<[2560],f16>, !torch.int -> !torch.vtensor<[2560,1],f16>
  %1969 = torch.aten.unsqueeze %1968, %int-1 : !torch.vtensor<[2560,1],f16>, !torch.int -> !torch.vtensor<[2560,1,1],f16>
  %1970 = torch.aten.mul.Tensor %1967, %1969 : !torch.vtensor<[2,2560,16,16],f32>, !torch.vtensor<[2560,1,1],f16> -> !torch.vtensor<[2,2560,16,16],f32>
  // Per-channel offset.
  %1971 = torch.aten.unsqueeze %52, %int-1 : !torch.vtensor<[2560],f16>, !torch.int -> !torch.vtensor<[2560,1],f16>
  %1972 = torch.aten.unsqueeze %1971, %int-1 : !torch.vtensor<[2560,1],f16>, !torch.int -> !torch.vtensor<[2560,1,1],f16>
  %1973 = torch.aten.add.Tensor %1970, %1972, %int1 : !torch.vtensor<[2,2560,16,16],f32>, !torch.vtensor<[2560,1,1],f16>, !torch.int -> !torch.vtensor<[2,2560,16,16],f32>
  // Back to f16, then the activation.
  %1974 = torch.aten._to_copy %1973, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,2560,16,16],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,2560,16,16],f16>
  %1975 = torch.aten.silu %1974 : !torch.vtensor<[2,2560,16,16],f16> -> !torch.vtensor<[2,2560,16,16],f16>
  // Convolution with a 3x3 kernel (%15, 2560 -> 1280 channels, bias %29) plus a
  // per-sample, per-channel offset derived from %86:
  //   silu(%86) -> addmm(%29, ., t(%20)) -> [2,1280] -> unsqueeze twice ->
  //   [2,1280,1,1], broadcast-added to the [2,1280,16,16] convolution output.
  // The two slice.Tensor ops run from %int0 to %int9223372036854775807 with step
  // %int1 on dims 0 and 1; their input and result types are identical.
  // NOTE(review): %86 is presumably the projected timestep embedding -- it is
  // defined outside this excerpt, confirm.
  %1976 = torch.aten._convolution %1975, %15, %29, %87, %87, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,2560,16,16],f16>, !torch.vtensor<[1280,2560,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,16,16],f16>
  // Linear projection of silu(%86).
  %1977 = torch.aten.silu %86 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
  %1978 = torch.aten.t %20 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
  %1979 = torch.aten.addmm %29, %1977, %1978, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
  // Full-range slices followed by two trailing unit dims for broadcasting.
  %1980 = torch.aten.slice.Tensor %1979, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,1280],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
  %1981 = torch.aten.slice.Tensor %1980, %int1, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,1280],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
  %1982 = torch.aten.unsqueeze %1981, %int2 : !torch.vtensor<[2,1280],f16>, !torch.int -> !torch.vtensor<[2,1280,1],f16>
  %1983 = torch.aten.unsqueeze %1982, %int3 : !torch.vtensor<[2,1280,1],f16>, !torch.int -> !torch.vtensor<[2,1280,1,1],f16>
  %1984 = torch.aten.add.Tensor %1976, %1983, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
  // Decomposed normalization over 32 channel groups, followed by SiLU.
  //   - %1984 is viewed as [2,32,40,256] (32 groups of 40 channels, 16*16
  //     positions flattened to 256);
  //   - variance (var.correction with correction operand %int0) and mean are
  //     computed in f32 over the dims listed in %93 (defined outside this
  //     excerpt), each giving [2,32,1,1];
  //   - normalized = (x - mean) * rsqrt(var + %5), where %5 is the scalar
  //     literal 1e-05 defined at the top of @forward;
  //   - the result is viewed back as [2,1280,16,16], multiplied by and then
  //     offset by %29 broadcast as [1280,1,1], cast to f16 and passed to silu.
  // NOTE(review): %29 is used as both the scale and the offset. Weight literals
  // in this dump are elided, so this is presumably a merged-constant artifact
  // rather than real sharing -- confirm.
  %1985 = torch.aten.view %1984, %909 : !torch.vtensor<[2,1280,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,32,40,256],f16>
  // Statistics are taken from an f32 copy of the grouped view.
  %1986 = torch.aten._to_copy %1985, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,32,40,256],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,32,40,256],f32>
  %1987 = torch.aten.var.correction %1986, %93, %int0, %true : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>
  %1988 = torch.aten.mean.dim %1986, %93, %true, %none : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
  %1989 = torch.aten.add.Tensor %1987, %5, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
  %1990 = torch.aten.rsqrt %1989 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
  // The subtraction takes the f16 view %1985 (not the f32 copy) and yields f32.
  %1991 = torch.aten.sub.Tensor %1985, %1988, %int1 : !torch.vtensor<[2,32,40,256],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,256],f32>
  %1992 = torch.aten.mul.Tensor %1991, %1990 : !torch.vtensor<[2,32,40,256],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,256],f32>
  %1993 = torch.aten.view %1992, %918 : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f32>
  // Per-channel scale.
  %1994 = torch.aten.unsqueeze %29, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
  %1995 = torch.aten.unsqueeze %1994, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
  %1996 = torch.aten.mul.Tensor %1993, %1995 : !torch.vtensor<[2,1280,16,16],f32>, !torch.vtensor<[1280,1,1],f16> -> !torch.vtensor<[2,1280,16,16],f32>
  // Per-channel offset.
  %1997 = torch.aten.unsqueeze %29, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
  %1998 = torch.aten.unsqueeze %1997, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
  %1999 = torch.aten.add.Tensor %1996, %1998, %int1 : !torch.vtensor<[2,1280,16,16],f32>, !torch.vtensor<[1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f32>
  // Back to f16, then the activation.
  %2000 = torch.aten._to_copy %1999, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,1280,16,16],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,16,16],f16>
  %2001 = torch.aten.silu %2000 : !torch.vtensor<[2,1280,16,16],f16> -> !torch.vtensor<[2,1280,16,16],f16>
  // Two convolutions whose outputs are summed:
  //   %2002: 3x3 kernel (%25, 1280 -> 1280 channels) applied to %2001;
  //   %2003: 1x1 kernel (%16, 2560 -> 1280 channels) applied to the
  //          concatenated tensor %1957, bringing it to the same channel count.
  // The sum is then divided by the scalar %4, which is the literal 1.0 defined
  // at the top of @forward.
  %2002 = torch.aten._convolution %2001, %25, %29, %87, %87, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,16,16],f16>
  %2003 = torch.aten._convolution %1957, %16, %29, %87, %88, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,2560,16,16],f16>, !torch.vtensor<[1280,2560,1,1],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,16,16],f16>
  %2004 = torch.aten.add.Tensor %2003, %2002, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
  %2005 = torch.aten.div.Tensor %2004, %4 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[2,1280,16,16],f16>
  // Decomposed normalization over 32 channel groups (no activation afterwards).
  //   - %2005 is cloned and viewed as [2,32,40,256];
  //   - variance (var.correction with correction operand %int0) and mean are
  //     computed in f32 over the dims listed in %93 (defined outside this
  //     excerpt), each giving [2,32,1,1];
  //   - normalized = (x - mean) * rsqrt(var + %3), where %3 is the scalar
  //     literal 9.9999999999999995E-7 (about 1e-06) defined at the top of
  //     @forward -- a different epsilon constant than %5 (1e-05);
  //   - the result is viewed back as [2,1280,16,16], multiplied by and then
  //     offset by %29 broadcast as [1280,1,1], and cast to f16.
  // NOTE(review): %29 is used as both the scale and the offset. Weight literals
  // in this dump are elided, so this is presumably a merged-constant artifact
  // rather than real sharing -- confirm.
  %2006 = torch.aten.clone %2005, %int0 : !torch.vtensor<[2,1280,16,16],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
  %2007 = torch.aten.view %2006, %909 : !torch.vtensor<[2,1280,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,32,40,256],f16>
  // Statistics are taken from an f32 copy of the grouped view.
  %2008 = torch.aten._to_copy %2007, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,32,40,256],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,32,40,256],f32>
  %2009 = torch.aten.var.correction %2008, %93, %int0, %true : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>
  %2010 = torch.aten.mean.dim %2008, %93, %true, %none : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
  %2011 = torch.aten.add.Tensor %2009, %3, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
  %2012 = torch.aten.rsqrt %2011 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
  // The subtraction takes the f16 view %2007 (not the f32 copy) and yields f32.
  %2013 = torch.aten.sub.Tensor %2007, %2010, %int1 : !torch.vtensor<[2,32,40,256],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,256],f32>
  %2014 = torch.aten.mul.Tensor %2013, %2012 : !torch.vtensor<[2,32,40,256],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,256],f32>
  %2015 = torch.aten.view %2014, %918 : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f32>
  // Per-channel scale.
  %2016 = torch.aten.unsqueeze %29, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
  %2017 = torch.aten.unsqueeze %2016, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
  %2018 = torch.aten.mul.Tensor %2015, %2017 : !torch.vtensor<[2,1280,16,16],f32>, !torch.vtensor<[1280,1,1],f16> -> !torch.vtensor<[2,1280,16,16],f32>
  // Per-channel offset.
  %2019 = torch.aten.unsqueeze %29, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
  %2020 = torch.aten.unsqueeze %2019, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
  %2021 = torch.aten.add.Tensor %2018, %2020, %int1 : !torch.vtensor<[2,1280,16,16],f32>, !torch.vtensor<[1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f32>
  // Back to f16.
  %2022 = torch.aten._to_copy %2021, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,1280,16,16],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,16,16],f16>
  // Convolution with a 1x1 kernel (%24, bias %29) on the normalized tensor, then
  // a switch to token layout: permute (%156, defined outside this excerpt) ->
  // [2,16,16,1280], reshaped to [2,256,1280] (the 16x16 positions become a
  // sequence of length 256).
  %2023 = torch.aten._convolution %2022, %24, %29, %87, %88, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[1280,1280,1,1],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,16,16],f16>
  %2024 = torch.aten.permute %2023, %156 : !torch.vtensor<[2,1280,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,16,16,1280],f16>
  %2025 = torch.aten._reshape_alias %2024, %951, %952 : !torch.vtensor<[2,16,16,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
  // Layer norm of %2025 (weight and bias operands are both %29, eps operand
  // %float1.000000e-05; the statistics results are [2,256,1] in f32), followed by
  // three bias-free projections of the normalized output, each computed as
  // mm(flattened [512,1280] input, t(%20)) and reshaped back to [2,256,1280]:
  // %2029, %2033 and %2037.
  // NOTE(review): all three projections use identical operands. Weight literals
  // in this dump are elided (dense_resource<__elided__>), so distinct weight
  // constants were presumably merged into %20 -- confirm before reading this as
  // real weight sharing or as redundant computation.
  %result0_70, %result1_71, %result2_72 = torch.aten.native_layer_norm %2025, %954, %29, %29, %float1.000000e-05 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.vtensor<[1280],f16>, !torch.vtensor<[1280],f16>, !torch.float -> !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1],f32>, !torch.vtensor<[2,256,1],f32>
  // Projection 1 (used below as the left operand of the score bmm).
  %2026 = torch.aten.t %20 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
  %2027 = torch.aten._reshape_alias %result0_70, %956, %957 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
  %2028 = torch.aten.mm %2027, %2026 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16>
  %2029 = torch.aten._unsafe_view %2028, %951 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
  // Projection 2 (transposed below and used as the right operand of the score bmm).
  %2030 = torch.aten.t %20 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
  %2031 = torch.aten._reshape_alias %result0_70, %956, %957 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
  %2032 = torch.aten.mm %2031, %2030 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16>
  %2033 = torch.aten._unsafe_view %2032, %951 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
  // Projection 3 (used below as the right operand of the second bmm).
  %2034 = torch.aten.t %20 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
  %2035 = torch.aten._reshape_alias %result0_70, %956, %957 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
  %2036 = torch.aten.mm %2035, %2034 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16>
  %2037 = torch.aten._unsafe_view %2036, %951 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
  // Split %2029, %2033 and %2037 ([2,256,1280]) into 8 heads of width 160 and
  // fold the head axis into the batch axis, giving [16,256,160] for each.
  // Per tensor: _reshape_alias -> [2,256,8,160], permute with %179 (list defined
  // outside this excerpt) -> [2,8,256,160], clone, _unsafe_view -> [16,256,160].
  // Head split of %2029.
  %2038 = torch.aten._reshape_alias %2029, %969, %970 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16>
  %2039 = torch.aten.permute %2038, %179 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16>
  %2040 = torch.aten.clone %2039, %int0 : !torch.vtensor<[2,8,256,160],f16>, !torch.int -> !torch.vtensor<[2,8,256,160],f16>
  %2041 = torch.aten._unsafe_view %2040, %974 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
  // Head split of %2033.
  %2042 = torch.aten._reshape_alias %2033, %969, %970 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16>
  %2043 = torch.aten.permute %2042, %179 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16>
  %2044 = torch.aten.clone %2043, %int0 : !torch.vtensor<[2,8,256,160],f16>, !torch.int -> !torch.vtensor<[2,8,256,160],f16>
  %2045 = torch.aten._unsafe_view %2044, %974 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
  // Head split of %2037.
  %2046 = torch.aten._reshape_alias %2037, %969, %970 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16>
  %2047 = torch.aten.permute %2046, %179 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16>
  %2048 = torch.aten.clone %2047, %int0 : !torch.vtensor<[2,8,256,160],f16>, !torch.int -> !torch.vtensor<[2,8,256,160],f16>
  %2049 = torch.aten._unsafe_view %2048, %974 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
  // Attention scores: bmm(%2041, transpose(%2045)) -> [16,256,256], multiplied
  // by the scalar %0 (literal 0.0790569..., numerically 160^-0.5, defined at the
  // top of @forward) and passed through softmax over the last dimension.
  // The expand / _reshape_alias / _unsafe_view ops in this stage have identical
  // input and result types, i.e. they do not change any shape.
  %2050 = torch.aten.transpose.int %2045, %int-1, %int-2 : !torch.vtensor<[16,256,160],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,160,256],f16>
  %2051 = torch.aten.expand %2041, %974, %false : !torch.vtensor<[16,256,160],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,256,160],f16>
  %2052 = torch.aten._reshape_alias %2051, %974, %986 : !torch.vtensor<[16,256,160],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
  %2053 = torch.aten.expand %2050, %988, %false : !torch.vtensor<[16,160,256],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,160,256],f16>
  %2054 = torch.aten._reshape_alias %2053, %988, %990 : !torch.vtensor<[16,160,256],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,160,256],f16>
  %2055 = torch.aten.bmm %2052, %2054 : !torch.vtensor<[16,256,160],f16>, !torch.vtensor<[16,160,256],f16> -> !torch.vtensor<[16,256,256],f16>
  %2056 = torch.aten._unsafe_view %2055, %993 : !torch.vtensor<[16,256,256],f16>, !torch.list<int> -> !torch.vtensor<[16,256,256],f16>
  // Scale in f16 by the f64 scalar %0, then softmax along dim -1.
  %2057 = torch.aten.mul.Tensor %2056, %0 : !torch.vtensor<[16,256,256],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,256,256],f16>
  %2058 = torch.aten._softmax %2057, %int-1, %false : !torch.vtensor<[16,256,256],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,256,256],f16>
  // Weighted sum: bmm(softmax output %2058, %2049) -> [16,256,160], then the
  // head split is undone: [16,256,160] -> [2,8,256,160] -> permute (%179) ->
  // [2,256,8,160] -> clone -> [2,256,1280].
  // The expand / _reshape_alias ops before the bmm and the _unsafe_view right
  // after it keep their operand shapes unchanged.
  %2059 = torch.aten.expand %2058, %993, %false : !torch.vtensor<[16,256,256],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,256,256],f16>
  %2060 = torch.aten._reshape_alias %2059, %993, %998 : !torch.vtensor<[16,256,256],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,256,256],f16>
  %2061 = torch.aten.expand %2049, %974, %false : !torch.vtensor<[16,256,160],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,256,160],f16>
  %2062 = torch.aten._reshape_alias %2061, %974, %986 : !torch.vtensor<[16,256,160],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
  %2063 = torch.aten.bmm %2060, %2062 : !torch.vtensor<[16,256,256],f16>, !torch.vtensor<[16,256,160],f16> -> !torch.vtensor<[16,256,160],f16>
  %2064 = torch.aten._unsafe_view %2063, %974 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
  // Merge the 8 heads back into a single 1280-wide feature axis.
  %2065 = torch.aten._reshape_alias %2064, %1004, %1005 : !torch.vtensor<[16,256,160],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16>
  %2066 = torch.aten.permute %2065, %179 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16>
  %2067 = torch.aten.clone %2066, %int0 : !torch.vtensor<[2,256,8,160],f16>, !torch.int -> !torch.vtensor<[2,256,8,160],f16>
  %2068 = torch.aten._unsafe_view %2067, %951 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
  // Output projection and residual: %2068 is flattened to [512,1280], passed
  // through addmm(%29, x, t(%20)) with both scalar factors %int1, reshaped back
  // to [2,256,1280] and added to %2025 (the token-layout input of this stage).
  %2069 = torch.aten.t %20 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
  %2070 = torch.aten.view %2068, %956 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
  %2071 = torch.aten.addmm %29, %2070, %2069, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[512,1280],f16>
  %2072 = torch.aten.view %2071, %951 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
  %2073 = torch.aten.add.Tensor %2072, %2025, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16>
  // Layer norm of %2073 (weight and bias operands are both %29, eps operand
  // %float1.000000e-05; the statistics results are [2,256,1] in f32), followed by
  // three bias-free projections done as 2-D matmuls on flattened inputs:
  //   %2077 = %result0_73 x t(%20)  -> [2,256,1280]
  //   %2081 = %arg2       x t(%19)  -> [2,77,1280]
  //   %2085 = %arg2       x t(%19)  -> [2,77,1280]
  // NOTE(review): %2081 and %2085 are computed from identical operands, and %29
  // serves as both norm weight and norm bias. Weight literals in this dump are
  // elided (dense_resource<__elided__>), so distinct constants were presumably
  // merged into one value -- confirm before reading this as real weight sharing.
  %result0_73, %result1_74, %result2_75 = torch.aten.native_layer_norm %2073, %954, %29, %29, %float1.000000e-05 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.vtensor<[1280],f16>, !torch.vtensor<[1280],f16>, !torch.float -> !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1],f32>, !torch.vtensor<[2,256,1],f32>
  // Projection of the normalized activations.
  %2074 = torch.aten.t %20 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
  %2075 = torch.aten._reshape_alias %result0_73, %956, %957 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
  %2076 = torch.aten.mm %2075, %2074 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16>
  %2077 = torch.aten._unsafe_view %2076, %951 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
  // First projection of the function argument %arg2 (768 -> 1280 features).
  %2078 = torch.aten.t %19 : !torch.vtensor<[1280,768],f16> -> !torch.vtensor<[768,1280],f16>
  %2079 = torch.aten._reshape_alias %arg2, %228, %229 : !torch.vtensor<[2,77,768],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[154,768],f16>
  %2080 = torch.aten.mm %2079, %2078 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,1280],f16> -> !torch.vtensor<[154,1280],f16>
  %2081 = torch.aten._unsafe_view %2080, %1022 : !torch.vtensor<[154,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,77,1280],f16>
  // Second projection of %arg2 (same operands as the first one).
  %2082 = torch.aten.t %19 : !torch.vtensor<[1280,768],f16> -> !torch.vtensor<[768,1280],f16>
  %2083 = torch.aten._reshape_alias %arg2, %228, %229 : !torch.vtensor<[2,77,768],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[154,768],f16>
  %2084 = torch.aten.mm %2083, %2082 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,1280],f16> -> !torch.vtensor<[154,1280],f16>
  %2085 = torch.aten._unsafe_view %2084, %1022 : !torch.vtensor<[154,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,77,1280],f16>
  %2086 = torch.aten._reshape_alias %2077, %969, %970 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16>
  %2087 = torch.aten.permute %2086, %179 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16>
  %2088 = torch.aten.clone %2087, %int0 : !torch.vtensor<[2,8,256,160],f16>, !torch.int -> !torch.vtensor<[2,8,256,160],f16>
  %2089 = torch.aten._unsafe_view %2088, %974 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
  %2090 = torch.aten._reshape_alias %2081, %1032, %1033 : !torch.vtensor<[2,77,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,77,8,160],f16>
  %2091 = torch.aten.permute %2090, %179 : !torch.vtensor<[2,77,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,160],f16>
  %2092 = torch.aten.clone %2091, %int0 : !torch.vtensor<[2,8,77,160],f16>, !torch.int -> !torch.vtensor<[2,8,77,160],f16>
  %2093 = torch.aten._unsafe_view %2092, %1037 : !torch.vtensor<[2,8,77,160],f16>, !torch.list<int> -> !torch.vtensor<[16,77,160],f16>
  %2094 = torch.aten._reshape_alias %2085, %1032, %1033 : !torch.vtensor<[2,77,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,77,8,160],f16>
  %2095 = torch.aten.permute %2094, %179 : !torch.vtensor<[2,77,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,160],f16>
  %2096 = torch.aten.clone %2095, %int0 : !torch.vtensor<[2,8,77,160],f16>, !torch.int -> !torch.vtensor<[2,8,77,160],f16>
  %2097 = torch.aten._unsafe_view %2096, %1037 : !torch.vtensor<[2,8,77,160],f16>, !torch.list<int> -> !torch.vtensor<[16,77,160],f16>
  %2098 = torch.aten.transpose.int %2093, %int-1, %int-2 : !torch.vtensor<[16,77,160],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,160,77],f16>
  %2099 = torch.aten.expand %2089, %974, %false : !torch.vtensor<[16,256,160],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,256,160],f16>
  %2100 = torch.aten._reshape_alias %2099, %974, %986 : !torch.vtensor<[16,256,160],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
  %2101 = torch.aten.expand %2098, %1046, %false : !torch.vtensor<[16,160,77],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,160,77],f16>
  %2102 = torch.aten._reshape_alias %2101, %1046, %1048 : !torch.vtensor<[16,160,77],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,160,77],f16>
  %2103 = torch.aten.bmm %2100, %2102 : !torch.vtensor<[16,256,160],f16>, !torch.vtensor<[16,160,77],f16> -> !torch.vtensor<[16,256,77],f16>
  %2104 = torch.aten._unsafe_view %2103, %1051 : !torch.vtensor<[16,256,77],f16>, !torch.list<int> -> !torch.vtensor<[16,256,77],f16>
  %2105 = torch.aten.mul.Tensor %2104, %0 : !torch.vtensor<[16,256,77],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,256,77],f16>
  %2106 = torch.aten._softmax %2105, %int-1, %false : !torch.vtensor<[16,256,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,256,77],f16>
  %2107 = torch.aten.expand %2106, %1051, %false : !torch.vtensor<[16,256,77],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,256,77],f16>
  %2108 = torch.aten._reshape_alias %2107, %1051, %1056 : !torch.vtensor<[16,256,77],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,256,77],f16>
  %2109 = torch.aten.expand %2097, %1037, %false : !torch.vtensor<[16,77,160],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,77,160],f16>
  %2110 = torch.aten._reshape_alias %2109, %1037, %1059 : !torch.vtensor<[16,77,160],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,77,160],f16>
  %2111 = torch.aten.bmm %2108, %2110 : !torch.vtensor<[16,256,77],f16>, !torch.vtensor<[16,77,160],f16> -> !torch.vtensor<[16,256,160],f16>
  %2112 = torch.aten._unsafe_view %2111, %974 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
  %2113 = torch.aten._reshape_alias %2112, %1004, %1005 : !torch.vtensor<[16,256,160],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16>
  %2114 = torch.aten.permute %2113, %179 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16>
  %2115 = torch.aten.clone %2114, %int0 : !torch.vtensor<[2,256,8,160],f16>, !torch.int -> !torch.vtensor<[2,256,8,160],f16>
  %2116 = torch.aten._unsafe_view %2115, %951 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
  %2117 = torch.aten.t %20 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
  %2118 = torch.aten.view %2116, %956 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
  %2119 = torch.aten.addmm %29, %2118, %2117, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[512,1280],f16>
  %2120 = torch.aten.view %2119, %951 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
  %2121 = torch.aten.add.Tensor %2120, %2073, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16>
  %result0_76, %result1_77, %result2_78 = torch.aten.native_layer_norm %2121, %954, %29, %29, %float1.000000e-05 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.vtensor<[1280],f16>, !torch.vtensor<[1280],f16>, !torch.float -> !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1],f32>, !torch.vtensor<[2,256,1],f32>
  %2122 = torch.aten.t %21 : !torch.vtensor<[10240,1280],f16> -> !torch.vtensor<[1280,10240],f16>
  %2123 = torch.aten.view %result0_76, %956 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
  %2124 = torch.aten.addmm %22, %2123, %2122, %int1, %int1 : !torch.vtensor<[10240],f16>, !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,10240],f16>, !torch.int, !torch.int -> !torch.vtensor<[512,10240],f16>
  %2125 = torch.aten.view %2124, %1075 : !torch.vtensor<[512,10240],f16>, !torch.list<int> -> !torch.vtensor<[2,256,10240],f16>
  %2126 = torch.aten.slice.Tensor %2125, %int-1, %int0, %int5120, %int1 : !torch.vtensor<[2,256,10240],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,256,5120],f16>
  %2127 = torch.aten.slice.Tensor %2125, %int-1, %int5120, %int10240, %int1 : !torch.vtensor<[2,256,10240],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,256,5120],f16>
  %2128 = torch.aten.gelu %2127, %str_0 : !torch.vtensor<[2,256,5120],f16>, !torch.str -> !torch.vtensor<[2,256,5120],f16>
  %2129 = torch.aten.mul.Tensor %2126, %2128 : !torch.vtensor<[2,256,5120],f16>, !torch.vtensor<[2,256,5120],f16> -> !torch.vtensor<[2,256,5120],f16>
  %2130 = torch.aten.t %23 : !torch.vtensor<[1280,5120],f16> -> !torch.vtensor<[5120,1280],f16>
  %2131 = torch.aten.view %2129, %1082 : !torch.vtensor<[2,256,5120],f16>, !torch.list<int> -> !torch.vtensor<[512,5120],f16>
  %2132 = torch.aten.addmm %29, %2131, %2130, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[512,5120],f16>, !torch.vtensor<[5120,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[512,1280],f16>
  %2133 = torch.aten.view %2132, %951 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
  %2134 = torch.aten.add.Tensor %2133, %2121, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16>
  %2135 = torch.aten._reshape_alias %2134, %1087, %1088 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,16,16,1280],f16>
  %2136 = torch.aten.permute %2135, %300 : !torch.vtensor<[2,16,16,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f16>
  %2137 = torch.aten._convolution %2136, %24, %29, %87, %88, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[1280,1280,1,1],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,16,16],f16>
  %2138 = torch.aten.add.Tensor %2137, %2005, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
  %2139 = torch.prim.ListConstruct %2138, %879 : (!torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,640,16,16],f16>) -> !torch.list<vtensor>
  %2140 = torch.aten.cat %2139, %int1 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[2,1920,16,16],f16>
  %2141 = torch.aten.clone %2140, %int0 : !torch.vtensor<[2,1920,16,16],f16>, !torch.int -> !torch.vtensor<[2,1920,16,16],f16>
  %2142 = torch.prim.ListConstruct %int2, %int32, %int60, %int256 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
  %2143 = torch.aten.view %2141, %2142 : !torch.vtensor<[2,1920,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,32,60,256],f16>
  %2144 = torch.aten._to_copy %2143, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,32,60,256],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,32,60,256],f32>
  %2145 = torch.aten.var.correction %2144, %93, %int0, %true : !torch.vtensor<[2,32,60,256],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>
  %2146 = torch.aten.mean.dim %2144, %93, %true, %none : !torch.vtensor<[2,32,60,256],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
  %2147 = torch.aten.add.Tensor %2145, %5, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
  %2148 = torch.aten.rsqrt %2147 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
  %2149 = torch.aten.sub.Tensor %2143, %2146, %int1 : !torch.vtensor<[2,32,60,256],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,60,256],f32>
  %2150 = torch.aten.mul.Tensor %2149, %2148 : !torch.vtensor<[2,32,60,256],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,60,256],f32>
  %2151 = torch.prim.ListConstruct %int2, %int1920, %int16, %int16 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
  %2152 = torch.aten.view %2150, %2151 : !torch.vtensor<[2,32,60,256],f32>, !torch.list<int> -> !torch.vtensor<[2,1920,16,16],f32>
  %2153 = torch.aten.unsqueeze %26, %int-1 : !torch.vtensor<[1920],f16>, !torch.int -> !torch.vtensor<[1920,1],f16>
  %2154 = torch.aten.unsqueeze %2153, %int-1 : !torch.vtensor<[1920,1],f16>, !torch.int -> !torch.vtensor<[1920,1,1],f16>
  %2155 = torch.aten.mul.Tensor %2152, %2154 : !torch.vtensor<[2,1920,16,16],f32>, !torch.vtensor<[1920,1,1],f16> -> !torch.vtensor<[2,1920,16,16],f32>
  %2156 = torch.aten.unsqueeze %26, %int-1 : !torch.vtensor<[1920],f16>, !torch.int -> !torch.vtensor<[1920,1],f16>
  %2157 = torch.aten.unsqueeze %2156, %int-1 : !torch.vtensor<[1920,1],f16>, !torch.int -> !torch.vtensor<[1920,1,1],f16>
  %2158 = torch.aten.add.Tensor %2155, %2157, %int1 : !torch.vtensor<[2,1920,16,16],f32>, !torch.vtensor<[1920,1,1],f16>, !torch.int -> !torch.vtensor<[2,1920,16,16],f32>
  %2159 = torch.aten._to_copy %2158, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,1920,16,16],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1920,16,16],f16>
  %2160 = torch.aten.silu %2159 : !torch.vtensor<[2,1920,16,16],f16> -> !torch.vtensor<[2,1920,16,16],f16>
  %2161 = torch.aten._convolution %2160, %17, %29, %87, %87, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,1920,16,16],f16>, !torch.vtensor<[1280,1920,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,16,16],f16>
  %2162 = torch.aten.silu %86 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
  %2163 = torch.aten.t %20 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
  %2164 = torch.aten.addmm %29, %2162, %2163, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
  %2165 = torch.aten.slice.Tensor %2164, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,1280],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
  %2166 = torch.aten.slice.Tensor %2165, %int1, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,1280],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
  %2167 = torch.aten.unsqueeze %2166, %int2 : !torch.vtensor<[2,1280],f16>, !torch.int -> !torch.vtensor<[2,1280,1],f16>
  %2168 = torch.aten.unsqueeze %2167, %int3 : !torch.vtensor<[2,1280,1],f16>, !torch.int -> !torch.vtensor<[2,1280,1,1],f16>
  %2169 = torch.aten.add.Tensor %2161, %2168, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
  %2170 = torch.aten.view %2169, %909 : !torch.vtensor<[2,1280,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,32,40,256],f16>
  %2171 = torch.aten._to_copy %2170, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,32,40,256],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,32,40,256],f32>
  %2172 = torch.aten.var.correction %2171, %93, %int0, %true : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>
  %2173 = torch.aten.mean.dim %2171, %93, %true, %none : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
  %2174 = torch.aten.add.Tensor %2172, %5, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
  %2175 = torch.aten.rsqrt %2174 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
  %2176 = torch.aten.sub.Tensor %2170, %2173, %int1 : !torch.vtensor<[2,32,40,256],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,256],f32>
  %2177 = torch.aten.mul.Tensor %2176, %2175 : !torch.vtensor<[2,32,40,256],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,256],f32>
  %2178 = torch.aten.view %2177, %918 : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f32>
  %2179 = torch.aten.unsqueeze %29, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
  %2180 = torch.aten.unsqueeze %2179, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
  %2181 = torch.aten.mul.Tensor %2178, %2180 : !torch.vtensor<[2,1280,16,16],f32>, !torch.vtensor<[1280,1,1],f16> -> !torch.vtensor<[2,1280,16,16],f32>
  %2182 = torch.aten.unsqueeze %29, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
  %2183 = torch.aten.unsqueeze %2182, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
  %2184 = torch.aten.add.Tensor %2181, %2183, %int1 : !torch.vtensor<[2,1280,16,16],f32>, !torch.vtensor<[1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f32>
  %2185 = torch.aten._to_copy %2184, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,1280,16,16],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,16,16],f16>
  %2186 = torch.aten.silu %2185 : !torch.vtensor<[2,1280,16,16],f16> -> !torch.vtensor<[2,1280,16,16],f16>
  %2187 = torch.aten._convolution %2186, %25, %29, %87, %87, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,16,16],f16>
  %2188 = torch.aten._convolution %2140, %18, %29, %87, %88, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,1920,16,16],f16>, !torch.vtensor<[1280,1920,1,1],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,16,16],f16>
  %2189 = torch.aten.add.Tensor %2188, %2187, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
  %2190 = torch.aten.div.Tensor %2189, %4 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[2,1280,16,16],f16>
  %2191 = torch.aten.clone %2190, %int0 : !torch.vtensor<[2,1280,16,16],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
  %2192 = torch.aten.view %2191, %909 : !torch.vtensor<[2,1280,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,32,40,256],f16>
  %2193 = torch.aten._to_copy %2192, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,32,40,256],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,32,40,256],f32>
  %2194 = torch.aten.var.correction %2193, %93, %int0, %true : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>
  %2195 = torch.aten.mean.dim %2193, %93, %true, %none : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
  %2196 = torch.aten.add.Tensor %2194, %3, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
  %2197 = torch.aten.rsqrt %2196 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
  %2198 = torch.aten.sub.Tensor %2192, %2195, %int1 : !torch.vtensor<[2,32,40,256],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,256],f32>
  %2199 = torch.aten.mul.Tensor %2198, %2197 : !torch.vtensor<[2,32,40,256],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,256],f32>
  %2200 = torch.aten.view %2199, %918 : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f32>
  %2201 = torch.aten.unsqueeze %29, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
  %2202 = torch.aten.unsqueeze %2201, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
  %2203 = torch.aten.mul.Tensor %2200, %2202 : !torch.vtensor<[2,1280,16,16],f32>, !torch.vtensor<[1280,1,1],f16> -> !torch.vtensor<[2,1280,16,16],f32>
  %2204 = torch.aten.unsqueeze %29, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
  %2205 = torch.aten.unsqueeze %2204, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
  %2206 = torch.aten.add.Tensor %2203, %2205, %int1 : !torch.vtensor<[2,1280,16,16],f32>, !torch.vtensor<[1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f32>
  %2207 = torch.aten._to_copy %2206, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,1280,16,16],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,16,16],f16>
  %2208 = torch.aten._convolution %2207, %24, %29, %87, %88, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[1280,1280,1,1],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,16,16],f16>
  %2209 = torch.aten.permute %2208, %156 : !torch.vtensor<[2,1280,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,16,16,1280],f16>
  %2210 = torch.aten._reshape_alias %2209, %951, %952 : !torch.vtensor<[2,16,16,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
  %result0_79, %result1_80, %result2_81 = torch.aten.native_layer_norm %2210, %954, %29, %29, %float1.000000e-05 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.vtensor<[1280],f16>, !torch.vtensor<[1280],f16>, !torch.float -> !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1],f32>, !torch.vtensor<[2,256,1],f32>
  %2211 = torch.aten.t %20 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
  %2212 = torch.aten._reshape_alias %result0_79, %956, %957 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
  %2213 = torch.aten.mm %2212, %2211 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16>
  %2214 = torch.aten._unsafe_view %2213, %951 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
  %2215 = torch.aten.t %20 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
  %2216 = torch.aten._reshape_alias %result0_79, %956, %957 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
  %2217 = torch.aten.mm %2216, %2215 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16>
  %2218 = torch.aten._unsafe_view %2217, %951 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
  %2219 = torch.aten.t %20 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
  %2220 = torch.aten._reshape_alias %result0_79, %956, %957 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
  %2221 = torch.aten.mm %2220, %2219 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16>
  %2222 = torch.aten._unsafe_view %2221, %951 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
  %2223 = torch.aten._reshape_alias %2214, %969, %970 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16>
  %2224 = torch.aten.permute %2223, %179 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16>
  %2225 = torch.aten.clone %2224, %int0 : !torch.vtensor<[2,8,256,160],f16>, !torch.int -> !torch.vtensor<[2,8,256,160],f16>
  %2226 = torch.aten._unsafe_view %2225, %974 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
  %2227 = torch.aten._reshape_alias %2218, %969, %970 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16>
  %2228 = torch.aten.permute %2227, %179 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16>
  %2229 = torch.aten.clone %2228, %int0 : !torch.vtensor<[2,8,256,160],f16>, !torch.int -> !torch.vtensor<[2,8,256,160],f16>
  %2230 = torch.aten._unsafe_view %2229, %974 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
  %2231 = torch.aten._reshape_alias %2222, %969, %970 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16>
  %2232 = torch.aten.permute %2231, %179 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16>
  %2233 = torch.aten.clone %2232, %int0 : !torch.vtensor<[2,8,256,160],f16>, !torch.int -> !torch.vtensor<[2,8,256,160],f16>
  %2234 = torch.aten._unsafe_view %2233, %974 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
  %2235 = torch.aten.transpose.int %2230, %int-1, %int-2 : !torch.vtensor<[16,256,160],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,160,256],f16>
  %2236 = torch.aten.expand %2226, %974, %false : !torch.vtensor<[16,256,160],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,256,160],f16>
  %2237 = torch.aten._reshape_alias %2236, %974, %986 : !torch.vtensor<[16,256,160],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
  %2238 = torch.aten.expand %2235, %988, %false : !torch.vtensor<[16,160,256],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,160,256],f16>
  %2239 = torch.aten._reshape_alias %2238, %988, %990 : !torch.vtensor<[16,160,256],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,160,256],f16>
  %2240 = torch.aten.bmm %2237, %2239 : !torch.vtensor<[16,256,160],f16>, !torch.vtensor<[16,160,256],f16> -> !torch.vtensor<[16,256,256],f16>
  %2241 = torch.aten._unsafe_view %2240, %993 : !torch.vtensor<[16,256,256],f16>, !torch.list<int> -> !torch.vtensor<[16,256,256],f16>
  %2242 = torch.aten.mul.Tensor %2241, %0 : !torch.vtensor<[16,256,256],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,256,256],f16>
  %2243 = torch.aten._softmax %2242, %int-1, %false : !torch.vtensor<[16,256,256],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,256,256],f16>
  %2244 = torch.aten.expand %2243, %993, %false : !torch.vtensor<[16,256,256],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,256,256],f16>
  %2245 = torch.aten._reshape_alias %2244, %993, %998 : !torch.vtensor<[16,256,256],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,256,256],f16>
  %2246 = torch.aten.expand %2234, %974, %false : !torch.vtensor<[16,256,160],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,256,160],f16>
  %2247 = torch.aten._reshape_alias %2246, %974, %986 : !torch.vtensor<[16,256,160],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
  %2248 = torch.aten.bmm %2245, %2247 : !torch.vtensor<[16,256,256],f16>, !torch.vtensor<[16,256,160],f16> -> !torch.vtensor<[16,256,160],f16>
  %2249 = torch.aten._unsafe_view %2248, %974 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
  %2250 = torch.aten._reshape_alias %2249, %1004, %1005 : !torch.vtensor<[16,256,160],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16>
  %2251 = torch.aten.permute %2250, %179 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16>
  %2252 = torch.aten.clone %2251, %int0 : !torch.vtensor<[2,256,8,160],f16>, !torch.int -> !torch.vtensor<[2,256,8,160],f16>
  %2253 = torch.aten._unsafe_view %2252, %951 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
  %2254 = torch.aten.t %20 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
  %2255 = torch.aten.view %2253, %956 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
  %2256 = torch.aten.addmm %29, %2255, %2254, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[512,1280],f16>
  %2257 = torch.aten.view %2256, %951 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
  %2258 = torch.aten.add.Tensor %2257, %2210, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16>
  %result0_82, %result1_83, %result2_84 = torch.aten.native_layer_norm %2258, %954, %29, %29, %float1.000000e-05 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.vtensor<[1280],f16>, !torch.vtensor<[1280],f16>, !torch.float -> !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1],f32>, !torch.vtensor<[2,256,1],f32>
  %2259 = torch.aten.t %20 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
  %2260 = torch.aten._reshape_alias %result0_82, %956, %957 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
  %2261 = torch.aten.mm %2260, %2259 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16>
  %2262 = torch.aten._unsafe_view %2261, %951 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
  %2263 = torch.aten.t %19 : !torch.vtensor<[1280,768],f16> -> !torch.vtensor<[768,1280],f16>
  %2264 = torch.aten._reshape_alias %arg2, %228, %229 : !torch.vtensor<[2,77,768],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[154,768],f16>
  %2265 = torch.aten.mm %2264, %2263 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,1280],f16> -> !torch.vtensor<[154,1280],f16>
  %2266 = torch.aten._unsafe_view %2265, %1022 : !torch.vtensor<[154,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,77,1280],f16>
  %2267 = torch.aten.t %19 : !torch.vtensor<[1280,768],f16> -> !torch.vtensor<[768,1280],f16>
  %2268 = torch.aten._reshape_alias %arg2, %228, %229 : !torch.vtensor<[2,77,768],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[154,768],f16>
  %2269 = torch.aten.mm %2268, %2267 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,1280],f16> -> !torch.vtensor<[154,1280],f16>
  %2270 = torch.aten._unsafe_view %2269, %1022 : !torch.vtensor<[154,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,77,1280],f16>
  %2271 = torch.aten._reshape_alias %2262, %969, %970 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16>
  %2272 = torch.aten.permute %2271, %179 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16>
  %2273 = torch.aten.clone %2272, %int0 : !torch.vtensor<[2,8,256,160],f16>, !torch.int -> !torch.vtensor<[2,8,256,160],f16>
  %2274 = torch.aten._unsafe_view %2273, %974 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
  %2275 = torch.aten._reshape_alias %2266, %1032, %1033 : !torch.vtensor<[2,77,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,77,8,160],f16>
  %2276 = torch.aten.permute %2275, %179 : !torch.vtensor<[2,77,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,160],f16>
  %2277 = torch.aten.clone %2276, %int0 : !torch.vtensor<[2,8,77,160],f16>, !torch.int -> !torch.vtensor<[2,8,77,160],f16>
  %2278 = torch.aten._unsafe_view %2277, %1037 : !torch.vtensor<[2,8,77,160],f16>, !torch.list<int> -> !torch.vtensor<[16,77,160],f16>
  %2279 = torch.aten._reshape_alias %2270, %1032, %1033 : !torch.vtensor<[2,77,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,77,8,160],f16>
  %2280 = torch.aten.permute %2279, %179 : !torch.vtensor<[2,77,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,160],f16>
  %2281 = torch.aten.clone %2280, %int0 : !torch.vtensor<[2,8,77,160],f16>, !torch.int -> !torch.vtensor<[2,8,77,160],f16>
  %2282 = torch.aten._unsafe_view %2281, %1037 : !torch.vtensor<[2,8,77,160],f16>, !torch.list<int> -> !torch.vtensor<[16,77,160],f16>
  %2283 = torch.aten.transpose.int %2278, %int-1, %int-2 : !torch.vtensor<[16,77,160],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,160,77],f16>
  %2284 = torch.aten.expand %2274, %974, %false : !torch.vtensor<[16,256,160],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,256,160],f16>
  %2285 = torch.aten._reshape_alias %2284, %974, %986 : !torch.vtensor<[16,256,160],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
  %2286 = torch.aten.expand %2283, %1046, %false : !torch.vtensor<[16,160,77],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,160,77],f16>
  %2287 = torch.aten._reshape_alias %2286, %1046, %1048 : !torch.vtensor<[16,160,77],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,160,77],f16>
  %2288 = torch.aten.bmm %2285, %2287 : !torch.vtensor<[16,256,160],f16>, !torch.vtensor<[16,160,77],f16> -> !torch.vtensor<[16,256,77],f16>
  %2289 = torch.aten._unsafe_view %2288, %1051 : !torch.vtensor<[16,256,77],f16>, !torch.list<int> -> !torch.vtensor<[16,256,77],f16>
  %2290 = torch.aten.mul.Tensor %2289, %0 : !torch.vtensor<[16,256,77],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,256,77],f16>
  %2291 = torch.aten._softmax %2290, %int-1, %false : !torch.vtensor<[16,256,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,256,77],f16>
  %2292 = torch.aten.expand %2291, %1051, %false : !torch.vtensor<[16,256,77],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,256,77],f16>
  %2293 = torch.aten._reshape_alias %2292, %1051, %1056 : !torch.vtensor<[16,256,77],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,256,77],f16>
  %2294 = torch.aten.expand %2282, %1037, %false : !torch.vtensor<[16,77,160],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,77,160],f16>
  %2295 = torch.aten._reshape_alias %2294, %1037, %1059 : !torch.vtensor<[16,77,160],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,77,160],f16>
  %2296 = torch.aten.bmm %2293, %2295 : !torch.vtensor<[16,256,77],f16>, !torch.vtensor<[16,77,160],f16> -> !torch.vtensor<[16,256,160],f16>
  %2297 = torch.aten._unsafe_view %2296, %974 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
  %2298 = torch.aten._reshape_alias %2297, %1004, %1005 : !torch.vtensor<[16,256,160],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16>
  %2299 = torch.aten.permute %2298, %179 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16>
  %2300 = torch.aten.clone %2299, %int0 : !torch.vtensor<[2,256,8,160],f16>, !torch.int -> !torch.vtensor<[2,256,8,160],f16>
  %2301 = torch.aten._unsafe_view %2300, %951 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
  %2302 = torch.aten.t %20 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
  %2303 = torch.aten.view %2301, %956 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
  %2304 = torch.aten.addmm %29, %2303, %2302, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[512,1280],f16>
  %2305 = torch.aten.view %2304, %951 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
  %2306 = torch.aten.add.Tensor %2305, %2258, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16>
  %result0_85, %result1_86, %result2_87 = torch.aten.native_layer_norm %2306, %954, %29, %29, %float1.000000e-05 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.vtensor<[1280],f16>, !torch.vtensor<[1280],f16>, !torch.float -> !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1],f32>, !torch.vtensor<[2,256,1],f32>
  %2307 = torch.aten.t %21 : !torch.vtensor<[10240,1280],f16> -> !torch.vtensor<[1280,10240],f16>
  %2308 = torch.aten.view %result0_85, %956 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
  %2309 = torch.aten.addmm %22, %2308, %2307, %int1, %int1 : !torch.vtensor<[10240],f16>, !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,10240],f16>, !torch.int, !torch.int -> !torch.vtensor<[512,10240],f16>
  %2310 = torch.aten.view %2309, %1075 : !torch.vtensor<[512,10240],f16>, !torch.list<int> -> !torch.vtensor<[2,256,10240],f16>
  %2311 = torch.aten.slice.Tensor %2310, %int-1, %int0, %int5120, %int1 : !torch.vtensor<[2,256,10240],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,256,5120],f16>
  %2312 = torch.aten.slice.Tensor %2310, %int-1, %int5120, %int10240, %int1 : !torch.vtensor<[2,256,10240],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,256,5120],f16>
  %2313 = torch.aten.gelu %2312, %str_0 : !torch.vtensor<[2,256,5120],f16>, !torch.str -> !torch.vtensor<[2,256,5120],f16>
  %2314 = torch.aten.mul.Tensor %2311, %2313 : !torch.vtensor<[2,256,5120],f16>, !torch.vtensor<[2,256,5120],f16> -> !torch.vtensor<[2,256,5120],f16>
  %2315 = torch.aten.t %23 : !torch.vtensor<[1280,5120],f16> -> !torch.vtensor<[5120,1280],f16>
  %2316 = torch.aten.view %2314, %1082 : !torch.vtensor<[2,256,5120],f16>, !torch.list<int> -> !torch.vtensor<[512,5120],f16>
  %2317 = torch.aten.addmm %29, %2316, %2315, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[512,5120],f16>, !torch.vtensor<[5120,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[512,1280],f16>
  %2318 = torch.aten.view %2317, %951 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
  %2319 = torch.aten.add.Tensor %2318, %2306, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16>
  %2320 = torch.aten._reshape_alias %2319, %1087, %1088 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,16,16,1280],f16>
  %2321 = torch.aten.permute %2320, %300 : !torch.vtensor<[2,16,16,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f16>
  %2322 = torch.aten._convolution %2321, %24, %29, %87, %88, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[1280,1280,1,1],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,16,16],f16>
  %2323 = torch.aten.add.Tensor %2322, %2190, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
  %2324 = torch.aten.upsample_nearest2d.vec %2323, %none, %1768 : !torch.vtensor<[2,1280,16,16],f16>, !torch.none, !torch.list<float> -> !torch.vtensor<[2,1280,32,32],f16>
  %2325 = torch.aten._convolution %2324, %25, %29, %87, %87, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,1280,32,32],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,32,32],f16>
  %2326 = torch.prim.ListConstruct %2325, %878 : (!torch.vtensor<[2,1280,32,32],f16>, !torch.vtensor<[2,640,32,32],f16>) -> !torch.list<vtensor>
  %2327 = torch.aten.cat %2326, %int1 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[2,1920,32,32],f16>
  %2328 = torch.aten.clone %2327, %int0 : !torch.vtensor<[2,1920,32,32],f16>, !torch.int -> !torch.vtensor<[2,1920,32,32],f16>
  %2329 = torch.prim.ListConstruct %int2, %int32, %int60, %int1024 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
  %2330 = torch.aten.view %2328, %2329 : !torch.vtensor<[2,1920,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,32,60,1024],f16>
  %2331 = torch.aten._to_copy %2330, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,32,60,1024],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,32,60,1024],f32>
  %2332 = torch.aten.var.correction %2331, %93, %int0, %true : !torch.vtensor<[2,32,60,1024],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>
  %2333 = torch.aten.mean.dim %2331, %93, %true, %none : !torch.vtensor<[2,32,60,1024],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
  %2334 = torch.aten.add.Tensor %2332, %5, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
  %2335 = torch.aten.rsqrt %2334 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
  %2336 = torch.aten.sub.Tensor %2330, %2333, %int1 : !torch.vtensor<[2,32,60,1024],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,60,1024],f32>
  %2337 = torch.aten.mul.Tensor %2336, %2335 : !torch.vtensor<[2,32,60,1024],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,60,1024],f32>
  %2338 = torch.prim.ListConstruct %int2, %int1920, %int32, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
  %2339 = torch.aten.view %2337, %2338 : !torch.vtensor<[2,32,60,1024],f32>, !torch.list<int> -> !torch.vtensor<[2,1920,32,32],f32>
  %2340 = torch.aten.unsqueeze %26, %int-1 : !torch.vtensor<[1920],f16>, !torch.int -> !torch.vtensor<[1920,1],f16>
  %2341 = torch.aten.unsqueeze %2340, %int-1 : !torch.vtensor<[1920,1],f16>, !torch.int -> !torch.vtensor<[1920,1,1],f16>
  %2342 = torch.aten.mul.Tensor %2339, %2341 : !torch.vtensor<[2,1920,32,32],f32>, !torch.vtensor<[1920,1,1],f16> -> !torch.vtensor<[2,1920,32,32],f32>
  %2343 = torch.aten.unsqueeze %26, %int-1 : !torch.vtensor<[1920],f16>, !torch.int -> !torch.vtensor<[1920,1],f16>
  %2344 = torch.aten.unsqueeze %2343, %int-1 : !torch.vtensor<[1920,1],f16>, !torch.int -> !torch.vtensor<[1920,1,1],f16>
  %2345 = torch.aten.add.Tensor %2342, %2344, %int1 : !torch.vtensor<[2,1920,32,32],f32>, !torch.vtensor<[1920,1,1],f16>, !torch.int -> !torch.vtensor<[2,1920,32,32],f32>
  %2346 = torch.aten._to_copy %2345, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,1920,32,32],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1920,32,32],f16>
  %2347 = torch.aten.silu %2346 : !torch.vtensor<[2,1920,32,32],f16> -> !torch.vtensor<[2,1920,32,32],f16>
  %2348 = torch.aten._convolution %2347, %27, %45, %87, %87, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,1920,32,32],f16>, !torch.vtensor<[640,1920,3,3],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,640,32,32],f16>
  %2349 = torch.aten.silu %86 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
  %2350 = torch.aten.t %33 : !torch.vtensor<[640,1280],f16> -> !torch.vtensor<[1280,640],f16>
  %2351 = torch.aten.addmm %45, %2349, %2350, %int1, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[2,640],f16>
  %2352 = torch.aten.slice.Tensor %2351, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,640],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,640],f16>
  %2353 = torch.aten.slice.Tensor %2352, %int1, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,640],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,640],f16>
  %2354 = torch.aten.unsqueeze %2353, %int2 : !torch.vtensor<[2,640],f16>, !torch.int -> !torch.vtensor<[2,640,1],f16>
  %2355 = torch.aten.unsqueeze %2354, %int3 : !torch.vtensor<[2,640,1],f16>, !torch.int -> !torch.vtensor<[2,640,1,1],f16>
  %2356 = torch.aten.add.Tensor %2348, %2355, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,1,1],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
  %2357 = torch.aten.view %2356, %515 : !torch.vtensor<[2,640,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,32,20,1024],f16>
  %2358 = torch.aten._to_copy %2357, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,32,20,1024],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,32,20,1024],f32>
  %2359 = torch.aten.var.correction %2358, %93, %int0, %true : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>
  %2360 = torch.aten.mean.dim %2358, %93, %true, %none : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
  %2361 = torch.aten.add.Tensor %2359, %5, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
  %2362 = torch.aten.rsqrt %2361 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
  %2363 = torch.aten.sub.Tensor %2357, %2360, %int1 : !torch.vtensor<[2,32,20,1024],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,20,1024],f32>
  %2364 = torch.aten.mul.Tensor %2363, %2362 : !torch.vtensor<[2,32,20,1024],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,20,1024],f32>
  %2365 = torch.aten.view %2364, %524 : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f32>
  %2366 = torch.aten.unsqueeze %45, %int-1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640,1],f16>
  %2367 = torch.aten.unsqueeze %2366, %int-1 : !torch.vtensor<[640,1],f16>, !torch.int -> !torch.vtensor<[640,1,1],f16>
  %2368 = torch.aten.mul.Tensor %2365, %2367 : !torch.vtensor<[2,640,32,32],f32>, !torch.vtensor<[640,1,1],f16> -> !torch.vtensor<[2,640,32,32],f32>
  %2369 = torch.aten.unsqueeze %45, %int-1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640,1],f16>
  %2370 = torch.aten.unsqueeze %2369, %int-1 : !torch.vtensor<[640,1],f16>, !torch.int -> !torch.vtensor<[640,1,1],f16>
  %2371 = torch.aten.add.Tensor %2368, %2370, %int1 : !torch.vtensor<[2,640,32,32],f32>, !torch.vtensor<[640,1,1],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f32>
  %2372 = torch.aten._to_copy %2371, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,640,32,32],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,640,32,32],f16>
  %2373 = torch.aten.silu %2372 : !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16>
  %2374 = torch.aten._convolution %2373, %41, %45, %87, %87, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[640,640,3,3],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,640,32,32],f16>
  %2375 = torch.aten._convolution %2327, %28, %45, %87, %88, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,1920,32,32],f16>, !torch.vtensor<[640,1920,1,1],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,640,32,32],f16>
  %2376 = torch.aten.add.Tensor %2375, %2374, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
  %2377 = torch.aten.div.Tensor %2376, %4 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[2,640,32,32],f16>
  %2378 = torch.aten.clone %2377, %int0 : !torch.vtensor<[2,640,32,32],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
  %2379 = torch.aten.view %2378, %515 : !torch.vtensor<[2,640,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,32,20,1024],f16>
  %2380 = torch.aten._to_copy %2379, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,32,20,1024],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,32,20,1024],f32>
  %2381 = torch.aten.var.correction %2380, %93, %int0, %true : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>
  %2382 = torch.aten.mean.dim %2380, %93, %true, %none : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
  %2383 = torch.aten.add.Tensor %2381, %3, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
  %2384 = torch.aten.rsqrt %2383 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
  %2385 = torch.aten.sub.Tensor %2379, %2382, %int1 : !torch.vtensor<[2,32,20,1024],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,20,1024],f32>
  %2386 = torch.aten.mul.Tensor %2385, %2384 : !torch.vtensor<[2,32,20,1024],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,20,1024],f32>
  %2387 = torch.aten.view %2386, %524 : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f32>
  %2388 = torch.aten.unsqueeze %45, %int-1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640,1],f16>
  %2389 = torch.aten.unsqueeze %2388, %int-1 : !torch.vtensor<[640,1],f16>, !torch.int -> !torch.vtensor<[640,1,1],f16>
  %2390 = torch.aten.mul.Tensor %2387, %2389 : !torch.vtensor<[2,640,32,32],f32>, !torch.vtensor<[640,1,1],f16> -> !torch.vtensor<[2,640,32,32],f32>
  %2391 = torch.aten.unsqueeze %45, %int-1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640,1],f16>
  %2392 = torch.aten.unsqueeze %2391, %int-1 : !torch.vtensor<[640,1],f16>, !torch.int -> !torch.vtensor<[640,1,1],f16>
  %2393 = torch.aten.add.Tensor %2390, %2392, %int1 : !torch.vtensor<[2,640,32,32],f32>, !torch.vtensor<[640,1,1],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f32>
  %2394 = torch.aten._to_copy %2393, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,640,32,32],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,640,32,32],f16>
  %2395 = torch.aten._convolution %2394, %40, %45, %87, %88, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[640,640,1,1],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,640,32,32],f16>
  %2396 = torch.aten.permute %2395, %156 : !torch.vtensor<[2,640,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,32,32,640],f16>
  %2397 = torch.aten._reshape_alias %2396, %557, %558 : !torch.vtensor<[2,32,32,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
  %result0_88, %result1_89, %result2_90 = torch.aten.native_layer_norm %2397, %560, %45, %45, %float1.000000e-05 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.vtensor<[640],f16>, !torch.vtensor<[640],f16>, !torch.float -> !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,1],f32>, !torch.vtensor<[2,1024,1],f32>
  %2398 = torch.aten.t %36 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
  %2399 = torch.aten._reshape_alias %result0_88, %562, %563 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
  %2400 = torch.aten.mm %2399, %2398 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16>
  %2401 = torch.aten._unsafe_view %2400, %557 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
  %2402 = torch.aten.t %36 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
  %2403 = torch.aten._reshape_alias %result0_88, %562, %563 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
  %2404 = torch.aten.mm %2403, %2402 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16>
  %2405 = torch.aten._unsafe_view %2404, %557 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
  %2406 = torch.aten.t %36 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
  %2407 = torch.aten._reshape_alias %result0_88, %562, %563 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
  %2408 = torch.aten.mm %2407, %2406 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16>
  %2409 = torch.aten._unsafe_view %2408, %557 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
  %2410 = torch.aten._reshape_alias %2401, %575, %576 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16>
  %2411 = torch.aten.permute %2410, %179 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16>
  %2412 = torch.aten.clone %2411, %int0 : !torch.vtensor<[2,8,1024,80],f16>, !torch.int -> !torch.vtensor<[2,8,1024,80],f16>
  %2413 = torch.aten._unsafe_view %2412, %580 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
  %2414 = torch.aten._reshape_alias %2405, %575, %576 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16>
  %2415 = torch.aten.permute %2414, %179 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16>
  %2416 = torch.aten.clone %2415, %int0 : !torch.vtensor<[2,8,1024,80],f16>, !torch.int -> !torch.vtensor<[2,8,1024,80],f16>
  %2417 = torch.aten._unsafe_view %2416, %580 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
  %2418 = torch.aten._reshape_alias %2409, %575, %576 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16>
  %2419 = torch.aten.permute %2418, %179 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16>
  %2420 = torch.aten.clone %2419, %int0 : !torch.vtensor<[2,8,1024,80],f16>, !torch.int -> !torch.vtensor<[2,8,1024,80],f16>
  %2421 = torch.aten._unsafe_view %2420, %580 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
  %2422 = torch.aten.transpose.int %2417, %int-1, %int-2 : !torch.vtensor<[16,1024,80],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,80,1024],f16>
  %2423 = torch.aten.expand %2413, %580, %false : !torch.vtensor<[16,1024,80],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,1024,80],f16>
  %2424 = torch.aten._reshape_alias %2423, %580, %592 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
  %2425 = torch.aten.expand %2422, %594, %false : !torch.vtensor<[16,80,1024],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,80,1024],f16>
  %2426 = torch.aten._reshape_alias %2425, %594, %596 : !torch.vtensor<[16,80,1024],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,80,1024],f16>
  %2427 = torch.aten.bmm %2424, %2426 : !torch.vtensor<[16,1024,80],f16>, !torch.vtensor<[16,80,1024],f16> -> !torch.vtensor<[16,1024,1024],f16>
  %2428 = torch.aten._unsafe_view %2427, %599 : !torch.vtensor<[16,1024,1024],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,1024],f16>
  %2429 = torch.aten.mul.Tensor %2428, %1 : !torch.vtensor<[16,1024,1024],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,1024,1024],f16>
  %2430 = torch.aten._softmax %2429, %int-1, %false : !torch.vtensor<[16,1024,1024],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,1024,1024],f16>
  %2431 = torch.aten.expand %2430, %599, %false : !torch.vtensor<[16,1024,1024],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,1024,1024],f16>
  %2432 = torch.aten._reshape_alias %2431, %599, %604 : !torch.vtensor<[16,1024,1024],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,1024,1024],f16>
  %2433 = torch.aten.expand %2421, %580, %false : !torch.vtensor<[16,1024,80],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,1024,80],f16>
  %2434 = torch.aten._reshape_alias %2433, %580, %592 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
  %2435 = torch.aten.bmm %2432, %2434 : !torch.vtensor<[16,1024,1024],f16>, !torch.vtensor<[16,1024,80],f16> -> !torch.vtensor<[16,1024,80],f16>
  %2436 = torch.aten._unsafe_view %2435, %580 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
  %2437 = torch.aten._reshape_alias %2436, %610, %611 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16>
  %2438 = torch.aten.permute %2437, %179 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16>
  %2439 = torch.aten.clone %2438, %int0 : !torch.vtensor<[2,1024,8,80],f16>, !torch.int -> !torch.vtensor<[2,1024,8,80],f16>
  %2440 = torch.aten._unsafe_view %2439, %557 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
  %2441 = torch.aten.t %36 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
  %2442 = torch.aten.view %2440, %562 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
  %2443 = torch.aten.addmm %45, %2442, %2441, %int1, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[2048,640],f16>
  %2444 = torch.aten.view %2443, %557 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
  %2445 = torch.aten.add.Tensor %2444, %2397, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16>
  %result0_91, %result1_92, %result2_93 = torch.aten.native_layer_norm %2445, %560, %45, %45, %float1.000000e-05 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.vtensor<[640],f16>, !torch.vtensor<[640],f16>, !torch.float -> !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,1],f32>, !torch.vtensor<[2,1024,1],f32>
  %2446 = torch.aten.t %36 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
  %2447 = torch.aten._reshape_alias %result0_91, %562, %563 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
  %2448 = torch.aten.mm %2447, %2446 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16>
  %2449 = torch.aten._unsafe_view %2448, %557 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
  %2450 = torch.aten.t %35 : !torch.vtensor<[640,768],f16> -> !torch.vtensor<[768,640],f16>
  %2451 = torch.aten._reshape_alias %arg2, %228, %229 : !torch.vtensor<[2,77,768],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[154,768],f16>
  %2452 = torch.aten.mm %2451, %2450 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,640],f16> -> !torch.vtensor<[154,640],f16>
  %2453 = torch.aten._unsafe_view %2452, %628 : !torch.vtensor<[154,640],f16>, !torch.list<int> -> !torch.vtensor<[2,77,640],f16>
  %2454 = torch.aten.t %35 : !torch.vtensor<[640,768],f16> -> !torch.vtensor<[768,640],f16>
  %2455 = torch.aten._reshape_alias %arg2, %228, %229 : !torch.vtensor<[2,77,768],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[154,768],f16>
  %2456 = torch.aten.mm %2455, %2454 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,640],f16> -> !torch.vtensor<[154,640],f16>
  %2457 = torch.aten._unsafe_view %2456, %628 : !torch.vtensor<[154,640],f16>, !torch.list<int> -> !torch.vtensor<[2,77,640],f16>
  %2458 = torch.aten._reshape_alias %2449, %575, %576 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16>
  %2459 = torch.aten.permute %2458, %179 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16>
  %2460 = torch.aten.clone %2459, %int0 : !torch.vtensor<[2,8,1024,80],f16>, !torch.int -> !torch.vtensor<[2,8,1024,80],f16>
  %2461 = torch.aten._unsafe_view %2460, %580 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
  %2462 = torch.aten._reshape_alias %2453, %638, %639 : !torch.vtensor<[2,77,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,77,8,80],f16>
  %2463 = torch.aten.permute %2462, %179 : !torch.vtensor<[2,77,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,80],f16>
  %2464 = torch.aten.clone %2463, %int0 : !torch.vtensor<[2,8,77,80],f16>, !torch.int -> !torch.vtensor<[2,8,77,80],f16>
  %2465 = torch.aten._unsafe_view %2464, %643 : !torch.vtensor<[2,8,77,80],f16>, !torch.list<int> -> !torch.vtensor<[16,77,80],f16>
  %2466 = torch.aten._reshape_alias %2457, %638, %639 : !torch.vtensor<[2,77,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,77,8,80],f16>
  %2467 = torch.aten.permute %2466, %179 : !torch.vtensor<[2,77,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,80],f16>
  %2468 = torch.aten.clone %2467, %int0 : !torch.vtensor<[2,8,77,80],f16>, !torch.int -> !torch.vtensor<[2,8,77,80],f16>
  %2469 = torch.aten._unsafe_view %2468, %643 : !torch.vtensor<[2,8,77,80],f16>, !torch.list<int> -> !torch.vtensor<[16,77,80],f16>
  %2470 = torch.aten.transpose.int %2465, %int-1, %int-2 : !torch.vtensor<[16,77,80],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,80,77],f16>
  %2471 = torch.aten.expand %2461, %580, %false : !torch.vtensor<[16,1024,80],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,1024,80],f16>
  %2472 = torch.aten._reshape_alias %2471, %580, %592 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
  %2473 = torch.aten.expand %2470, %652, %false : !torch.vtensor<[16,80,77],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,80,77],f16>
  %2474 = torch.aten._reshape_alias %2473, %652, %654 : !torch.vtensor<[16,80,77],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,80,77],f16>
  %2475 = torch.aten.bmm %2472, %2474 : !torch.vtensor<[16,1024,80],f16>, !torch.vtensor<[16,80,77],f16> -> !torch.vtensor<[16,1024,77],f16>
  %2476 = torch.aten._unsafe_view %2475, %657 : !torch.vtensor<[16,1024,77],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,77],f16>
  %2477 = torch.aten.mul.Tensor %2476, %1 : !torch.vtensor<[16,1024,77],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,1024,77],f16>
  %2478 = torch.aten._softmax %2477, %int-1, %false : !torch.vtensor<[16,1024,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,1024,77],f16>
  %2479 = torch.aten.expand %2478, %657, %false : !torch.vtensor<[16,1024,77],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,1024,77],f16>
  %2480 = torch.aten._reshape_alias %2479, %657, %662 : !torch.vtensor<[16,1024,77],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,1024,77],f16>
  %2481 = torch.aten.expand %2469, %643, %false : !torch.vtensor<[16,77,80],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,77,80],f16>
  %2482 = torch.aten._reshape_alias %2481, %643, %665 : !torch.vtensor<[16,77,80],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,77,80],f16>
  %2483 = torch.aten.bmm %2480, %2482 : !torch.vtensor<[16,1024,77],f16>, !torch.vtensor<[16,77,80],f16> -> !torch.vtensor<[16,1024,80],f16>
  %2484 = torch.aten._unsafe_view %2483, %580 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
  %2485 = torch.aten._reshape_alias %2484, %610, %611 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16>
  %2486 = torch.aten.permute %2485, %179 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16>
  %2487 = torch.aten.clone %2486, %int0 : !torch.vtensor<[2,1024,8,80],f16>, !torch.int -> !torch.vtensor<[2,1024,8,80],f16>
  %2488 = torch.aten._unsafe_view %2487, %557 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
  %2489 = torch.aten.t %36 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
  %2490 = torch.aten.view %2488, %562 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
  %2491 = torch.aten.addmm %45, %2490, %2489, %int1, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[2048,640],f16>
  %2492 = torch.aten.view %2491, %557 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
  %2493 = torch.aten.add.Tensor %2492, %2445, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16>
  %result0_94, %result1_95, %result2_96 = torch.aten.native_layer_norm %2493, %560, %45, %45, %float1.000000e-05 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.vtensor<[640],f16>, !torch.vtensor<[640],f16>, !torch.float -> !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,1],f32>, !torch.vtensor<[2,1024,1],f32>
  %2494 = torch.aten.t %37 : !torch.vtensor<[5120,640],f16> -> !torch.vtensor<[640,5120],f16>
  %2495 = torch.aten.view %result0_94, %562 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
  %2496 = torch.aten.addmm %38, %2495, %2494, %int1, %int1 : !torch.vtensor<[5120],f16>, !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,5120],f16>, !torch.int, !torch.int -> !torch.vtensor<[2048,5120],f16>
  %2497 = torch.aten.view %2496, %681 : !torch.vtensor<[2048,5120],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,5120],f16>
  %2498 = torch.aten.slice.Tensor %2497, %int-1, %int0, %int2560, %int1 : !torch.vtensor<[2,1024,5120],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1024,2560],f16>
  %2499 = torch.aten.slice.Tensor %2497, %int-1, %int2560, %int5120, %int1 : !torch.vtensor<[2,1024,5120],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1024,2560],f16>
  %2500 = torch.aten.gelu %2499, %str_0 : !torch.vtensor<[2,1024,2560],f16>, !torch.str -> !torch.vtensor<[2,1024,2560],f16>
  %2501 = torch.aten.mul.Tensor %2498, %2500 : !torch.vtensor<[2,1024,2560],f16>, !torch.vtensor<[2,1024,2560],f16> -> !torch.vtensor<[2,1024,2560],f16>
  %2502 = torch.aten.t %39 : !torch.vtensor<[640,2560],f16> -> !torch.vtensor<[2560,640],f16>
  %2503 = torch.aten.view %2501, %688 : !torch.vtensor<[2,1024,2560],f16>, !torch.list<int> -> !torch.vtensor<[2048,2560],f16>
  %2504 = torch.aten.addmm %45, %2503, %2502, %int1, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2048,2560],f16>, !torch.vtensor<[2560,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[2048,640],f16>
  %2505 = torch.aten.view %2504, %557 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
  %2506 = torch.aten.add.Tensor %2505, %2493, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16>
  %2507 = torch.aten._reshape_alias %2506, %693, %694 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,32,640],f16>
  %2508 = torch.aten.permute %2507, %300 : !torch.vtensor<[2,32,32,640],f16>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f16>
  %2509 = torch.aten._convolution %2508, %40, %45, %87, %88, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[640,640,1,1],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,640,32,32],f16>
  %2510 = torch.aten.add.Tensor %2509, %2377, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
  %2511 = torch.prim.ListConstruct %2510, %698 : (!torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f16>) -> !torch.list<vtensor>
  %2512 = torch.aten.cat %2511, %int1 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[2,1280,32,32],f16>
  %2513 = torch.aten.clone %2512, %int0 : !torch.vtensor<[2,1280,32,32],f16>, !torch.int -> !torch.vtensor<[2,1280,32,32],f16>
  %2514 = torch.prim.ListConstruct %int2, %int32, %int40, %int1024 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
  %2515 = torch.aten.view %2513, %2514 : !torch.vtensor<[2,1280,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,32,40,1024],f16>
  %2516 = torch.aten._to_copy %2515, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,32,40,1024],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,32,40,1024],f32>
  %2517 = torch.aten.var.correction %2516, %93, %int0, %true : !torch.vtensor<[2,32,40,1024],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>
  %2518 = torch.aten.mean.dim %2516, %93, %true, %none : !torch.vtensor<[2,32,40,1024],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
  %2519 = torch.aten.add.Tensor %2517, %5, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
  %2520 = torch.aten.rsqrt %2519 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
  %2521 = torch.aten.sub.Tensor %2515, %2518, %int1 : !torch.vtensor<[2,32,40,1024],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,1024],f32>
  %2522 = torch.aten.mul.Tensor %2521, %2520 : !torch.vtensor<[2,32,40,1024],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,1024],f32>
  %2523 = torch.prim.ListConstruct %int2, %int1280, %int32, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
  %2524 = torch.aten.view %2522, %2523 : !torch.vtensor<[2,32,40,1024],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,32,32],f32>
  %2525 = torch.aten.unsqueeze %29, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
  %2526 = torch.aten.unsqueeze %2525, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
  %2527 = torch.aten.mul.Tensor %2524, %2526 : !torch.vtensor<[2,1280,32,32],f32>, !torch.vtensor<[1280,1,1],f16> -> !torch.vtensor<[2,1280,32,32],f32>
  %2528 = torch.aten.unsqueeze %29, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
  %2529 = torch.aten.unsqueeze %2528, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
  %2530 = torch.aten.add.Tensor %2527, %2529, %int1 : !torch.vtensor<[2,1280,32,32],f32>, !torch.vtensor<[1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,32,32],f32>
  %2531 = torch.aten._to_copy %2530, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,1280,32,32],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,32,32],f16>
  %2532 = torch.aten.silu %2531 : !torch.vtensor<[2,1280,32,32],f16> -> !torch.vtensor<[2,1280,32,32],f16>
  %2533 = torch.aten._convolution %2532, %30, %45, %87, %87, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,1280,32,32],f16>, !torch.vtensor<[640,1280,3,3],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,640,32,32],f16>
  %2534 = torch.aten.silu %86 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
  %2535 = torch.aten.t %33 : !torch.vtensor<[640,1280],f16> -> !torch.vtensor<[1280,640],f16>
  %2536 = torch.aten.addmm %45, %2534, %2535, %int1, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[2,640],f16>
  %2537 = torch.aten.slice.Tensor %2536, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,640],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,640],f16>
  %2538 = torch.aten.slice.Tensor %2537, %int1, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,640],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,640],f16>
  %2539 = torch.aten.unsqueeze %2538, %int2 : !torch.vtensor<[2,640],f16>, !torch.int -> !torch.vtensor<[2,640,1],f16>
  %2540 = torch.aten.unsqueeze %2539, %int3 : !torch.vtensor<[2,640,1],f16>, !torch.int -> !torch.vtensor<[2,640,1,1],f16>
  %2541 = torch.aten.add.Tensor %2533, %2540, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,1,1],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
  %2542 = torch.aten.view %2541, %515 : !torch.vtensor<[2,640,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,32,20,1024],f16>
  %2543 = torch.aten._to_copy %2542, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,32,20,1024],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,32,20,1024],f32>
  %2544 = torch.aten.var.correction %2543, %93, %int0, %true : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>
  %2545 = torch.aten.mean.dim %2543, %93, %true, %none : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
  %2546 = torch.aten.add.Tensor %2544, %5, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
  %2547 = torch.aten.rsqrt %2546 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
  %2548 = torch.aten.sub.Tensor %2542, %2545, %int1 : !torch.vtensor<[2,32,20,1024],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,20,1024],f32>
  %2549 = torch.aten.mul.Tensor %2548, %2547 : !torch.vtensor<[2,32,20,1024],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,20,1024],f32>
  %2550 = torch.aten.view %2549, %524 : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f32>
  %2551 = torch.aten.unsqueeze %45, %int-1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640,1],f16>
  %2552 = torch.aten.unsqueeze %2551, %int-1 : !torch.vtensor<[640,1],f16>, !torch.int -> !torch.vtensor<[640,1,1],f16>
  %2553 = torch.aten.mul.Tensor %2550, %2552 : !torch.vtensor<[2,640,32,32],f32>, !torch.vtensor<[640,1,1],f16> -> !torch.vtensor<[2,640,32,32],f32>
  %2554 = torch.aten.unsqueeze %45, %int-1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640,1],f16>
  %2555 = torch.aten.unsqueeze %2554, %int-1 : !torch.vtensor<[640,1],f16>, !torch.int -> !torch.vtensor<[640,1,1],f16>
  %2556 = torch.aten.add.Tensor %2553, %2555, %int1 : !torch.vtensor<[2,640,32,32],f32>, !torch.vtensor<[640,1,1],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f32>
  %2557 = torch.aten._to_copy %2556, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,640,32,32],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,640,32,32],f16>
  %2558 = torch.aten.silu %2557 : !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16>
  %2559 = torch.aten._convolution %2558, %41, %45, %87, %87, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[640,640,3,3],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,640,32,32],f16>
  %2560 = torch.aten._convolution %2512, %31, %45, %87, %88, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,1280,32,32],f16>, !torch.vtensor<[640,1280,1,1],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,640,32,32],f16>
  %2561 = torch.aten.add.Tensor %2560, %2559, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
  %2562 = torch.aten.div.Tensor %2561, %4 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[2,640,32,32],f16>
  %2563 = torch.aten.clone %2562, %int0 : !torch.vtensor<[2,640,32,32],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
  %2564 = torch.aten.view %2563, %515 : !torch.vtensor<[2,640,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,32,20,1024],f16>
  %2565 = torch.aten._to_copy %2564, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,32,20,1024],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,32,20,1024],f32>
  %2566 = torch.aten.var.correction %2565, %93, %int0, %true : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>
  %2567 = torch.aten.mean.dim %2565, %93, %true, %none : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
  %2568 = torch.aten.add.Tensor %2566, %3, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
  %2569 = torch.aten.rsqrt %2568 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
  %2570 = torch.aten.sub.Tensor %2564, %2567, %int1 : !torch.vtensor<[2,32,20,1024],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,20,1024],f32>
  %2571 = torch.aten.mul.Tensor %2570, %2569 : !torch.vtensor<[2,32,20,1024],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,20,1024],f32>
  %2572 = torch.aten.view %2571, %524 : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f32>
  %2573 = torch.aten.unsqueeze %45, %int-1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640,1],f16>
  %2574 = torch.aten.unsqueeze %2573, %int-1 : !torch.vtensor<[640,1],f16>, !torch.int -> !torch.vtensor<[640,1,1],f16>
  %2575 = torch.aten.mul.Tensor %2572, %2574 : !torch.vtensor<[2,640,32,32],f32>, !torch.vtensor<[640,1,1],f16> -> !torch.vtensor<[2,640,32,32],f32>
  %2576 = torch.aten.unsqueeze %45, %int-1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640,1],f16>
  %2577 = torch.aten.unsqueeze %2576, %int-1 : !torch.vtensor<[640,1],f16>, !torch.int -> !torch.vtensor<[640,1,1],f16>
  %2578 = torch.aten.add.Tensor %2575, %2577, %int1 : !torch.vtensor<[2,640,32,32],f32>, !torch.vtensor<[640,1,1],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f32>
  %2579 = torch.aten._to_copy %2578, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,640,32,32],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,640,32,32],f16>
  %2580 = torch.aten._convolution %2579, %40, %45, %87, %88, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[640,640,1,1],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,640,32,32],f16>
  %2581 = torch.aten.permute %2580, %156 : !torch.vtensor<[2,640,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,32,32,640],f16>
  %2582 = torch.aten._reshape_alias %2581, %557, %558 : !torch.vtensor<[2,32,32,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
  %result0_97, %result1_98, %result2_99 = torch.aten.native_layer_norm %2582, %560, %45, %45, %float1.000000e-05 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.vtensor<[640],f16>, !torch.vtensor<[640],f16>, !torch.float -> !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,1],f32>, !torch.vtensor<[2,1024,1],f32>
  %2583 = torch.aten.t %36 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
  %2584 = torch.aten._reshape_alias %result0_97, %562, %563 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
  %2585 = torch.aten.mm %2584, %2583 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16>
  %2586 = torch.aten._unsafe_view %2585, %557 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
  %2587 = torch.aten.t %36 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
  %2588 = torch.aten._reshape_alias %result0_97, %562, %563 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
  %2589 = torch.aten.mm %2588, %2587 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16>
  %2590 = torch.aten._unsafe_view %2589, %557 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
  %2591 = torch.aten.t %36 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
  %2592 = torch.aten._reshape_alias %result0_97, %562, %563 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
  %2593 = torch.aten.mm %2592, %2591 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16>
  %2594 = torch.aten._unsafe_view %2593, %557 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
  %2595 = torch.aten._reshape_alias %2586, %575, %576 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16>
  %2596 = torch.aten.permute %2595, %179 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16>
  %2597 = torch.aten.clone %2596, %int0 : !torch.vtensor<[2,8,1024,80],f16>, !torch.int -> !torch.vtensor<[2,8,1024,80],f16>
  %2598 = torch.aten._unsafe_view %2597, %580 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
  %2599 = torch.aten._reshape_alias %2590, %575, %576 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16>
  %2600 = torch.aten.permute %2599, %179 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16>
  %2601 = torch.aten.clone %2600, %int0 : !torch.vtensor<[2,8,1024,80],f16>, !torch.int -> !torch.vtensor<[2,8,1024,80],f16>
  %2602 = torch.aten._unsafe_view %2601, %580 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
  %2603 = torch.aten._reshape_alias %2594, %575, %576 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16>
  %2604 = torch.aten.permute %2603, %179 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16>
  %2605 = torch.aten.clone %2604, %int0 : !torch.vtensor<[2,8,1024,80],f16>, !torch.int -> !torch.vtensor<[2,8,1024,80],f16>
  %2606 = torch.aten._unsafe_view %2605, %580 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
  %2607 = torch.aten.transpose.int %2602, %int-1, %int-2 : !torch.vtensor<[16,1024,80],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,80,1024],f16>
  %2608 = torch.aten.expand %2598, %580, %false : !torch.vtensor<[16,1024,80],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,1024,80],f16>
  %2609 = torch.aten._reshape_alias %2608, %580, %592 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
  %2610 = torch.aten.expand %2607, %594, %false : !torch.vtensor<[16,80,1024],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,80,1024],f16>
  %2611 = torch.aten._reshape_alias %2610, %594, %596 : !torch.vtensor<[16,80,1024],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,80,1024],f16>
  %2612 = torch.aten.bmm %2609, %2611 : !torch.vtensor<[16,1024,80],f16>, !torch.vtensor<[16,80,1024],f16> -> !torch.vtensor<[16,1024,1024],f16>
  %2613 = torch.aten._unsafe_view %2612, %599 : !torch.vtensor<[16,1024,1024],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,1024],f16>
  %2614 = torch.aten.mul.Tensor %2613, %1 : !torch.vtensor<[16,1024,1024],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,1024,1024],f16>
  %2615 = torch.aten._softmax %2614, %int-1, %false : !torch.vtensor<[16,1024,1024],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,1024,1024],f16>
  %2616 = torch.aten.expand %2615, %599, %false : !torch.vtensor<[16,1024,1024],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,1024,1024],f16>
  %2617 = torch.aten._reshape_alias %2616, %599, %604 : !torch.vtensor<[16,1024,1024],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,1024,1024],f16>
  %2618 = torch.aten.expand %2606, %580, %false : !torch.vtensor<[16,1024,80],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,1024,80],f16>
  %2619 = torch.aten._reshape_alias %2618, %580, %592 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
  %2620 = torch.aten.bmm %2617, %2619 : !torch.vtensor<[16,1024,1024],f16>, !torch.vtensor<[16,1024,80],f16> -> !torch.vtensor<[16,1024,80],f16>
  %2621 = torch.aten._unsafe_view %2620, %580 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
  %2622 = torch.aten._reshape_alias %2621, %610, %611 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16>
  %2623 = torch.aten.permute %2622, %179 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16>
  %2624 = torch.aten.clone %2623, %int0 : !torch.vtensor<[2,1024,8,80],f16>, !torch.int -> !torch.vtensor<[2,1024,8,80],f16>
  %2625 = torch.aten._unsafe_view %2624, %557 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
  %2626 = torch.aten.t %36 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
  %2627 = torch.aten.view %2625, %562 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
  %2628 = torch.aten.addmm %45, %2627, %2626, %int1, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[2048,640],f16>
  %2629 = torch.aten.view %2628, %557 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
  %2630 = torch.aten.add.Tensor %2629, %2582, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16>
  %result0_100, %result1_101, %result2_102 = torch.aten.native_layer_norm %2630, %560, %45, %45, %float1.000000e-05 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.vtensor<[640],f16>, !torch.vtensor<[640],f16>, !torch.float -> !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,1],f32>, !torch.vtensor<[2,1024,1],f32>
  %2631 = torch.aten.t %36 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
  %2632 = torch.aten._reshape_alias %result0_100, %562, %563 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
  %2633 = torch.aten.mm %2632, %2631 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16>
  %2634 = torch.aten._unsafe_view %2633, %557 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
  %2635 = torch.aten.t %35 : !torch.vtensor<[640,768],f16> -> !torch.vtensor<[768,640],f16>
  %2636 = torch.aten._reshape_alias %arg2, %228, %229 : !torch.vtensor<[2,77,768],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[154,768],f16>
  %2637 = torch.aten.mm %2636, %2635 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,640],f16> -> !torch.vtensor<[154,640],f16>
  %2638 = torch.aten._unsafe_view %2637, %628 : !torch.vtensor<[154,640],f16>, !torch.list<int> -> !torch.vtensor<[2,77,640],f16>
  %2639 = torch.aten.t %35 : !torch.vtensor<[640,768],f16> -> !torch.vtensor<[768,640],f16>
  %2640 = torch.aten._reshape_alias %arg2, %228, %229 : !torch.vtensor<[2,77,768],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[154,768],f16>
  %2641 = torch.aten.mm %2640, %2639 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,640],f16> -> !torch.vtensor<[154,640],f16>
  %2642 = torch.aten._unsafe_view %2641, %628 : !torch.vtensor<[154,640],f16>, !torch.list<int> -> !torch.vtensor<[2,77,640],f16>
  %2643 = torch.aten._reshape_alias %2634, %575, %576 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16>
  %2644 = torch.aten.permute %2643, %179 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16>
  %2645 = torch.aten.clone %2644, %int0 : !torch.vtensor<[2,8,1024,80],f16>, !torch.int -> !torch.vtensor<[2,8,1024,80],f16>
  %2646 = torch.aten._unsafe_view %2645, %580 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
  %2647 = torch.aten._reshape_alias %2638, %638, %639 : !torch.vtensor<[2,77,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,77,8,80],f16>
  %2648 = torch.aten.permute %2647, %179 : !torch.vtensor<[2,77,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,80],f16>
  %2649 = torch.aten.clone %2648, %int0 : !torch.vtensor<[2,8,77,80],f16>, !torch.int -> !torch.vtensor<[2,8,77,80],f16>
  %2650 = torch.aten._unsafe_view %2649, %643 : !torch.vtensor<[2,8,77,80],f16>, !torch.list<int> -> !torch.vtensor<[16,77,80],f16>
  %2651 = torch.aten._reshape_alias %2642, %638, %639 : !torch.vtensor<[2,77,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,77,8,80],f16>
  %2652 = torch.aten.permute %2651, %179 : !torch.vtensor<[2,77,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,80],f16>
  %2653 = torch.aten.clone %2652, %int0 : !torch.vtensor<[2,8,77,80],f16>, !torch.int -> !torch.vtensor<[2,8,77,80],f16>
  %2654 = torch.aten._unsafe_view %2653, %643 : !torch.vtensor<[2,8,77,80],f16>, !torch.list<int> -> !torch.vtensor<[16,77,80],f16>
  %2655 = torch.aten.transpose.int %2650, %int-1, %int-2 : !torch.vtensor<[16,77,80],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,80,77],f16>
  %2656 = torch.aten.expand %2646, %580, %false : !torch.vtensor<[16,1024,80],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,1024,80],f16>
  %2657 = torch.aten._reshape_alias %2656, %580, %592 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
  %2658 = torch.aten.expand %2655, %652, %false : !torch.vtensor<[16,80,77],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,80,77],f16>
  %2659 = torch.aten._reshape_alias %2658, %652, %654 : !torch.vtensor<[16,80,77],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,80,77],f16>
  %2660 = torch.aten.bmm %2657, %2659 : !torch.vtensor<[16,1024,80],f16>, !torch.vtensor<[16,80,77],f16> -> !torch.vtensor<[16,1024,77],f16>
  %2661 = torch.aten._unsafe_view %2660, %657 : !torch.vtensor<[16,1024,77],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,77],f16>
  %2662 = torch.aten.mul.Tensor %2661, %1 : !torch.vtensor<[16,1024,77],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,1024,77],f16>
  %2663 = torch.aten._softmax %2662, %int-1, %false : !torch.vtensor<[16,1024,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,1024,77],f16>
  %2664 = torch.aten.expand %2663, %657, %false : !torch.vtensor<[16,1024,77],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,1024,77],f16>
  %2665 = torch.aten._reshape_alias %2664, %657, %662 : !torch.vtensor<[16,1024,77],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,1024,77],f16>
  %2666 = torch.aten.expand %2654, %643, %false : !torch.vtensor<[16,77,80],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,77,80],f16>
  %2667 = torch.aten._reshape_alias %2666, %643, %665 : !torch.vtensor<[16,77,80],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,77,80],f16>
  %2668 = torch.aten.bmm %2665, %2667 : !torch.vtensor<[16,1024,77],f16>, !torch.vtensor<[16,77,80],f16> -> !torch.vtensor<[16,1024,80],f16>
  %2669 = torch.aten._unsafe_view %2668, %580 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
  %2670 = torch.aten._reshape_alias %2669, %610, %611 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16>
  %2671 = torch.aten.permute %2670, %179 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16>
  %2672 = torch.aten.clone %2671, %int0 : !torch.vtensor<[2,1024,8,80],f16>, !torch.int -> !torch.vtensor<[2,1024,8,80],f16>
  %2673 = torch.aten._unsafe_view %2672, %557 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
  %2674 = torch.aten.t %36 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
  %2675 = torch.aten.view %2673, %562 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
  %2676 = torch.aten.addmm %45, %2675, %2674, %int1, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[2048,640],f16>
  %2677 = torch.aten.view %2676, %557 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
  %2678 = torch.aten.add.Tensor %2677, %2630, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16>
  %result0_103, %result1_104, %result2_105 = torch.aten.native_layer_norm %2678, %560, %45, %45, %float1.000000e-05 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.vtensor<[640],f16>, !torch.vtensor<[640],f16>, !torch.float -> !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,1],f32>, !torch.vtensor<[2,1024,1],f32>
  %2679 = torch.aten.t %37 : !torch.vtensor<[5120,640],f16> -> !torch.vtensor<[640,5120],f16>
  %2680 = torch.aten.view %result0_103, %562 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
  %2681 = torch.aten.addmm %38, %2680, %2679, %int1, %int1 : !torch.vtensor<[5120],f16>, !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,5120],f16>, !torch.int, !torch.int -> !torch.vtensor<[2048,5120],f16>
  %2682 = torch.aten.view %2681, %681 : !torch.vtensor<[2048,5120],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,5120],f16>
  %2683 = torch.aten.slice.Tensor %2682, %int-1, %int0, %int2560, %int1 : !torch.vtensor<[2,1024,5120],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1024,2560],f16>
  %2684 = torch.aten.slice.Tensor %2682, %int-1, %int2560, %int5120, %int1 : !torch.vtensor<[2,1024,5120],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1024,2560],f16>
  %2685 = torch.aten.gelu %2684, %str_0 : !torch.vtensor<[2,1024,2560],f16>, !torch.str -> !torch.vtensor<[2,1024,2560],f16>
  %2686 = torch.aten.mul.Tensor %2683, %2685 : !torch.vtensor<[2,1024,2560],f16>, !torch.vtensor<[2,1024,2560],f16> -> !torch.vtensor<[2,1024,2560],f16>
  %2687 = torch.aten.t %39 : !torch.vtensor<[640,2560],f16> -> !torch.vtensor<[2560,640],f16>
  %2688 = torch.aten.view %2686, %688 : !torch.vtensor<[2,1024,2560],f16>, !torch.list<int> -> !torch.vtensor<[2048,2560],f16>
  %2689 = torch.aten.addmm %45, %2688, %2687, %int1, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2048,2560],f16>, !torch.vtensor<[2560,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[2048,640],f16>
  %2690 = torch.aten.view %2689, %557 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
  %2691 = torch.aten.add.Tensor %2690, %2678, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16>
  %2692 = torch.aten._reshape_alias %2691, %693, %694 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,32,640],f16>
  %2693 = torch.aten.permute %2692, %300 : !torch.vtensor<[2,32,32,640],f16>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f16>
  %2694 = torch.aten._convolution %2693, %40, %45, %87, %88, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[640,640,1,1],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,640,32,32],f16>
  %2695 = torch.aten.add.Tensor %2694, %2562, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
  %2696 = torch.prim.ListConstruct %2695, %485 : (!torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,320,32,32],f16>) -> !torch.list<vtensor>
  %2697 = torch.aten.cat %2696, %int1 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[2,960,32,32],f16>
  %2698 = torch.aten.clone %2697, %int0 : !torch.vtensor<[2,960,32,32],f16>, !torch.int -> !torch.vtensor<[2,960,32,32],f16>
  %2699 = torch.prim.ListConstruct %int2, %int32, %int30, %int1024 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
  %2700 = torch.aten.view %2698, %2699 : !torch.vtensor<[2,960,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,32,30,1024],f16>
  %2701 = torch.aten._to_copy %2700, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,32,30,1024],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,32,30,1024],f32>
  %2702 = torch.aten.var.correction %2701, %93, %int0, %true : !torch.vtensor<[2,32,30,1024],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>
  %2703 = torch.aten.mean.dim %2701, %93, %true, %none : !torch.vtensor<[2,32,30,1024],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
  %2704 = torch.aten.add.Tensor %2702, %5, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
  %2705 = torch.aten.rsqrt %2704 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
  %2706 = torch.aten.sub.Tensor %2700, %2703, %int1 : !torch.vtensor<[2,32,30,1024],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,30,1024],f32>
  %2707 = torch.aten.mul.Tensor %2706, %2705 : !torch.vtensor<[2,32,30,1024],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,30,1024],f32>
  %2708 = torch.prim.ListConstruct %int2, %int960, %int32, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
  %2709 = torch.aten.view %2707, %2708 : !torch.vtensor<[2,32,30,1024],f32>, !torch.list<int> -> !torch.vtensor<[2,960,32,32],f32>
  %2710 = torch.aten.unsqueeze %42, %int-1 : !torch.vtensor<[960],f16>, !torch.int -> !torch.vtensor<[960,1],f16>
  %2711 = torch.aten.unsqueeze %2710, %int-1 : !torch.vtensor<[960,1],f16>, !torch.int -> !torch.vtensor<[960,1,1],f16>
  %2712 = torch.aten.mul.Tensor %2709, %2711 : !torch.vtensor<[2,960,32,32],f32>, !torch.vtensor<[960,1,1],f16> -> !torch.vtensor<[2,960,32,32],f32>
  %2713 = torch.aten.unsqueeze %42, %int-1 : !torch.vtensor<[960],f16>, !torch.int -> !torch.vtensor<[960,1],f16>
  %2714 = torch.aten.unsqueeze %2713, %int-1 : !torch.vtensor<[960,1],f16>, !torch.int -> !torch.vtensor<[960,1,1],f16>
  %2715 = torch.aten.add.Tensor %2712, %2714, %int1 : !torch.vtensor<[2,960,32,32],f32>, !torch.vtensor<[960,1,1],f16>, !torch.int -> !torch.vtensor<[2,960,32,32],f32>
  %2716 = torch.aten._to_copy %2715, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,960,32,32],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,960,32,32],f16>
  %2717 = torch.aten.silu %2716 : !torch.vtensor<[2,960,32,32],f16> -> !torch.vtensor<[2,960,32,32],f16>
  %2718 = torch.aten._convolution %2717, %32, %45, %87, %87, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,960,32,32],f16>, !torch.vtensor<[640,960,3,3],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,640,32,32],f16>
  %2719 = torch.aten.silu %86 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
  %2720 = torch.aten.t %33 : !torch.vtensor<[640,1280],f16> -> !torch.vtensor<[1280,640],f16>
  %2721 = torch.aten.addmm %45, %2719, %2720, %int1, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[2,640],f16>
  %2722 = torch.aten.slice.Tensor %2721, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,640],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,640],f16>
  %2723 = torch.aten.slice.Tensor %2722, %int1, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,640],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,640],f16>
  %2724 = torch.aten.unsqueeze %2723, %int2 : !torch.vtensor<[2,640],f16>, !torch.int -> !torch.vtensor<[2,640,1],f16>
  %2725 = torch.aten.unsqueeze %2724, %int3 : !torch.vtensor<[2,640,1],f16>, !torch.int -> !torch.vtensor<[2,640,1,1],f16>
  %2726 = torch.aten.add.Tensor %2718, %2725, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,1,1],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
  %2727 = torch.aten.view %2726, %515 : !torch.vtensor<[2,640,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,32,20,1024],f16>
  %2728 = torch.aten._to_copy %2727, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,32,20,1024],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,32,20,1024],f32>
  %2729 = torch.aten.var.correction %2728, %93, %int0, %true : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>
  %2730 = torch.aten.mean.dim %2728, %93, %true, %none : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
  %2731 = torch.aten.add.Tensor %2729, %5, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
  %2732 = torch.aten.rsqrt %2731 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
  %2733 = torch.aten.sub.Tensor %2727, %2730, %int1 : !torch.vtensor<[2,32,20,1024],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,20,1024],f32>
  %2734 = torch.aten.mul.Tensor %2733, %2732 : !torch.vtensor<[2,32,20,1024],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,20,1024],f32>
  %2735 = torch.aten.view %2734, %524 : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f32>
  %2736 = torch.aten.unsqueeze %45, %int-1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640,1],f16>
  %2737 = torch.aten.unsqueeze %2736, %int-1 : !torch.vtensor<[640,1],f16>, !torch.int -> !torch.vtensor<[640,1,1],f16>
  %2738 = torch.aten.mul.Tensor %2735, %2737 : !torch.vtensor<[2,640,32,32],f32>, !torch.vtensor<[640,1,1],f16> -> !torch.vtensor<[2,640,32,32],f32>
  %2739 = torch.aten.unsqueeze %45, %int-1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640,1],f16>
  %2740 = torch.aten.unsqueeze %2739, %int-1 : !torch.vtensor<[640,1],f16>, !torch.int -> !torch.vtensor<[640,1,1],f16>
  %2741 = torch.aten.add.Tensor %2738, %2740, %int1 : !torch.vtensor<[2,640,32,32],f32>, !torch.vtensor<[640,1,1],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f32>
  %2742 = torch.aten._to_copy %2741, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,640,32,32],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,640,32,32],f16>
  %2743 = torch.aten.silu %2742 : !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16>
  %2744 = torch.aten._convolution %2743, %41, %45, %87, %87, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[640,640,3,3],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,640,32,32],f16>
  %2745 = torch.aten._convolution %2697, %34, %45, %87, %88, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,960,32,32],f16>, !torch.vtensor<[640,960,1,1],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,640,32,32],f16>
  %2746 = torch.aten.add.Tensor %2745, %2744, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
  %2747 = torch.aten.div.Tensor %2746, %4 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[2,640,32,32],f16>
  %2748 = torch.aten.clone %2747, %int0 : !torch.vtensor<[2,640,32,32],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
  %2749 = torch.aten.view %2748, %515 : !torch.vtensor<[2,640,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,32,20,1024],f16>
  %2750 = torch.aten._to_copy %2749, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,32,20,1024],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,32,20,1024],f32>
  %2751 = torch.aten.var.correction %2750, %93, %int0, %true : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>
  %2752 = torch.aten.mean.dim %2750, %93, %true, %none : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
  %2753 = torch.aten.add.Tensor %2751, %3, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
  %2754 = torch.aten.rsqrt %2753 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
  %2755 = torch.aten.sub.Tensor %2749, %2752, %int1 : !torch.vtensor<[2,32,20,1024],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,20,1024],f32>
  %2756 = torch.aten.mul.Tensor %2755, %2754 : !torch.vtensor<[2,32,20,1024],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,20,1024],f32>
  %2757 = torch.aten.view %2756, %524 : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f32>
  %2758 = torch.aten.unsqueeze %45, %int-1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640,1],f16>
  %2759 = torch.aten.unsqueeze %2758, %int-1 : !torch.vtensor<[640,1],f16>, !torch.int -> !torch.vtensor<[640,1,1],f16>
  %2760 = torch.aten.mul.Tensor %2757, %2759 : !torch.vtensor<[2,640,32,32],f32>, !torch.vtensor<[640,1,1],f16> -> !torch.vtensor<[2,640,32,32],f32>
  %2761 = torch.aten.unsqueeze %45, %int-1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640,1],f16>
  %2762 = torch.aten.unsqueeze %2761, %int-1 : !torch.vtensor<[640,1],f16>, !torch.int -> !torch.vtensor<[640,1,1],f16>
  %2763 = torch.aten.add.Tensor %2760, %2762, %int1 : !torch.vtensor<[2,640,32,32],f32>, !torch.vtensor<[640,1,1],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f32>
  %2764 = torch.aten._to_copy %2763, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,640,32,32],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,640,32,32],f16>
  %2765 = torch.aten._convolution %2764, %40, %45, %87, %88, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[640,640,1,1],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,640,32,32],f16>
  %2766 = torch.aten.permute %2765, %156 : !torch.vtensor<[2,640,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,32,32,640],f16>
  %2767 = torch.aten._reshape_alias %2766, %557, %558 : !torch.vtensor<[2,32,32,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
  %result0_106, %result1_107, %result2_108 = torch.aten.native_layer_norm %2767, %560, %45, %45, %float1.000000e-05 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.vtensor<[640],f16>, !torch.vtensor<[640],f16>, !torch.float -> !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,1],f32>, !torch.vtensor<[2,1024,1],f32>
  %2768 = torch.aten.t %36 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
  %2769 = torch.aten._reshape_alias %result0_106, %562, %563 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
  %2770 = torch.aten.mm %2769, %2768 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16>
  %2771 = torch.aten._unsafe_view %2770, %557 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
  %2772 = torch.aten.t %36 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
  %2773 = torch.aten._reshape_alias %result0_106, %562, %563 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
  %2774 = torch.aten.mm %2773, %2772 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16>
  %2775 = torch.aten._unsafe_view %2774, %557 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
  %2776 = torch.aten.t %36 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
  %2777 = torch.aten._reshape_alias %result0_106, %562, %563 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
  %2778 = torch.aten.mm %2777, %2776 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16>
  %2779 = torch.aten._unsafe_view %2778, %557 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
  %2780 = torch.aten._reshape_alias %2771, %575, %576 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16>
  %2781 = torch.aten.permute %2780, %179 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16>
  %2782 = torch.aten.clone %2781, %int0 : !torch.vtensor<[2,8,1024,80],f16>, !torch.int -> !torch.vtensor<[2,8,1024,80],f16>
  %2783 = torch.aten._unsafe_view %2782, %580 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
  %2784 = torch.aten._reshape_alias %2775, %575, %576 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16>
  %2785 = torch.aten.permute %2784, %179 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16>
  %2786 = torch.aten.clone %2785, %int0 : !torch.vtensor<[2,8,1024,80],f16>, !torch.int -> !torch.vtensor<[2,8,1024,80],f16>
  %2787 = torch.aten._unsafe_view %2786, %580 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
  %2788 = torch.aten._reshape_alias %2779, %575, %576 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16>
  %2789 = torch.aten.permute %2788, %179 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16>
  %2790 = torch.aten.clone %2789, %int0 : !torch.vtensor<[2,8,1024,80],f16>, !torch.int -> !torch.vtensor<[2,8,1024,80],f16>
  %2791 = torch.aten._unsafe_view %2790, %580 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
  %2792 = torch.aten.transpose.int %2787, %int-1, %int-2 : !torch.vtensor<[16,1024,80],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,80,1024],f16>
  %2793 = torch.aten.expand %2783, %580, %false : !torch.vtensor<[16,1024,80],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,1024,80],f16>
  %2794 = torch.aten._reshape_alias %2793, %580, %592 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
  %2795 = torch.aten.expand %2792, %594, %false : !torch.vtensor<[16,80,1024],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,80,1024],f16>
  %2796 = torch.aten._reshape_alias %2795, %594, %596 : !torch.vtensor<[16,80,1024],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,80,1024],f16>
  %2797 = torch.aten.bmm %2794, %2796 : !torch.vtensor<[16,1024,80],f16>, !torch.vtensor<[16,80,1024],f16> -> !torch.vtensor<[16,1024,1024],f16>
  %2798 = torch.aten._unsafe_view %2797, %599 : !torch.vtensor<[16,1024,1024],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,1024],f16>
  %2799 = torch.aten.mul.Tensor %2798, %1 : !torch.vtensor<[16,1024,1024],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,1024,1024],f16>
  %2800 = torch.aten._softmax %2799, %int-1, %false : !torch.vtensor<[16,1024,1024],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,1024,1024],f16>
  %2801 = torch.aten.expand %2800, %599, %false : !torch.vtensor<[16,1024,1024],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,1024,1024],f16>
  %2802 = torch.aten._reshape_alias %2801, %599, %604 : !torch.vtensor<[16,1024,1024],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,1024,1024],f16>
  %2803 = torch.aten.expand %2791, %580, %false : !torch.vtensor<[16,1024,80],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,1024,80],f16>
  %2804 = torch.aten._reshape_alias %2803, %580, %592 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
  %2805 = torch.aten.bmm %2802, %2804 : !torch.vtensor<[16,1024,1024],f16>, !torch.vtensor<[16,1024,80],f16> -> !torch.vtensor<[16,1024,80],f16>
  %2806 = torch.aten._unsafe_view %2805, %580 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
  %2807 = torch.aten._reshape_alias %2806, %610, %611 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16>
  %2808 = torch.aten.permute %2807, %179 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16>
  %2809 = torch.aten.clone %2808, %int0 : !torch.vtensor<[2,1024,8,80],f16>, !torch.int -> !torch.vtensor<[2,1024,8,80],f16>
  %2810 = torch.aten._unsafe_view %2809, %557 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
  %2811 = torch.aten.t %36 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
  %2812 = torch.aten.view %2810, %562 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
  %2813 = torch.aten.addmm %45, %2812, %2811, %int1, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[2048,640],f16>
  %2814 = torch.aten.view %2813, %557 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
  %2815 = torch.aten.add.Tensor %2814, %2767, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16>
  %result0_109, %result1_110, %result2_111 = torch.aten.native_layer_norm %2815, %560, %45, %45, %float1.000000e-05 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.vtensor<[640],f16>, !torch.vtensor<[640],f16>, !torch.float -> !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,1],f32>, !torch.vtensor<[2,1024,1],f32>
  %2816 = torch.aten.t %36 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
  %2817 = torch.aten._reshape_alias %result0_109, %562, %563 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
  %2818 = torch.aten.mm %2817, %2816 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16>
  %2819 = torch.aten._unsafe_view %2818, %557 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
  %2820 = torch.aten.t %35 : !torch.vtensor<[640,768],f16> -> !torch.vtensor<[768,640],f16>
  %2821 = torch.aten._reshape_alias %arg2, %228, %229 : !torch.vtensor<[2,77,768],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[154,768],f16>
  %2822 = torch.aten.mm %2821, %2820 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,640],f16> -> !torch.vtensor<[154,640],f16>
  %2823 = torch.aten._unsafe_view %2822, %628 : !torch.vtensor<[154,640],f16>, !torch.list<int> -> !torch.vtensor<[2,77,640],f16>
  %2824 = torch.aten.t %35 : !torch.vtensor<[640,768],f16> -> !torch.vtensor<[768,640],f16>
  %2825 = torch.aten._reshape_alias %arg2, %228, %229 : !torch.vtensor<[2,77,768],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[154,768],f16>
  %2826 = torch.aten.mm %2825, %2824 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,640],f16> -> !torch.vtensor<[154,640],f16>
  %2827 = torch.aten._unsafe_view %2826, %628 : !torch.vtensor<[154,640],f16>, !torch.list<int> -> !torch.vtensor<[2,77,640],f16>
  %2828 = torch.aten._reshape_alias %2819, %575, %576 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16>
  %2829 = torch.aten.permute %2828, %179 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16>
  %2830 = torch.aten.clone %2829, %int0 : !torch.vtensor<[2,8,1024,80],f16>, !torch.int -> !torch.vtensor<[2,8,1024,80],f16>
  %2831 = torch.aten._unsafe_view %2830, %580 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
  %2832 = torch.aten._reshape_alias %2823, %638, %639 : !torch.vtensor<[2,77,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,77,8,80],f16>
  %2833 = torch.aten.permute %2832, %179 : !torch.vtensor<[2,77,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,80],f16>
  %2834 = torch.aten.clone %2833, %int0 : !torch.vtensor<[2,8,77,80],f16>, !torch.int -> !torch.vtensor<[2,8,77,80],f16>
  %2835 = torch.aten._unsafe_view %2834, %643 : !torch.vtensor<[2,8,77,80],f16>, !torch.list<int> -> !torch.vtensor<[16,77,80],f16>
  %2836 = torch.aten._reshape_alias %2827, %638, %639 : !torch.vtensor<[2,77,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,77,8,80],f16>
  %2837 = torch.aten.permute %2836, %179 : !torch.vtensor<[2,77,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,80],f16>
  %2838 = torch.aten.clone %2837, %int0 : !torch.vtensor<[2,8,77,80],f16>, !torch.int -> !torch.vtensor<[2,8,77,80],f16>
  %2839 = torch.aten._unsafe_view %2838, %643 : !torch.vtensor<[2,8,77,80],f16>, !torch.list<int> -> !torch.vtensor<[16,77,80],f16>
  %2840 = torch.aten.transpose.int %2835, %int-1, %int-2 : !torch.vtensor<[16,77,80],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,80,77],f16>
  %2841 = torch.aten.expand %2831, %580, %false : !torch.vtensor<[16,1024,80],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,1024,80],f16>
  %2842 = torch.aten._reshape_alias %2841, %580, %592 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
  %2843 = torch.aten.expand %2840, %652, %false : !torch.vtensor<[16,80,77],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,80,77],f16>
  %2844 = torch.aten._reshape_alias %2843, %652, %654 : !torch.vtensor<[16,80,77],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,80,77],f16>
  %2845 = torch.aten.bmm %2842, %2844 : !torch.vtensor<[16,1024,80],f16>, !torch.vtensor<[16,80,77],f16> -> !torch.vtensor<[16,1024,77],f16>
  %2846 = torch.aten._unsafe_view %2845, %657 : !torch.vtensor<[16,1024,77],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,77],f16>
  %2847 = torch.aten.mul.Tensor %2846, %1 : !torch.vtensor<[16,1024,77],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,1024,77],f16>
  %2848 = torch.aten._softmax %2847, %int-1, %false : !torch.vtensor<[16,1024,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,1024,77],f16>
  %2849 = torch.aten.expand %2848, %657, %false : !torch.vtensor<[16,1024,77],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,1024,77],f16>
  %2850 = torch.aten._reshape_alias %2849, %657, %662 : !torch.vtensor<[16,1024,77],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,1024,77],f16>
  %2851 = torch.aten.expand %2839, %643, %false : !torch.vtensor<[16,77,80],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,77,80],f16>
  %2852 = torch.aten._reshape_alias %2851, %643, %665 : !torch.vtensor<[16,77,80],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,77,80],f16>
  %2853 = torch.aten.bmm %2850, %2852 : !torch.vtensor<[16,1024,77],f16>, !torch.vtensor<[16,77,80],f16> -> !torch.vtensor<[16,1024,80],f16>
  %2854 = torch.aten._unsafe_view %2853, %580 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
  %2855 = torch.aten._reshape_alias %2854, %610, %611 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16>
  %2856 = torch.aten.permute %2855, %179 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16>
  %2857 = torch.aten.clone %2856, %int0 : !torch.vtensor<[2,1024,8,80],f16>, !torch.int -> !torch.vtensor<[2,1024,8,80],f16>
  %2858 = torch.aten._unsafe_view %2857, %557 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
  %2859 = torch.aten.t %36 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
  %2860 = torch.aten.view %2858, %562 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
  %2861 = torch.aten.addmm %45, %2860, %2859, %int1, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[2048,640],f16>
  %2862 = torch.aten.view %2861, %557 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
  %2863 = torch.aten.add.Tensor %2862, %2815, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16>
  %result0_112, %result1_113, %result2_114 = torch.aten.native_layer_norm %2863, %560, %45, %45, %float1.000000e-05 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.vtensor<[640],f16>, !torch.vtensor<[640],f16>, !torch.float -> !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,1],f32>, !torch.vtensor<[2,1024,1],f32>
  %2864 = torch.aten.t %37 : !torch.vtensor<[5120,640],f16> -> !torch.vtensor<[640,5120],f16>
  %2865 = torch.aten.view %result0_112, %562 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
  %2866 = torch.aten.addmm %38, %2865, %2864, %int1, %int1 : !torch.vtensor<[5120],f16>, !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,5120],f16>, !torch.int, !torch.int -> !torch.vtensor<[2048,5120],f16>
  %2867 = torch.aten.view %2866, %681 : !torch.vtensor<[2048,5120],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,5120],f16>
  %2868 = torch.aten.slice.Tensor %2867, %int-1, %int0, %int2560, %int1 : !torch.vtensor<[2,1024,5120],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1024,2560],f16>
  %2869 = torch.aten.slice.Tensor %2867, %int-1, %int2560, %int5120, %int1 : !torch.vtensor<[2,1024,5120],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1024,2560],f16>
  %2870 = torch.aten.gelu %2869, %str_0 : !torch.vtensor<[2,1024,2560],f16>, !torch.str -> !torch.vtensor<[2,1024,2560],f16>
  %2871 = torch.aten.mul.Tensor %2868, %2870 : !torch.vtensor<[2,1024,2560],f16>, !torch.vtensor<[2,1024,2560],f16> -> !torch.vtensor<[2,1024,2560],f16>
  %2872 = torch.aten.t %39 : !torch.vtensor<[640,2560],f16> -> !torch.vtensor<[2560,640],f16>
  %2873 = torch.aten.view %2871, %688 : !torch.vtensor<[2,1024,2560],f16>, !torch.list<int> -> !torch.vtensor<[2048,2560],f16>
  %2874 = torch.aten.addmm %45, %2873, %2872, %int1, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2048,2560],f16>, !torch.vtensor<[2560,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[2048,640],f16>
  %2875 = torch.aten.view %2874, %557 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
  %2876 = torch.aten.add.Tensor %2875, %2863, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16>
  %2877 = torch.aten._reshape_alias %2876, %693, %694 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,32,640],f16>
  %2878 = torch.aten.permute %2877, %300 : !torch.vtensor<[2,32,32,640],f16>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f16>
  %2879 = torch.aten._convolution %2878, %40, %45, %87, %88, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[640,640,1,1],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,640,32,32],f16>
  %2880 = torch.aten.add.Tensor %2879, %2747, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
  %2881 = torch.aten.upsample_nearest2d.vec %2880, %none, %1768 : !torch.vtensor<[2,640,32,32],f16>, !torch.none, !torch.list<float> -> !torch.vtensor<[2,640,64,64],f16>
  %2882 = torch.aten._convolution %2881, %41, %45, %87, %87, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,640,64,64],f16>, !torch.vtensor<[640,640,3,3],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,640,64,64],f16>
  %2883 = torch.prim.ListConstruct %2882, %483 : (!torch.vtensor<[2,640,64,64],f16>, !torch.vtensor<[2,320,64,64],f16>) -> !torch.list<vtensor>
  %2884 = torch.aten.cat %2883, %int1 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[2,960,64,64],f16>
  %2885 = torch.aten.clone %2884, %int0 : !torch.vtensor<[2,960,64,64],f16>, !torch.int -> !torch.vtensor<[2,960,64,64],f16>
  // Normalization over 32 groups, written out as primitive ops (this matches the
  // decomposed form of a group normalization). The 960 channels are viewed as
  // 32 groups of 30 and the 64x64 positions are flattened to 4096.
  %2886 = torch.prim.ListConstruct %int2, %int32, %int30, %int4096 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
  %2887 = torch.aten.view %2885, %2886 : !torch.vtensor<[2,960,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,32,30,4096],f16>
  // The statistics are computed on an f32 copy of the grouped tensor.
  %2888 = torch.aten._to_copy %2887, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,32,30,4096],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,32,30,4096],f32>
  // Variance (correction operand %int0) and mean over the dims listed in %93,
  // both with keepdim = %true; the result types show the last two dims
  // collapsing to [2,32,1,1].
  %2889 = torch.aten.var.correction %2888, %93, %int0, %true : !torch.vtensor<[2,32,30,4096],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>
  %2890 = torch.aten.mean.dim %2888, %93, %true, %none : !torch.vtensor<[2,32,30,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
  // rsqrt(var + %5); %5 is the scalar literal 1.0e-05 declared at the top of
  // the function.
  %2891 = torch.aten.add.Tensor %2889, %5, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
  %2892 = torch.aten.rsqrt %2891 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
  // (x - mean) * rsqrt(var + eps). The subtraction reads the f16 view %2887,
  // not the f32 copy %2888; its result is typed f32.
  %2893 = torch.aten.sub.Tensor %2887, %2890, %int1 : !torch.vtensor<[2,32,30,4096],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,30,4096],f32>
  %2894 = torch.aten.mul.Tensor %2893, %2892 : !torch.vtensor<[2,32,30,4096],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,30,4096],f32>
  // Undo the grouping: back to [2,960,64,64].
  %2895 = torch.prim.ListConstruct %int2, %int960, %int64, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
  %2896 = torch.aten.view %2894, %2895 : !torch.vtensor<[2,32,30,4096],f32>, !torch.list<int> -> !torch.vtensor<[2,960,64,64],f32>
  // Per-channel multiply, then per-channel add. Each operand is %42 unsqueezed
  // twice to [960,1,1] so that it broadcasts over the two spatial dims.
  // NOTE(review): the multiplier and the addend are the same value %42;
  // presumably elided constants of identical type were merged in this dump --
  // confirm against the original model.
  %2897 = torch.aten.unsqueeze %42, %int-1 : !torch.vtensor<[960],f16>, !torch.int -> !torch.vtensor<[960,1],f16>
  %2898 = torch.aten.unsqueeze %2897, %int-1 : !torch.vtensor<[960,1],f16>, !torch.int -> !torch.vtensor<[960,1,1],f16>
  %2899 = torch.aten.mul.Tensor %2896, %2898 : !torch.vtensor<[2,960,64,64],f32>, !torch.vtensor<[960,1,1],f16> -> !torch.vtensor<[2,960,64,64],f32>
  %2900 = torch.aten.unsqueeze %42, %int-1 : !torch.vtensor<[960],f16>, !torch.int -> !torch.vtensor<[960,1],f16>
  %2901 = torch.aten.unsqueeze %2900, %int-1 : !torch.vtensor<[960,1],f16>, !torch.int -> !torch.vtensor<[960,1,1],f16>
  %2902 = torch.aten.add.Tensor %2899, %2901, %int1 : !torch.vtensor<[2,960,64,64],f32>, !torch.vtensor<[960,1,1],f16>, !torch.int -> !torch.vtensor<[2,960,64,64],f32>
  // Cast the f32 result back to f16, then apply SiLU.
  %2903 = torch.aten._to_copy %2902, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,960,64,64],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,960,64,64],f16>
  %2904 = torch.aten.silu %2903 : !torch.vtensor<[2,960,64,64],f16> -> !torch.vtensor<[2,960,64,64],f16>
  // 3x3 convolution reducing 960 channels to 320 (weight %43 is [320,960,3,3],
  // bias %55 is [320]).
  %2905 = torch.aten._convolution %2904, %43, %55, %87, %87, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,960,64,64],f16>, !torch.vtensor<[320,960,3,3],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,320,64,64],f16>
  // Project the [2,1280] tensor %86 down to [2,320]: SiLU, then
  // addmm(%55, silu(%86), transpose(%53)).
  // NOTE(review): %86 is presumably the timestep embedding -- confirm where it
  // is defined.
  %2906 = torch.aten.silu %86 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
  %2907 = torch.aten.t %53 : !torch.vtensor<[320,1280],f16> -> !torch.vtensor<[1280,320],f16>
  %2908 = torch.aten.addmm %55, %2906, %2907, %int1, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[2,320],f16>
  // Two slices (dim 0, then dim 1) from %int0 to the max i64 value with step
  // %int1; input and result types are both [2,320], so nothing is dropped.
  %2909 = torch.aten.slice.Tensor %2908, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,320],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,320],f16>
  %2910 = torch.aten.slice.Tensor %2909, %int1, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,320],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,320],f16>
  // Append two unit dims ([2,320] -> [2,320,1,1]) and add onto the convolution
  // output, broadcasting over the 64x64 positions.
  %2911 = torch.aten.unsqueeze %2910, %int2 : !torch.vtensor<[2,320],f16>, !torch.int -> !torch.vtensor<[2,320,1],f16>
  %2912 = torch.aten.unsqueeze %2911, %int3 : !torch.vtensor<[2,320,1],f16>, !torch.int -> !torch.vtensor<[2,320,1,1],f16>
  %2913 = torch.aten.add.Tensor %2905, %2912, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,1,1],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f16>
  // Normalization over 32 groups of 10 channels, written out as primitive ops.
  // The view list %90 (defined earlier) reshapes to [2,32,10,4096].
  %2914 = torch.aten.view %2913, %90 : !torch.vtensor<[2,320,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,32,10,4096],f16>
  // f32 copy used for the statistics.
  %2915 = torch.aten._to_copy %2914, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,32,10,4096],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,32,10,4096],f32>
  // Variance (correction operand %int0) and mean over the dims in %93, keeping
  // reduced dims; results are [2,32,1,1].
  %2916 = torch.aten.var.correction %2915, %93, %int0, %true : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>
  %2917 = torch.aten.mean.dim %2915, %93, %true, %none : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
  // rsqrt(var + %5), with %5 the scalar literal 1.0e-05 from the top of the
  // function.
  %2918 = torch.aten.add.Tensor %2916, %5, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
  %2919 = torch.aten.rsqrt %2918 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
  // (x - mean) * rsqrt(var + eps); the subtraction reads the f16 view %2914.
  %2920 = torch.aten.sub.Tensor %2914, %2917, %int1 : !torch.vtensor<[2,32,10,4096],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,10,4096],f32>
  %2921 = torch.aten.mul.Tensor %2920, %2919 : !torch.vtensor<[2,32,10,4096],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,10,4096],f32>
  // Back to [2,320,64,64] using the shape list %100.
  %2922 = torch.aten.view %2921, %100 : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f32>
  // Per-channel multiply and add, each using %55 unsqueezed to [320,1,1].
  // NOTE(review): scale and shift are the same value %55 here; presumably
  // merged elided constants -- confirm.
  %2923 = torch.aten.unsqueeze %55, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16>
  %2924 = torch.aten.unsqueeze %2923, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16>
  %2925 = torch.aten.mul.Tensor %2922, %2924 : !torch.vtensor<[2,320,64,64],f32>, !torch.vtensor<[320,1,1],f16> -> !torch.vtensor<[2,320,64,64],f32>
  %2926 = torch.aten.unsqueeze %55, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16>
  %2927 = torch.aten.unsqueeze %2926, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16>
  %2928 = torch.aten.add.Tensor %2925, %2927, %int1 : !torch.vtensor<[2,320,64,64],f32>, !torch.vtensor<[320,1,1],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f32>
  // Cast back to f16 and apply SiLU.
  %2929 = torch.aten._to_copy %2928, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,320,64,64],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,320,64,64],f16>
  %2930 = torch.aten.silu %2929 : !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16>
  // 3x3 convolution on the activated tensor (weight %47 is [320,320,3,3]).
  %2931 = torch.aten._convolution %2930, %47, %55, %87, %87, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[320,320,3,3],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,320,64,64],f16>
  // 1x1 convolution applied to the 960-channel concatenation %2884 (weight %44
  // is [320,960,1,1]), bringing it to the same [2,320,64,64] shape as %2931.
  %2932 = torch.aten._convolution %2884, %44, %55, %87, %88, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,960,64,64],f16>, !torch.vtensor<[320,960,1,1],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,320,64,64],f16>
  // Sum of the two branches.
  %2933 = torch.aten.add.Tensor %2932, %2931, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f16>
  // Division by %4, the scalar literal 1.0 declared at the top of the function.
  // %2934 is read again later by the add %3067.
  %2934 = torch.aten.div.Tensor %2933, %4 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[2,320,64,64],f16>
  // Clone consumed by the view %2936.
  %2935 = torch.aten.clone %2934, %int0 : !torch.vtensor<[2,320,64,64],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f16>
  // Normalization over 32 groups of 10 channels, written out as primitive ops.
  // Unlike the neighbouring instances that add %5 (1.0e-05), this one adds %3
  // (the literal 9.9999999999999995E-7) to the variance, and no activation
  // follows the final cast.
  %2936 = torch.aten.view %2935, %90 : !torch.vtensor<[2,320,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,32,10,4096],f16>
  // f32 copy used for the statistics.
  %2937 = torch.aten._to_copy %2936, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,32,10,4096],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,32,10,4096],f32>
  // Variance (correction operand %int0) and mean over the dims in %93, keeping
  // reduced dims; results are [2,32,1,1].
  %2938 = torch.aten.var.correction %2937, %93, %int0, %true : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>
  %2939 = torch.aten.mean.dim %2937, %93, %true, %none : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
  // rsqrt(var + %3).
  %2940 = torch.aten.add.Tensor %2938, %3, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
  %2941 = torch.aten.rsqrt %2940 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
  // (x - mean) * rsqrt(var + eps); the subtraction reads the f16 view %2936.
  %2942 = torch.aten.sub.Tensor %2936, %2939, %int1 : !torch.vtensor<[2,32,10,4096],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,10,4096],f32>
  %2943 = torch.aten.mul.Tensor %2942, %2941 : !torch.vtensor<[2,32,10,4096],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,10,4096],f32>
  // Back to [2,320,64,64] using the shape list %100.
  %2944 = torch.aten.view %2943, %100 : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f32>
  // Per-channel multiply and add, each using %55 unsqueezed to [320,1,1].
  %2945 = torch.aten.unsqueeze %55, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16>
  %2946 = torch.aten.unsqueeze %2945, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16>
  %2947 = torch.aten.mul.Tensor %2944, %2946 : !torch.vtensor<[2,320,64,64],f32>, !torch.vtensor<[320,1,1],f16> -> !torch.vtensor<[2,320,64,64],f32>
  %2948 = torch.aten.unsqueeze %55, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16>
  %2949 = torch.aten.unsqueeze %2948, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16>
  %2950 = torch.aten.add.Tensor %2947, %2949, %int1 : !torch.vtensor<[2,320,64,64],f32>, !torch.vtensor<[320,1,1],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f32>
  // Cast back to f16.
  %2951 = torch.aten._to_copy %2950, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,320,64,64],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,320,64,64],f16>
  // 1x1 convolution (weight %54 is [320,320,1,1], bias %55); shape is unchanged.
  %2952 = torch.aten._convolution %2951, %54, %55, %87, %88, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[320,320,1,1],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,320,64,64],f16>
  // Move the 320-wide dim last ([2,320,64,64] -> [2,64,64,320]), then flatten
  // the two 64-sized dims into 4096 rows of width 320.
  %2953 = torch.aten.permute %2952, %156 : !torch.vtensor<[2,320,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,64,64,320],f16>
  %2954 = torch.aten._reshape_alias %2953, %158, %159 : !torch.vtensor<[2,64,64,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
  // Layer norm with normalized shape %161, %55 passed as both weight and bias,
  // and eps %float1.000000e-05. The first result is the normalized tensor that
  // the following projections consume; the other two results are [2,4096,1] f32.
  %result0_115, %result1_116, %result2_117 = torch.aten.native_layer_norm %2954, %161, %55, %55, %float1.000000e-05 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.vtensor<[320],f16>, !torch.vtensor<[320],f16>, !torch.float -> !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,1],f32>, !torch.vtensor<[2,4096,1],f32>
  // Three bias-free linear projections of the normalized tokens %result0_115.
  // Each one flattens the tokens to [8192,320], multiplies by transpose(%50)
  // and reshapes back to [2,4096,320]. Their later uses show the roles:
  //   %2958 -> left operand of the score bmm %2984            (query)
  //   %2962 -> transposed right operand of the score bmm      (key)
  //   %2966 -> right operand of the bmm after the softmax     (value)
  // NOTE(review): all three read the same weight %50; presumably elided
  // constants of identical type were merged in this dump -- confirm.
  %2955 = torch.aten.t %50 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
  %2956 = torch.aten._reshape_alias %result0_115, %163, %164 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
  %2957 = torch.aten.mm %2956, %2955 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16>
  %2958 = torch.aten._unsafe_view %2957, %158 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
  %2959 = torch.aten.t %50 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
  %2960 = torch.aten._reshape_alias %result0_115, %163, %164 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
  %2961 = torch.aten.mm %2960, %2959 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16>
  %2962 = torch.aten._unsafe_view %2961, %158 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
  %2963 = torch.aten.t %50 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
  %2964 = torch.aten._reshape_alias %result0_115, %163, %164 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
  %2965 = torch.aten.mm %2964, %2963 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16>
  %2966 = torch.aten._unsafe_view %2965, %158 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
  // Split each projection into 8 heads of width 40 and fold the heads into the
  // batch dim: [2,4096,320] -> [2,4096,8,40] -> [2,8,4096,40] -> [16,4096,40].
  // The clone sits between the permute and the flattening view.
  // Query (%2958 -> %2970):
  %2967 = torch.aten._reshape_alias %2958, %176, %177 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16>
  %2968 = torch.aten.permute %2967, %179 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16>
  %2969 = torch.aten.clone %2968, %int0 : !torch.vtensor<[2,8,4096,40],f16>, !torch.int -> !torch.vtensor<[2,8,4096,40],f16>
  %2970 = torch.aten._unsafe_view %2969, %182 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
  // Key (%2962 -> %2974):
  %2971 = torch.aten._reshape_alias %2962, %176, %177 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16>
  %2972 = torch.aten.permute %2971, %179 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16>
  %2973 = torch.aten.clone %2972, %int0 : !torch.vtensor<[2,8,4096,40],f16>, !torch.int -> !torch.vtensor<[2,8,4096,40],f16>
  %2974 = torch.aten._unsafe_view %2973, %182 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
  // Value (%2966 -> %2978):
  %2975 = torch.aten._reshape_alias %2966, %176, %177 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16>
  %2976 = torch.aten.permute %2975, %179 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16>
  %2977 = torch.aten.clone %2976, %int0 : !torch.vtensor<[2,8,4096,40],f16>, !torch.int -> !torch.vtensor<[2,8,4096,40],f16>
  %2978 = torch.aten._unsafe_view %2977, %182 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
  // Scaled dot-product attention over the 4096 tokens, per (batch, head) slice.
  // Swap the last two dims of the key: [16,4096,40] -> [16,40,4096].
  %2979 = torch.aten.transpose.int %2974, %int-1, %int-2 : !torch.vtensor<[16,4096,40],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,40,4096],f16>
  // The expand/_reshape_alias pairs below have identical input and result
  // types, so they do not change any shape.
  %2980 = torch.aten.expand %2970, %182, %false : !torch.vtensor<[16,4096,40],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,4096,40],f16>
  %2981 = torch.aten._reshape_alias %2980, %182, %194 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
  %2982 = torch.aten.expand %2979, %196, %false : !torch.vtensor<[16,40,4096],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,40,4096],f16>
  %2983 = torch.aten._reshape_alias %2982, %196, %198 : !torch.vtensor<[16,40,4096],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,40,4096],f16>
  // Scores: [16,4096,40] x [16,40,4096] -> [16,4096,4096].
  %2984 = torch.aten.bmm %2981, %2983 : !torch.vtensor<[16,4096,40],f16>, !torch.vtensor<[16,40,4096],f16> -> !torch.vtensor<[16,4096,4096],f16>
  %2985 = torch.aten._unsafe_view %2984, %201 : !torch.vtensor<[16,4096,4096],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,4096],f16>
  // Scale by %2, the scalar literal 0.15811388300841897 (= 1/sqrt(40), the
  // head width), then softmax along the last dim (%int-1).
  %2986 = torch.aten.mul.Tensor %2985, %2 : !torch.vtensor<[16,4096,4096],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,4096,4096],f16>
  %2987 = torch.aten._softmax %2986, %int-1, %false : !torch.vtensor<[16,4096,4096],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,4096,4096],f16>
  %2988 = torch.aten.expand %2987, %201, %false : !torch.vtensor<[16,4096,4096],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,4096,4096],f16>
  %2989 = torch.aten._reshape_alias %2988, %201, %206 : !torch.vtensor<[16,4096,4096],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,4096,4096],f16>
  %2990 = torch.aten.expand %2978, %182, %false : !torch.vtensor<[16,4096,40],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,4096,40],f16>
  %2991 = torch.aten._reshape_alias %2990, %182, %194 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
  // Weighted sum of the values: [16,4096,4096] x [16,4096,40] -> [16,4096,40].
  %2992 = torch.aten.bmm %2989, %2991 : !torch.vtensor<[16,4096,4096],f16>, !torch.vtensor<[16,4096,40],f16> -> !torch.vtensor<[16,4096,40],f16>
  %2993 = torch.aten._unsafe_view %2992, %182 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
  // Undo the head split:
  // [16,4096,40] -> [2,8,4096,40] -> [2,4096,8,40] -> [2,4096,320].
  %2994 = torch.aten._reshape_alias %2993, %212, %213 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16>
  %2995 = torch.aten.permute %2994, %179 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16>
  %2996 = torch.aten.clone %2995, %int0 : !torch.vtensor<[2,4096,8,40],f16>, !torch.int -> !torch.vtensor<[2,4096,8,40],f16>
  %2997 = torch.aten._unsafe_view %2996, %158 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
  // Output projection of the attention result: flatten to [8192,320], compute
  // addmm(%55, x, transpose(%50)) and reshape back to [2,4096,320].
  %2998 = torch.aten.t %50 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
  %2999 = torch.aten.view %2997, %163 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
  %3000 = torch.aten.addmm %55, %2999, %2998, %int1, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[8192,320],f16>
  %3001 = torch.aten.view %3000, %158 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
  // Residual add with %2954, the token tensor from before the layer norm.
  %3002 = torch.aten.add.Tensor %3001, %2954, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16>
  // Layer norm of %3002 (normalized shape %161, %55 as weight and bias, eps
  // %float1.000000e-05); the first result feeds the query projection below.
  %result0_118, %result1_119, %result2_120 = torch.aten.native_layer_norm %3002, %161, %55, %55, %float1.000000e-05 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.vtensor<[320],f16>, !torch.vtensor<[320],f16>, !torch.float -> !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,1],f32>, !torch.vtensor<[2,4096,1],f32>
  // Query: bias-free projection of the normalized tokens with transpose(%50).
  %3003 = torch.aten.t %50 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
  %3004 = torch.aten._reshape_alias %result0_118, %163, %164 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
  %3005 = torch.aten.mm %3004, %3003 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16>
  %3006 = torch.aten._unsafe_view %3005, %158 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
  // Key and value: bias-free projections of the function argument %arg2
  // ([2,77,768]), flattened to [154,768] and multiplied by transpose(%49)
  // ([768,320]), giving [2,77,320]. %3010 is later transposed for the score
  // bmm (key); %3014 is multiplied by the softmax output (value).
  // NOTE(review): both projections use the same weight %49 and are therefore
  // identical in this dump; presumably merged elided constants -- confirm.
  %3007 = torch.aten.t %49 : !torch.vtensor<[320,768],f16> -> !torch.vtensor<[768,320],f16>
  %3008 = torch.aten._reshape_alias %arg2, %228, %229 : !torch.vtensor<[2,77,768],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[154,768],f16>
  %3009 = torch.aten.mm %3008, %3007 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,320],f16> -> !torch.vtensor<[154,320],f16>
  %3010 = torch.aten._unsafe_view %3009, %232 : !torch.vtensor<[154,320],f16>, !torch.list<int> -> !torch.vtensor<[2,77,320],f16>
  %3011 = torch.aten.t %49 : !torch.vtensor<[320,768],f16> -> !torch.vtensor<[768,320],f16>
  %3012 = torch.aten._reshape_alias %arg2, %228, %229 : !torch.vtensor<[2,77,768],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[154,768],f16>
  %3013 = torch.aten.mm %3012, %3011 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,320],f16> -> !torch.vtensor<[154,320],f16>
  %3014 = torch.aten._unsafe_view %3013, %232 : !torch.vtensor<[154,320],f16>, !torch.list<int> -> !torch.vtensor<[2,77,320],f16>
  // Split into 8 heads of width 40 and fold the heads into the batch dim.
  // Query: [2,4096,320] -> [2,4096,8,40] -> [2,8,4096,40] -> [16,4096,40].
  %3015 = torch.aten._reshape_alias %3006, %176, %177 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16>
  %3016 = torch.aten.permute %3015, %179 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16>
  %3017 = torch.aten.clone %3016, %int0 : !torch.vtensor<[2,8,4096,40],f16>, !torch.int -> !torch.vtensor<[2,8,4096,40],f16>
  %3018 = torch.aten._unsafe_view %3017, %182 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
  // Key: [2,77,320] -> [2,77,8,40] -> [2,8,77,40] -> [16,77,40].
  %3019 = torch.aten._reshape_alias %3010, %242, %243 : !torch.vtensor<[2,77,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,77,8,40],f16>
  %3020 = torch.aten.permute %3019, %179 : !torch.vtensor<[2,77,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,40],f16>
  %3021 = torch.aten.clone %3020, %int0 : !torch.vtensor<[2,8,77,40],f16>, !torch.int -> !torch.vtensor<[2,8,77,40],f16>
  %3022 = torch.aten._unsafe_view %3021, %247 : !torch.vtensor<[2,8,77,40],f16>, !torch.list<int> -> !torch.vtensor<[16,77,40],f16>
  // Value: same reshaping applied to %3014.
  %3023 = torch.aten._reshape_alias %3014, %242, %243 : !torch.vtensor<[2,77,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,77,8,40],f16>
  %3024 = torch.aten.permute %3023, %179 : !torch.vtensor<[2,77,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,40],f16>
  %3025 = torch.aten.clone %3024, %int0 : !torch.vtensor<[2,8,77,40],f16>, !torch.int -> !torch.vtensor<[2,8,77,40],f16>
  %3026 = torch.aten._unsafe_view %3025, %247 : !torch.vtensor<[2,8,77,40],f16>, !torch.list<int> -> !torch.vtensor<[16,77,40],f16>
  // Scaled dot-product attention of the 4096 tokens against the 77 rows
  // derived from %arg2, per (batch, head) slice.
  // Swap the last two dims of the key: [16,77,40] -> [16,40,77].
  %3027 = torch.aten.transpose.int %3022, %int-1, %int-2 : !torch.vtensor<[16,77,40],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,40,77],f16>
  // The expand/_reshape_alias pairs below have identical input and result
  // types, so they do not change any shape.
  %3028 = torch.aten.expand %3018, %182, %false : !torch.vtensor<[16,4096,40],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,4096,40],f16>
  %3029 = torch.aten._reshape_alias %3028, %182, %194 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
  %3030 = torch.aten.expand %3027, %256, %false : !torch.vtensor<[16,40,77],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,40,77],f16>
  %3031 = torch.aten._reshape_alias %3030, %256, %258 : !torch.vtensor<[16,40,77],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,40,77],f16>
  // Scores: [16,4096,40] x [16,40,77] -> [16,4096,77].
  %3032 = torch.aten.bmm %3029, %3031 : !torch.vtensor<[16,4096,40],f16>, !torch.vtensor<[16,40,77],f16> -> !torch.vtensor<[16,4096,77],f16>
  %3033 = torch.aten._unsafe_view %3032, %261 : !torch.vtensor<[16,4096,77],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,77],f16>
  // Scale by %2 (0.15811388300841897 = 1/sqrt(40)) and softmax along the last
  // dim (%int-1), i.e. over the 77 entries.
  %3034 = torch.aten.mul.Tensor %3033, %2 : !torch.vtensor<[16,4096,77],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,4096,77],f16>
  %3035 = torch.aten._softmax %3034, %int-1, %false : !torch.vtensor<[16,4096,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,4096,77],f16>
  %3036 = torch.aten.expand %3035, %261, %false : !torch.vtensor<[16,4096,77],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,4096,77],f16>
  %3037 = torch.aten._reshape_alias %3036, %261, %266 : !torch.vtensor<[16,4096,77],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,4096,77],f16>
  %3038 = torch.aten.expand %3026, %247, %false : !torch.vtensor<[16,77,40],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,77,40],f16>
  %3039 = torch.aten._reshape_alias %3038, %247, %269 : !torch.vtensor<[16,77,40],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,77,40],f16>
  // Weighted sum of the values: [16,4096,77] x [16,77,40] -> [16,4096,40].
  %3040 = torch.aten.bmm %3037, %3039 : !torch.vtensor<[16,4096,77],f16>, !torch.vtensor<[16,77,40],f16> -> !torch.vtensor<[16,4096,40],f16>
  %3041 = torch.aten._unsafe_view %3040, %182 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
  // Undo the head split:
  // [16,4096,40] -> [2,8,4096,40] -> [2,4096,8,40] -> [2,4096,320].
  %3042 = torch.aten._reshape_alias %3041, %212, %213 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16>
  %3043 = torch.aten.permute %3042, %179 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16>
  %3044 = torch.aten.clone %3043, %int0 : !torch.vtensor<[2,4096,8,40],f16>, !torch.int -> !torch.vtensor<[2,4096,8,40],f16>
  %3045 = torch.aten._unsafe_view %3044, %158 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
  // Output projection of the attention result: flatten to [8192,320], compute
  // addmm(%55, x, transpose(%50)) and reshape back to [2,4096,320].
  %3046 = torch.aten.t %50 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
  %3047 = torch.aten.view %3045, %163 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
  %3048 = torch.aten.addmm %55, %3047, %3046, %int1, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[8192,320],f16>
  %3049 = torch.aten.view %3048, %158 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
  // Residual add with %3002, the input of the preceding layer norm.
  %3050 = torch.aten.add.Tensor %3049, %3002, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16>
  // Feed-forward sub-block with a GELU gate.
  // Layer norm of %3050 (normalized shape %161, %55 as weight and bias, eps
  // %float1.000000e-05).
  %result0_121, %result1_122, %result2_123 = torch.aten.native_layer_norm %3050, %161, %55, %55, %float1.000000e-05 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.vtensor<[320],f16>, !torch.vtensor<[320],f16>, !torch.float -> !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,1],f32>, !torch.vtensor<[2,4096,1],f32>
  // Expand 320 -> 2560 features: addmm(%52, x, transpose(%51)) on the
  // flattened [8192,320] tokens, then reshape to [2,4096,2560].
  %3051 = torch.aten.t %51 : !torch.vtensor<[2560,320],f16> -> !torch.vtensor<[320,2560],f16>
  %3052 = torch.aten.view %result0_121, %163 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
  %3053 = torch.aten.addmm %52, %3052, %3051, %int1, %int1 : !torch.vtensor<[2560],f16>, !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,2560],f16>, !torch.int, !torch.int -> !torch.vtensor<[8192,2560],f16>
  %3054 = torch.aten.view %3053, %285 : !torch.vtensor<[8192,2560],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,2560],f16>
  // Split the last dim in half: %3055 takes [0,1280), %3056 takes [1280,2560).
  %3055 = torch.aten.slice.Tensor %3054, %int-1, %int0, %int1280, %int1 : !torch.vtensor<[2,4096,2560],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,4096,1280],f16>
  %3056 = torch.aten.slice.Tensor %3054, %int-1, %int1280, %int2560, %int1 : !torch.vtensor<[2,4096,2560],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,4096,1280],f16>
  // first_half * gelu(second_half); the GELU variant is selected by the string
  // constant %str_0, which is defined earlier in the function.
  %3057 = torch.aten.gelu %3056, %str_0 : !torch.vtensor<[2,4096,1280],f16>, !torch.str -> !torch.vtensor<[2,4096,1280],f16>
  %3058 = torch.aten.mul.Tensor %3055, %3057 : !torch.vtensor<[2,4096,1280],f16>, !torch.vtensor<[2,4096,1280],f16> -> !torch.vtensor<[2,4096,1280],f16>
  // Project 1280 -> 320: addmm(%55, x, transpose(%53)), reshape back to
  // [2,4096,320].
  %3059 = torch.aten.t %53 : !torch.vtensor<[320,1280],f16> -> !torch.vtensor<[1280,320],f16>
  %3060 = torch.aten.view %3058, %292 : !torch.vtensor<[2,4096,1280],f16>, !torch.list<int> -> !torch.vtensor<[8192,1280],f16>
  %3061 = torch.aten.addmm %55, %3060, %3059, %int1, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[8192,1280],f16>, !torch.vtensor<[1280,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[8192,320],f16>
  %3062 = torch.aten.view %3061, %158 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
  // Residual add with %3050, the input of the layer norm above.
  %3063 = torch.aten.add.Tensor %3062, %3050, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16>
  // Turn the 4096 tokens back into a 64x64 grid and move the 320-wide dim to
  // position 1: [2,4096,320] -> [2,64,64,320] -> [2,320,64,64].
  %3064 = torch.aten._reshape_alias %3063, %297, %298 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,64,64,320],f16>
  %3065 = torch.aten.permute %3064, %300 : !torch.vtensor<[2,64,64,320],f16>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f16>
  // 1x1 convolution (weight %54 is [320,320,1,1], bias %55).
  %3066 = torch.aten._convolution %3065, %54, %55, %87, %88, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[320,320,1,1],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,320,64,64],f16>
  // Residual add with %2934, the feature map from before the token reshaping.
  %3067 = torch.aten.add.Tensor %3066, %2934, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f16>
  // Concatenate with %303 ([2,320,64,64]) along the dim given by %int1; the
  // result has 640 = 320 + 320 entries in dim 1.
  %3068 = torch.prim.ListConstruct %3067, %303 : (!torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f16>) -> !torch.list<vtensor>
  %3069 = torch.aten.cat %3068, %int1 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[2,640,64,64],f16>
  // Clone consumed by the view %3072.
  %3070 = torch.aten.clone %3069, %int0 : !torch.vtensor<[2,640,64,64],f16>, !torch.int -> !torch.vtensor<[2,640,64,64],f16>
  // Normalization over 32 groups, written out as primitive ops. The 640
  // channels are viewed as 32 groups of 20 and the 64x64 positions are
  // flattened to 4096.
  %3071 = torch.prim.ListConstruct %int2, %int32, %int20, %int4096 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
  %3072 = torch.aten.view %3070, %3071 : !torch.vtensor<[2,640,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,32,20,4096],f16>
  // f32 copy used for the statistics.
  %3073 = torch.aten._to_copy %3072, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,32,20,4096],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,32,20,4096],f32>
  // Variance (correction operand %int0) and mean over the dims in %93, keeping
  // reduced dims; results are [2,32,1,1].
  %3074 = torch.aten.var.correction %3073, %93, %int0, %true : !torch.vtensor<[2,32,20,4096],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>
  %3075 = torch.aten.mean.dim %3073, %93, %true, %none : !torch.vtensor<[2,32,20,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
  // rsqrt(var + %5), with %5 the scalar literal 1.0e-05 from the top of the
  // function.
  %3076 = torch.aten.add.Tensor %3074, %5, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
  %3077 = torch.aten.rsqrt %3076 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
  // (x - mean) * rsqrt(var + eps); the subtraction reads the f16 view %3072.
  %3078 = torch.aten.sub.Tensor %3072, %3075, %int1 : !torch.vtensor<[2,32,20,4096],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,20,4096],f32>
  %3079 = torch.aten.mul.Tensor %3078, %3077 : !torch.vtensor<[2,32,20,4096],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,20,4096],f32>
  // Undo the grouping: back to [2,640,64,64].
  %3080 = torch.prim.ListConstruct %int2, %int640, %int64, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
  %3081 = torch.aten.view %3079, %3080 : !torch.vtensor<[2,32,20,4096],f32>, !torch.list<int> -> !torch.vtensor<[2,640,64,64],f32>
  // Per-channel multiply and add, each using %45 unsqueezed to [640,1,1].
  // NOTE(review): scale and shift are the same value %45 here; presumably
  // merged elided constants -- confirm.
  %3082 = torch.aten.unsqueeze %45, %int-1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640,1],f16>
  %3083 = torch.aten.unsqueeze %3082, %int-1 : !torch.vtensor<[640,1],f16>, !torch.int -> !torch.vtensor<[640,1,1],f16>
  %3084 = torch.aten.mul.Tensor %3081, %3083 : !torch.vtensor<[2,640,64,64],f32>, !torch.vtensor<[640,1,1],f16> -> !torch.vtensor<[2,640,64,64],f32>
  %3085 = torch.aten.unsqueeze %45, %int-1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640,1],f16>
  %3086 = torch.aten.unsqueeze %3085, %int-1 : !torch.vtensor<[640,1],f16>, !torch.int -> !torch.vtensor<[640,1,1],f16>
  %3087 = torch.aten.add.Tensor %3084, %3086, %int1 : !torch.vtensor<[2,640,64,64],f32>, !torch.vtensor<[640,1,1],f16>, !torch.int -> !torch.vtensor<[2,640,64,64],f32>
  // Cast back to f16 and apply SiLU.
  %3088 = torch.aten._to_copy %3087, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,640,64,64],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,640,64,64],f16>
  %3089 = torch.aten.silu %3088 : !torch.vtensor<[2,640,64,64],f16> -> !torch.vtensor<[2,640,64,64],f16>
  // 3x3 convolution reducing 640 channels to 320 (weight %46 is [320,640,3,3],
  // bias %55 is [320]).
  %3090 = torch.aten._convolution %3089, %46, %55, %87, %87, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,640,64,64],f16>, !torch.vtensor<[320,640,3,3],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,320,64,64],f16>
  // Project the [2,1280] tensor %86 down to [2,320]: SiLU, then
  // addmm(%55, silu(%86), transpose(%53)).
  // NOTE(review): %86 is presumably the timestep embedding -- confirm where it
  // is defined.
  %3091 = torch.aten.silu %86 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
  %3092 = torch.aten.t %53 : !torch.vtensor<[320,1280],f16> -> !torch.vtensor<[1280,320],f16>
  %3093 = torch.aten.addmm %55, %3091, %3092, %int1, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[2,320],f16>
  // Two slices (dim 0, then dim 1) from %int0 to the max i64 value with step
  // %int1; input and result types are both [2,320], so nothing is dropped.
  %3094 = torch.aten.slice.Tensor %3093, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,320],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,320],f16>
  %3095 = torch.aten.slice.Tensor %3094, %int1, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,320],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,320],f16>
  // Append two unit dims ([2,320] -> [2,320,1,1]) and add onto the convolution
  // output, broadcasting over the 64x64 positions.
  %3096 = torch.aten.unsqueeze %3095, %int2 : !torch.vtensor<[2,320],f16>, !torch.int -> !torch.vtensor<[2,320,1],f16>
  %3097 = torch.aten.unsqueeze %3096, %int3 : !torch.vtensor<[2,320,1],f16>, !torch.int -> !torch.vtensor<[2,320,1,1],f16>
  %3098 = torch.aten.add.Tensor %3090, %3097, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,1,1],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f16>
  // Normalizes %3098 per group of channels, then applies SiLU and a convolution
  // with the [320,320,3,3] weight %47.
  // The [2,320,64,64] input is viewed as [2,32,10,4096] and copied to f32 (%int6).
  // var.correction (correction argument %int0) and mean.dim reduce over the dims
  // listed in %93 with keepdim = true, each giving [2,32,1,1].
  // NOTE(review): %90, %93 and %100 are defined outside this chunk; the shapes
  // imply %93 names the last two dims - confirm.
  %3099 = torch.aten.view %3098, %90 : !torch.vtensor<[2,320,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,32,10,4096],f16>
  %3100 = torch.aten._to_copy %3099, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,32,10,4096],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,32,10,4096],f32>
  %3101 = torch.aten.var.correction %3100, %93, %int0, %true : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>
  %3102 = torch.aten.mean.dim %3100, %93, %true, %none : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
  // %5 is the f64 scalar 1e-05; it is added to the variance before rsqrt.
  %3103 = torch.aten.add.Tensor %3101, %5, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
  %3104 = torch.aten.rsqrt %3103 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
  // The subtraction reads the f16 view %3099 (not the f32 copy %3100); the
  // result type is f32.
  %3105 = torch.aten.sub.Tensor %3099, %3102, %int1 : !torch.vtensor<[2,32,10,4096],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,10,4096],f32>
  %3106 = torch.aten.mul.Tensor %3105, %3104 : !torch.vtensor<[2,32,10,4096],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,10,4096],f32>
  %3107 = torch.aten.view %3106, %100 : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f32>
  // Per-channel multiply and add, both using %55 unsqueezed to [320,1,1].
  // NOTE(review): %55 serves as scale, shift and convolution bias here;
  // presumably distinct parameters collapsed into one value because the
  // literals are elided in this dump - confirm.
  %3108 = torch.aten.unsqueeze %55, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16>
  %3109 = torch.aten.unsqueeze %3108, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16>
  %3110 = torch.aten.mul.Tensor %3107, %3109 : !torch.vtensor<[2,320,64,64],f32>, !torch.vtensor<[320,1,1],f16> -> !torch.vtensor<[2,320,64,64],f32>
  %3111 = torch.aten.unsqueeze %55, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16>
  %3112 = torch.aten.unsqueeze %3111, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16>
  %3113 = torch.aten.add.Tensor %3110, %3112, %int1 : !torch.vtensor<[2,320,64,64],f32>, !torch.vtensor<[320,1,1],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f32>
  // Back to f16 (%int5), SiLU, then the convolution.
  %3114 = torch.aten._to_copy %3113, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,320,64,64],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,320,64,64],f16>
  %3115 = torch.aten.silu %3114 : !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16>
  %3116 = torch.aten._convolution %3115, %47, %55, %87, %87, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[320,320,3,3],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,320,64,64],f16>
  // Second path into the sum: %3069 ([2,640,64,64], defined before this chunk)
  // goes through a convolution with the [320,640,1,1] weight %48, and the result
  // is added to %3116.
  // The sum is divided by %4, which is the f64 scalar 1.0, and then cloned.
  // NOTE(review): this convolution passes %88 as its second int list where the
  // 3x3 convolutions pass %87; the list contents are not visible in this chunk.
  %3117 = torch.aten._convolution %3069, %48, %55, %87, %88, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,640,64,64],f16>, !torch.vtensor<[320,640,1,1],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,320,64,64],f16>
  %3118 = torch.aten.add.Tensor %3117, %3116, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f16>
  %3119 = torch.aten.div.Tensor %3118, %4 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[2,320,64,64],f16>
  %3120 = torch.aten.clone %3119, %int0 : !torch.vtensor<[2,320,64,64],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f16>
  // Normalizes %3120 per group of channels and feeds the result to a convolution
  // with the [320,320,1,1] weight %54. No activation is applied in between.
  // The input is viewed as [2,32,10,4096] and copied to f32 (%int6); variance
  // (correction argument %int0) and mean are reduced over the dims in %93 with
  // keepdim = true.
  %3121 = torch.aten.view %3120, %90 : !torch.vtensor<[2,320,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,32,10,4096],f16>
  %3122 = torch.aten._to_copy %3121, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,32,10,4096],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,32,10,4096],f32>
  %3123 = torch.aten.var.correction %3122, %93, %int0, %true : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>
  %3124 = torch.aten.mean.dim %3122, %93, %true, %none : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
  // The epsilon here is %3 (about 1e-06), not the %5 (1e-05) used by the other
  // normalizations in this chunk.
  %3125 = torch.aten.add.Tensor %3123, %3, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
  %3126 = torch.aten.rsqrt %3125 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
  // The subtraction reads the f16 view %3121; the result type is f32.
  %3127 = torch.aten.sub.Tensor %3121, %3124, %int1 : !torch.vtensor<[2,32,10,4096],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,10,4096],f32>
  %3128 = torch.aten.mul.Tensor %3127, %3126 : !torch.vtensor<[2,32,10,4096],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,10,4096],f32>
  %3129 = torch.aten.view %3128, %100 : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f32>
  // Per-channel multiply and add, both using %55 unsqueezed to [320,1,1].
  %3130 = torch.aten.unsqueeze %55, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16>
  %3131 = torch.aten.unsqueeze %3130, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16>
  %3132 = torch.aten.mul.Tensor %3129, %3131 : !torch.vtensor<[2,320,64,64],f32>, !torch.vtensor<[320,1,1],f16> -> !torch.vtensor<[2,320,64,64],f32>
  %3133 = torch.aten.unsqueeze %55, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16>
  %3134 = torch.aten.unsqueeze %3133, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16>
  %3135 = torch.aten.add.Tensor %3132, %3134, %int1 : !torch.vtensor<[2,320,64,64],f32>, !torch.vtensor<[320,1,1],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f32>
  // Back to f16 (%int5), then the convolution.
  %3136 = torch.aten._to_copy %3135, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,320,64,64],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,320,64,64],f16>
  %3137 = torch.aten._convolution %3136, %54, %55, %87, %88, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[320,320,1,1],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,320,64,64],f16>
  // Moves the 320-sized dim last ([2,320,64,64] -> [2,64,64,320]), flattens the
  // two 64-sized dims into 4096, and applies native_layer_norm with weight and
  // bias both %55 and epsilon %float1.000000e-05.
  // Only %result0_124 (the f16 output) is referenced within this chunk; the two
  // f32 [2,4096,1] results are not.
  // NOTE(review): %156, %158, %159 and %161 are defined outside this chunk.
  %3138 = torch.aten.permute %3137, %156 : !torch.vtensor<[2,320,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,64,64,320],f16>
  %3139 = torch.aten._reshape_alias %3138, %158, %159 : !torch.vtensor<[2,64,64,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
  %result0_124, %result1_125, %result2_126 = torch.aten.native_layer_norm %3139, %161, %55, %55, %float1.000000e-05 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.vtensor<[320],f16>, !torch.vtensor<[320],f16>, !torch.float -> !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,1],f32>, !torch.vtensor<[2,4096,1],f32>
  // Attention in which every operand is derived from %result0_124, followed by
  // a residual add of %3139.
  // Three projections come first. Each transposes %50, flattens %result0_124 to
  // [8192,320], multiplies, and views the product back to [2,4096,320].
  // NOTE(review): all three use the same weight %50 and the same input, so
  // %3143, %3147 and %3151 are computed from identical operands. Presumably the
  // original model had distinct weights that collapsed into one value because
  // the literals are elided in this dump - confirm.
  %3140 = torch.aten.t %50 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
  %3141 = torch.aten._reshape_alias %result0_124, %163, %164 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
  %3142 = torch.aten.mm %3141, %3140 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16>
  %3143 = torch.aten._unsafe_view %3142, %158 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
  %3144 = torch.aten.t %50 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
  %3145 = torch.aten._reshape_alias %result0_124, %163, %164 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
  %3146 = torch.aten.mm %3145, %3144 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16>
  %3147 = torch.aten._unsafe_view %3146, %158 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
  %3148 = torch.aten.t %50 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
  %3149 = torch.aten._reshape_alias %result0_124, %163, %164 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
  %3150 = torch.aten.mm %3149, %3148 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16>
  %3151 = torch.aten._unsafe_view %3150, %158 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
  // Split each projection into 8 heads of 40 features: reshape to [2,4096,8,40],
  // permute by %179 to [2,8,4096,40], clone, and fold the first two dims into 16.
  %3152 = torch.aten._reshape_alias %3143, %176, %177 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16>
  %3153 = torch.aten.permute %3152, %179 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16>
  %3154 = torch.aten.clone %3153, %int0 : !torch.vtensor<[2,8,4096,40],f16>, !torch.int -> !torch.vtensor<[2,8,4096,40],f16>
  %3155 = torch.aten._unsafe_view %3154, %182 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
  %3156 = torch.aten._reshape_alias %3147, %176, %177 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16>
  %3157 = torch.aten.permute %3156, %179 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16>
  %3158 = torch.aten.clone %3157, %int0 : !torch.vtensor<[2,8,4096,40],f16>, !torch.int -> !torch.vtensor<[2,8,4096,40],f16>
  %3159 = torch.aten._unsafe_view %3158, %182 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
  %3160 = torch.aten._reshape_alias %3151, %176, %177 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16>
  %3161 = torch.aten.permute %3160, %179 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16>
  %3162 = torch.aten.clone %3161, %int0 : !torch.vtensor<[2,8,4096,40],f16>, !torch.int -> !torch.vtensor<[2,8,4096,40],f16>
  %3163 = torch.aten._unsafe_view %3162, %182 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
  // Scores: %3159 is transposed to [16,40,4096] and batch-multiplied with the
  // tensor derived from %3155, giving [16,4096,4096]. The expand, _reshape_alias
  // and _unsafe_view ops around the bmm have identical input and result shapes.
  %3164 = torch.aten.transpose.int %3159, %int-1, %int-2 : !torch.vtensor<[16,4096,40],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,40,4096],f16>
  %3165 = torch.aten.expand %3155, %182, %false : !torch.vtensor<[16,4096,40],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,4096,40],f16>
  %3166 = torch.aten._reshape_alias %3165, %182, %194 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
  %3167 = torch.aten.expand %3164, %196, %false : !torch.vtensor<[16,40,4096],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,40,4096],f16>
  %3168 = torch.aten._reshape_alias %3167, %196, %198 : !torch.vtensor<[16,40,4096],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,40,4096],f16>
  %3169 = torch.aten.bmm %3166, %3168 : !torch.vtensor<[16,4096,40],f16>, !torch.vtensor<[16,40,4096],f16> -> !torch.vtensor<[16,4096,4096],f16>
  %3170 = torch.aten._unsafe_view %3169, %201 : !torch.vtensor<[16,4096,4096],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,4096],f16>
  // %2 is the f64 scalar 0.15811388..., which is approximately 1/sqrt(40).
  // The softmax runs over the last dim and stays in f16.
  %3171 = torch.aten.mul.Tensor %3170, %2 : !torch.vtensor<[16,4096,4096],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,4096,4096],f16>
  %3172 = torch.aten._softmax %3171, %int-1, %false : !torch.vtensor<[16,4096,4096],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,4096,4096],f16>
  // Weighted sum: the softmax output is batch-multiplied with the tensor derived
  // from %3163, giving [16,4096,40].
  %3173 = torch.aten.expand %3172, %201, %false : !torch.vtensor<[16,4096,4096],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,4096,4096],f16>
  %3174 = torch.aten._reshape_alias %3173, %201, %206 : !torch.vtensor<[16,4096,4096],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,4096,4096],f16>
  %3175 = torch.aten.expand %3163, %182, %false : !torch.vtensor<[16,4096,40],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,4096,40],f16>
  %3176 = torch.aten._reshape_alias %3175, %182, %194 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
  %3177 = torch.aten.bmm %3174, %3176 : !torch.vtensor<[16,4096,4096],f16>, !torch.vtensor<[16,4096,40],f16> -> !torch.vtensor<[16,4096,40],f16>
  %3178 = torch.aten._unsafe_view %3177, %182 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
  // Merge heads: [16,4096,40] -> [2,8,4096,40] -> [2,4096,8,40] -> [2,4096,320].
  // The permute reuses the same list %179 that split the heads.
  %3179 = torch.aten._reshape_alias %3178, %212, %213 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16>
  %3180 = torch.aten.permute %3179, %179 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16>
  %3181 = torch.aten.clone %3180, %int0 : !torch.vtensor<[2,4096,8,40],f16>, !torch.int -> !torch.vtensor<[2,4096,8,40],f16>
  %3182 = torch.aten._unsafe_view %3181, %158 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
  // Output projection (addmm with transposed %50 and bias %55), then the
  // residual add of %3139, the tensor that was fed to the layer norm.
  %3183 = torch.aten.t %50 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
  %3184 = torch.aten.view %3182, %163 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
  %3185 = torch.aten.addmm %55, %3184, %3183, %int1, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[8192,320],f16>
  %3186 = torch.aten.view %3185, %158 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
  %3187 = torch.aten.add.Tensor %3186, %3139, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16>
  // Attention between the layer-normed %3187 (4096 positions) and the function
  // argument %arg2 (77 positions of 768 features), followed by a residual add
  // of %3187.
  // native_layer_norm uses weight and bias %55 and epsilon %float1.000000e-05.
  %result0_127, %result1_128, %result2_129 = torch.aten.native_layer_norm %3187, %161, %55, %55, %float1.000000e-05 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.vtensor<[320],f16>, !torch.vtensor<[320],f16>, !torch.float -> !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,1],f32>, !torch.vtensor<[2,4096,1],f32>
  // Projection of the normalized tensor with transposed %50 -> [2,4096,320].
  %3188 = torch.aten.t %50 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
  %3189 = torch.aten._reshape_alias %result0_127, %163, %164 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
  %3190 = torch.aten.mm %3189, %3188 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16>
  %3191 = torch.aten._unsafe_view %3190, %158 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
  // Two projections of %arg2, flattened to [154,768], with transposed %49
  // ([320,768]) -> [2,77,320].
  // NOTE(review): both use the same weight %49 and the same input, so %3195 and
  // %3199 are computed from identical operands; presumably distinct weights
  // collapsed because the literals are elided in this dump - confirm.
  %3192 = torch.aten.t %49 : !torch.vtensor<[320,768],f16> -> !torch.vtensor<[768,320],f16>
  %3193 = torch.aten._reshape_alias %arg2, %228, %229 : !torch.vtensor<[2,77,768],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[154,768],f16>
  %3194 = torch.aten.mm %3193, %3192 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,320],f16> -> !torch.vtensor<[154,320],f16>
  %3195 = torch.aten._unsafe_view %3194, %232 : !torch.vtensor<[154,320],f16>, !torch.list<int> -> !torch.vtensor<[2,77,320],f16>
  %3196 = torch.aten.t %49 : !torch.vtensor<[320,768],f16> -> !torch.vtensor<[768,320],f16>
  %3197 = torch.aten._reshape_alias %arg2, %228, %229 : !torch.vtensor<[2,77,768],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[154,768],f16>
  %3198 = torch.aten.mm %3197, %3196 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,320],f16> -> !torch.vtensor<[154,320],f16>
  %3199 = torch.aten._unsafe_view %3198, %232 : !torch.vtensor<[154,320],f16>, !torch.list<int> -> !torch.vtensor<[2,77,320],f16>
  // Split into 8 heads of 40 features and fold batch and head dims into 16:
  // %3203 is [16,4096,40]; %3207 and %3211 are [16,77,40].
  %3200 = torch.aten._reshape_alias %3191, %176, %177 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16>
  %3201 = torch.aten.permute %3200, %179 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16>
  %3202 = torch.aten.clone %3201, %int0 : !torch.vtensor<[2,8,4096,40],f16>, !torch.int -> !torch.vtensor<[2,8,4096,40],f16>
  %3203 = torch.aten._unsafe_view %3202, %182 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
  %3204 = torch.aten._reshape_alias %3195, %242, %243 : !torch.vtensor<[2,77,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,77,8,40],f16>
  %3205 = torch.aten.permute %3204, %179 : !torch.vtensor<[2,77,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,40],f16>
  %3206 = torch.aten.clone %3205, %int0 : !torch.vtensor<[2,8,77,40],f16>, !torch.int -> !torch.vtensor<[2,8,77,40],f16>
  %3207 = torch.aten._unsafe_view %3206, %247 : !torch.vtensor<[2,8,77,40],f16>, !torch.list<int> -> !torch.vtensor<[16,77,40],f16>
  %3208 = torch.aten._reshape_alias %3199, %242, %243 : !torch.vtensor<[2,77,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,77,8,40],f16>
  %3209 = torch.aten.permute %3208, %179 : !torch.vtensor<[2,77,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,40],f16>
  %3210 = torch.aten.clone %3209, %int0 : !torch.vtensor<[2,8,77,40],f16>, !torch.int -> !torch.vtensor<[2,8,77,40],f16>
  %3211 = torch.aten._unsafe_view %3210, %247 : !torch.vtensor<[2,8,77,40],f16>, !torch.list<int> -> !torch.vtensor<[16,77,40],f16>
  // Scores: %3207 is transposed to [16,40,77] and batch-multiplied with the
  // tensor derived from %3203, giving [16,4096,77]. The expand, _reshape_alias
  // and _unsafe_view ops around the bmm have identical input and result shapes.
  %3212 = torch.aten.transpose.int %3207, %int-1, %int-2 : !torch.vtensor<[16,77,40],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,40,77],f16>
  %3213 = torch.aten.expand %3203, %182, %false : !torch.vtensor<[16,4096,40],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,4096,40],f16>
  %3214 = torch.aten._reshape_alias %3213, %182, %194 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
  %3215 = torch.aten.expand %3212, %256, %false : !torch.vtensor<[16,40,77],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,40,77],f16>
  %3216 = torch.aten._reshape_alias %3215, %256, %258 : !torch.vtensor<[16,40,77],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,40,77],f16>
  %3217 = torch.aten.bmm %3214, %3216 : !torch.vtensor<[16,4096,40],f16>, !torch.vtensor<[16,40,77],f16> -> !torch.vtensor<[16,4096,77],f16>
  %3218 = torch.aten._unsafe_view %3217, %261 : !torch.vtensor<[16,4096,77],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,77],f16>
  // %2 is the f64 scalar 0.15811388..., which is approximately 1/sqrt(40).
  // The softmax runs over the last dim (the 77 positions) and stays in f16.
  %3219 = torch.aten.mul.Tensor %3218, %2 : !torch.vtensor<[16,4096,77],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,4096,77],f16>
  %3220 = torch.aten._softmax %3219, %int-1, %false : !torch.vtensor<[16,4096,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,4096,77],f16>
  // Weighted sum: the softmax output is batch-multiplied with the tensor derived
  // from %3211, giving [16,4096,40].
  %3221 = torch.aten.expand %3220, %261, %false : !torch.vtensor<[16,4096,77],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,4096,77],f16>
  %3222 = torch.aten._reshape_alias %3221, %261, %266 : !torch.vtensor<[16,4096,77],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,4096,77],f16>
  %3223 = torch.aten.expand %3211, %247, %false : !torch.vtensor<[16,77,40],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,77,40],f16>
  %3224 = torch.aten._reshape_alias %3223, %247, %269 : !torch.vtensor<[16,77,40],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,77,40],f16>
  %3225 = torch.aten.bmm %3222, %3224 : !torch.vtensor<[16,4096,77],f16>, !torch.vtensor<[16,77,40],f16> -> !torch.vtensor<[16,4096,40],f16>
  %3226 = torch.aten._unsafe_view %3225, %182 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
  // Merge heads: [16,4096,40] -> [2,8,4096,40] -> [2,4096,8,40] -> [2,4096,320].
  %3227 = torch.aten._reshape_alias %3226, %212, %213 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16>
  %3228 = torch.aten.permute %3227, %179 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16>
  %3229 = torch.aten.clone %3228, %int0 : !torch.vtensor<[2,4096,8,40],f16>, !torch.int -> !torch.vtensor<[2,4096,8,40],f16>
  %3230 = torch.aten._unsafe_view %3229, %158 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
  // Output projection (addmm with transposed %50 and bias %55), then the
  // residual add of %3187, the tensor that was fed to the layer norm above.
  %3231 = torch.aten.t %50 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
  %3232 = torch.aten.view %3230, %163 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
  %3233 = torch.aten.addmm %55, %3232, %3231, %int1, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[8192,320],f16>
  %3234 = torch.aten.view %3233, %158 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
  %3235 = torch.aten.add.Tensor %3234, %3187, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16>
  // Gated feed-forward on %3235 with a residual add of %3235 at the end.
  // native_layer_norm uses weight and bias %55 and epsilon %float1.000000e-05.
  %result0_130, %result1_131, %result2_132 = torch.aten.native_layer_norm %3235, %161, %55, %55, %float1.000000e-05 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.vtensor<[320],f16>, !torch.vtensor<[320],f16>, !torch.float -> !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,1],f32>, !torch.vtensor<[2,4096,1],f32>
  // Expand 320 -> 2560 features: addmm with transposed %51 ([2560,320]) and
  // bias %52 ([2560]).
  %3236 = torch.aten.t %51 : !torch.vtensor<[2560,320],f16> -> !torch.vtensor<[320,2560],f16>
  %3237 = torch.aten.view %result0_130, %163 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
  %3238 = torch.aten.addmm %52, %3237, %3236, %int1, %int1 : !torch.vtensor<[2560],f16>, !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,2560],f16>, !torch.int, !torch.int -> !torch.vtensor<[8192,2560],f16>
  %3239 = torch.aten.view %3238, %285 : !torch.vtensor<[8192,2560],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,2560],f16>
  // Split the last dim into [0,1280) and [1280,2560). GELU is applied to the
  // second half only, and the first half is multiplied by it.
  // NOTE(review): %str_0 (the gelu mode string) is defined outside this chunk.
  %3240 = torch.aten.slice.Tensor %3239, %int-1, %int0, %int1280, %int1 : !torch.vtensor<[2,4096,2560],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,4096,1280],f16>
  %3241 = torch.aten.slice.Tensor %3239, %int-1, %int1280, %int2560, %int1 : !torch.vtensor<[2,4096,2560],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,4096,1280],f16>
  %3242 = torch.aten.gelu %3241, %str_0 : !torch.vtensor<[2,4096,1280],f16>, !torch.str -> !torch.vtensor<[2,4096,1280],f16>
  %3243 = torch.aten.mul.Tensor %3240, %3242 : !torch.vtensor<[2,4096,1280],f16>, !torch.vtensor<[2,4096,1280],f16> -> !torch.vtensor<[2,4096,1280],f16>
  // Project 1280 -> 320 features: addmm with transposed %53 ([320,1280]) and
  // bias %55; then the residual add.
  %3244 = torch.aten.t %53 : !torch.vtensor<[320,1280],f16> -> !torch.vtensor<[1280,320],f16>
  %3245 = torch.aten.view %3243, %292 : !torch.vtensor<[2,4096,1280],f16>, !torch.list<int> -> !torch.vtensor<[8192,1280],f16>
  %3246 = torch.aten.addmm %55, %3245, %3244, %int1, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[8192,1280],f16>, !torch.vtensor<[1280,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[8192,320],f16>
  %3247 = torch.aten.view %3246, %158 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
  %3248 = torch.aten.add.Tensor %3247, %3235, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16>
  // Restores the [2,320,64,64] layout: the 4096 dim is reshaped to 64x64 and the
  // 320-sized dim is permuted (list %300) back to position 1.
  // A convolution with the [320,320,1,1] weight %54 follows, and %3119 (the
  // tensor from before the clone/normalization at the start of this stage) is
  // added.
  // NOTE(review): %297, %298 and %300 are defined outside this chunk.
  %3249 = torch.aten._reshape_alias %3248, %297, %298 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,64,64,320],f16>
  %3250 = torch.aten.permute %3249, %300 : !torch.vtensor<[2,64,64,320],f16>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f16>
  %3251 = torch.aten._convolution %3250, %54, %55, %87, %88, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[320,320,1,1],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,320,64,64],f16>
  %3252 = torch.aten.add.Tensor %3251, %3119, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f16>
  // Concatenates %3252 with %89 along dim 1, giving [2,640,64,64].
  // NOTE(review): %89 ([2,320,64,64]) is defined before this chunk; presumably
  // it is a skip connection from an earlier stage - confirm.
  %3253 = torch.prim.ListConstruct %3252, %89 : (!torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f16>) -> !torch.list<vtensor>
  %3254 = torch.aten.cat %3253, %int1 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[2,640,64,64],f16>
  // Normalizes %3254 per group of channels, then applies SiLU and a convolution
  // with the [320,640,3,3] weight %46 (640 -> 320 channels).
  // The [2,640,64,64] input is viewed as [2,32,20,4096] and copied to f32 (%int6).
  // Variance (correction argument %int0) and mean are reduced over the dims in
  // %93 with keepdim = true, each giving [2,32,1,1].
  // NOTE(review): %3071, %3080 and %93 are defined outside this chunk.
  %3255 = torch.aten.view %3254, %3071 : !torch.vtensor<[2,640,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,32,20,4096],f16>
  %3256 = torch.aten._to_copy %3255, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,32,20,4096],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,32,20,4096],f32>
  %3257 = torch.aten.var.correction %3256, %93, %int0, %true : !torch.vtensor<[2,32,20,4096],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>
  %3258 = torch.aten.mean.dim %3256, %93, %true, %none : !torch.vtensor<[2,32,20,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
  // %5 is the f64 scalar 1e-05; it is added to the variance before rsqrt.
  %3259 = torch.aten.add.Tensor %3257, %5, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
  %3260 = torch.aten.rsqrt %3259 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
  // The subtraction reads the f16 view %3255; the result type is f32.
  %3261 = torch.aten.sub.Tensor %3255, %3258, %int1 : !torch.vtensor<[2,32,20,4096],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,20,4096],f32>
  %3262 = torch.aten.mul.Tensor %3261, %3260 : !torch.vtensor<[2,32,20,4096],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,20,4096],f32>
  %3263 = torch.aten.view %3262, %3080 : !torch.vtensor<[2,32,20,4096],f32>, !torch.list<int> -> !torch.vtensor<[2,640,64,64],f32>
  // Per-channel multiply and add, both using %45 ([640]) unsqueezed to [640,1,1].
  %3264 = torch.aten.unsqueeze %45, %int-1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640,1],f16>
  %3265 = torch.aten.unsqueeze %3264, %int-1 : !torch.vtensor<[640,1],f16>, !torch.int -> !torch.vtensor<[640,1,1],f16>
  %3266 = torch.aten.mul.Tensor %3263, %3265 : !torch.vtensor<[2,640,64,64],f32>, !torch.vtensor<[640,1,1],f16> -> !torch.vtensor<[2,640,64,64],f32>
  %3267 = torch.aten.unsqueeze %45, %int-1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640,1],f16>
  %3268 = torch.aten.unsqueeze %3267, %int-1 : !torch.vtensor<[640,1],f16>, !torch.int -> !torch.vtensor<[640,1,1],f16>
  %3269 = torch.aten.add.Tensor %3266, %3268, %int1 : !torch.vtensor<[2,640,64,64],f32>, !torch.vtensor<[640,1,1],f16>, !torch.int -> !torch.vtensor<[2,640,64,64],f32>
  // Back to f16 (%int5), SiLU, then the convolution with bias %55.
  %3270 = torch.aten._to_copy %3269, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,640,64,64],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,640,64,64],f16>
  %3271 = torch.aten.silu %3270 : !torch.vtensor<[2,640,64,64],f16> -> !torch.vtensor<[2,640,64,64],f16>
  %3272 = torch.aten._convolution %3271, %46, %55, %87, %87, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,640,64,64],f16>, !torch.vtensor<[320,640,3,3],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,320,64,64],f16>
  // Projects the [2,1280] tensor %86 to 320 features and adds it to the
  // convolution output %3272.
  // SiLU is applied first, then addmm with the transposed [320,1280] weight %53
  // and bias %55 (both scaling arguments are %int1).
  // The two slice ops span 0..INT64_MAX with step 1 on dims 0 and 1, so the
  // shape stays [2,320]. Two unsqueezes give [2,320,1,1], which broadcasts over
  // the trailing 64x64 dims in the final add.
  // NOTE(review): %86 is presumably the timestep embedding - confirm against
  // its definition earlier in the file.
  %3273 = torch.aten.silu %86 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
  %3274 = torch.aten.t %53 : !torch.vtensor<[320,1280],f16> -> !torch.vtensor<[1280,320],f16>
  %3275 = torch.aten.addmm %55, %3273, %3274, %int1, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[2,320],f16>
  %3276 = torch.aten.slice.Tensor %3275, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,320],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,320],f16>
  %3277 = torch.aten.slice.Tensor %3276, %int1, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,320],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,320],f16>
  %3278 = torch.aten.unsqueeze %3277, %int2 : !torch.vtensor<[2,320],f16>, !torch.int -> !torch.vtensor<[2,320,1],f16>
  %3279 = torch.aten.unsqueeze %3278, %int3 : !torch.vtensor<[2,320,1],f16>, !torch.int -> !torch.vtensor<[2,320,1,1],f16>
  %3280 = torch.aten.add.Tensor %3272, %3279, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,1,1],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f16>
  // Normalizes %3280 per group of channels, then applies SiLU and a convolution
  // with the [320,320,3,3] weight %47.
  // The [2,320,64,64] input is viewed as [2,32,10,4096] and copied to f32 (%int6).
  // Variance (correction argument %int0) and mean are reduced over the dims in
  // %93 with keepdim = true, each giving [2,32,1,1].
  // NOTE(review): %90, %93 and %100 are defined outside this chunk.
  %3281 = torch.aten.view %3280, %90 : !torch.vtensor<[2,320,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,32,10,4096],f16>
  %3282 = torch.aten._to_copy %3281, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,32,10,4096],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,32,10,4096],f32>
  %3283 = torch.aten.var.correction %3282, %93, %int0, %true : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>
  %3284 = torch.aten.mean.dim %3282, %93, %true, %none : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
  // %5 is the f64 scalar 1e-05; it is added to the variance before rsqrt.
  %3285 = torch.aten.add.Tensor %3283, %5, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
  %3286 = torch.aten.rsqrt %3285 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
  // The subtraction reads the f16 view %3281; the result type is f32.
  %3287 = torch.aten.sub.Tensor %3281, %3284, %int1 : !torch.vtensor<[2,32,10,4096],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,10,4096],f32>
  %3288 = torch.aten.mul.Tensor %3287, %3286 : !torch.vtensor<[2,32,10,4096],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,10,4096],f32>
  %3289 = torch.aten.view %3288, %100 : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f32>
  // Per-channel multiply and add, both using %55 unsqueezed to [320,1,1].
  %3290 = torch.aten.unsqueeze %55, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16>
  %3291 = torch.aten.unsqueeze %3290, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16>
  %3292 = torch.aten.mul.Tensor %3289, %3291 : !torch.vtensor<[2,320,64,64],f32>, !torch.vtensor<[320,1,1],f16> -> !torch.vtensor<[2,320,64,64],f32>
  %3293 = torch.aten.unsqueeze %55, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16>
  %3294 = torch.aten.unsqueeze %3293, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16>
  %3295 = torch.aten.add.Tensor %3292, %3294, %int1 : !torch.vtensor<[2,320,64,64],f32>, !torch.vtensor<[320,1,1],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f32>
  // Back to f16 (%int5), SiLU, then the convolution.
  %3296 = torch.aten._to_copy %3295, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,320,64,64],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,320,64,64],f16>
  %3297 = torch.aten.silu %3296 : !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16>
  %3298 = torch.aten._convolution %3297, %47, %55, %87, %87, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[320,320,3,3],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,320,64,64],f16>
  // Second path into the sum: the concatenated [2,640,64,64] tensor %3254 goes
  // through a convolution with the [320,640,1,1] weight %48, and the result is
  // added to %3298.
  // The sum is divided by %4, which is the f64 scalar 1.0.
  %3299 = torch.aten._convolution %3254, %48, %55, %87, %88, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,640,64,64],f16>, !torch.vtensor<[320,640,1,1],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,320,64,64],f16>
  %3300 = torch.aten.add.Tensor %3299, %3298, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f16>
  %3301 = torch.aten.div.Tensor %3300, %4 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[2,320,64,64],f16>
  %3302 = torch.aten.view %3301, %90 : !torch.vtensor<[2,320,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,32,10,4096],f16>
  %3303 = torch.aten._to_copy %3302, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,32,10,4096],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,32,10,4096],f32>
  %3304 = torch.aten.var.correction %3303, %93, %int0, %true : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>
  %3305 = torch.aten.mean.dim %3303, %93, %true, %none : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
  %3306 = torch.aten.add.Tensor %3304, %3, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
  %3307 = torch.aten.rsqrt %3306 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
  %3308 = torch.aten.sub.Tensor %3302, %3305, %int1 : !torch.vtensor<[2,32,10,4096],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,10,4096],f32>
  %3309 = torch.aten.mul.Tensor %3308, %3307 : !torch.vtensor<[2,32,10,4096],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,10,4096],f32>
  %3310 = torch.aten.view %3309, %100 : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f32>
  %3311 = torch.aten.unsqueeze %55, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16>
  %3312 = torch.aten.unsqueeze %3311, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16>
  %3313 = torch.aten.mul.Tensor %3310, %3312 : !torch.vtensor<[2,320,64,64],f32>, !torch.vtensor<[320,1,1],f16> -> !torch.vtensor<[2,320,64,64],f32>
  %3314 = torch.aten.unsqueeze %55, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16>
  %3315 = torch.aten.unsqueeze %3314, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16>
  %3316 = torch.aten.add.Tensor %3313, %3315, %int1 : !torch.vtensor<[2,320,64,64],f32>, !torch.vtensor<[320,1,1],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f32>
  %3317 = torch.aten._to_copy %3316, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,320,64,64],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,320,64,64],f16>
  %3318 = torch.aten._convolution %3317, %54, %55, %87, %88, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[320,320,1,1],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,320,64,64],f16>
  %3319 = torch.aten.permute %3318, %156 : !torch.vtensor<[2,320,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,64,64,320],f16>
  %3320 = torch.aten._reshape_alias %3319, %158, %159 : !torch.vtensor<[2,64,64,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
  %result0_133, %result1_134, %result2_135 = torch.aten.native_layer_norm %3320, %161, %55, %55, %float1.000000e-05 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.vtensor<[320],f16>, !torch.vtensor<[320],f16>, !torch.float -> !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,1],f32>, !torch.vtensor<[2,4096,1],f32>
  %3321 = torch.aten.t %50 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
  %3322 = torch.aten._reshape_alias %result0_133, %163, %164 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
  %3323 = torch.aten.mm %3322, %3321 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16>
  %3324 = torch.aten._unsafe_view %3323, %158 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
  %3325 = torch.aten.t %50 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
  %3326 = torch.aten._reshape_alias %result0_133, %163, %164 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
  %3327 = torch.aten.mm %3326, %3325 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16>
  %3328 = torch.aten._unsafe_view %3327, %158 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
  %3329 = torch.aten.t %50 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
  %3330 = torch.aten._reshape_alias %result0_133, %163, %164 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
  %3331 = torch.aten.mm %3330, %3329 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16>
  %3332 = torch.aten._unsafe_view %3331, %158 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
  %3333 = torch.aten._reshape_alias %3324, %176, %177 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16>
  %3334 = torch.aten.permute %3333, %179 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16>
  %3335 = torch.aten.clone %3334, %int0 : !torch.vtensor<[2,8,4096,40],f16>, !torch.int -> !torch.vtensor<[2,8,4096,40],f16>
  %3336 = torch.aten._unsafe_view %3335, %182 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
  %3337 = torch.aten._reshape_alias %3328, %176, %177 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16>
  %3338 = torch.aten.permute %3337, %179 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16>
  %3339 = torch.aten.clone %3338, %int0 : !torch.vtensor<[2,8,4096,40],f16>, !torch.int -> !torch.vtensor<[2,8,4096,40],f16>
  %3340 = torch.aten._unsafe_view %3339, %182 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
  %3341 = torch.aten._reshape_alias %3332, %176, %177 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16>
  %3342 = torch.aten.permute %3341, %179 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16>
  %3343 = torch.aten.clone %3342, %int0 : !torch.vtensor<[2,8,4096,40],f16>, !torch.int -> !torch.vtensor<[2,8,4096,40],f16>
  %3344 = torch.aten._unsafe_view %3343, %182 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
  %3345 = torch.aten.transpose.int %3340, %int-1, %int-2 : !torch.vtensor<[16,4096,40],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,40,4096],f16>
  %3346 = torch.aten.expand %3336, %182, %false : !torch.vtensor<[16,4096,40],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,4096,40],f16>
  %3347 = torch.aten._reshape_alias %3346, %182, %194 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
  %3348 = torch.aten.expand %3345, %196, %false : !torch.vtensor<[16,40,4096],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,40,4096],f16>
  %3349 = torch.aten._reshape_alias %3348, %196, %198 : !torch.vtensor<[16,40,4096],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,40,4096],f16>
  %3350 = torch.aten.bmm %3347, %3349 : !torch.vtensor<[16,4096,40],f16>, !torch.vtensor<[16,40,4096],f16> -> !torch.vtensor<[16,4096,4096],f16>
  %3351 = torch.aten._unsafe_view %3350, %201 : !torch.vtensor<[16,4096,4096],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,4096],f16>
  %3352 = torch.aten.mul.Tensor %3351, %2 : !torch.vtensor<[16,4096,4096],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,4096,4096],f16>
  %3353 = torch.aten._softmax %3352, %int-1, %false : !torch.vtensor<[16,4096,4096],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,4096,4096],f16>
  %3354 = torch.aten.expand %3353, %201, %false : !torch.vtensor<[16,4096,4096],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,4096,4096],f16>
  %3355 = torch.aten._reshape_alias %3354, %201, %206 : !torch.vtensor<[16,4096,4096],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,4096,4096],f16>
  %3356 = torch.aten.expand %3344, %182, %false : !torch.vtensor<[16,4096,40],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,4096,40],f16>
  %3357 = torch.aten._reshape_alias %3356, %182, %194 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
  %3358 = torch.aten.bmm %3355, %3357 : !torch.vtensor<[16,4096,4096],f16>, !torch.vtensor<[16,4096,40],f16> -> !torch.vtensor<[16,4096,40],f16>
  %3359 = torch.aten._unsafe_view %3358, %182 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
  %3360 = torch.aten._reshape_alias %3359, %212, %213 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16>
  %3361 = torch.aten.permute %3360, %179 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16>
  %3362 = torch.aten.clone %3361, %int0 : !torch.vtensor<[2,4096,8,40],f16>, !torch.int -> !torch.vtensor<[2,4096,8,40],f16>
  %3363 = torch.aten._unsafe_view %3362, %158 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
  %3364 = torch.aten.t %50 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
  %3365 = torch.aten.view %3363, %163 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
  %3366 = torch.aten.addmm %55, %3365, %3364, %int1, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[8192,320],f16>
  %3367 = torch.aten.view %3366, %158 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
  %3368 = torch.aten.add.Tensor %3367, %3320, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16>
  %result0_136, %result1_137, %result2_138 = torch.aten.native_layer_norm %3368, %161, %55, %55, %float1.000000e-05 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.vtensor<[320],f16>, !torch.vtensor<[320],f16>, !torch.float -> !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,1],f32>, !torch.vtensor<[2,4096,1],f32>
  %3369 = torch.aten.t %50 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
  %3370 = torch.aten._reshape_alias %result0_136, %163, %164 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
  %3371 = torch.aten.mm %3370, %3369 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16>
  %3372 = torch.aten._unsafe_view %3371, %158 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
  %3373 = torch.aten.t %49 : !torch.vtensor<[320,768],f16> -> !torch.vtensor<[768,320],f16>
  %3374 = torch.aten._reshape_alias %arg2, %228, %229 : !torch.vtensor<[2,77,768],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[154,768],f16>
  %3375 = torch.aten.mm %3374, %3373 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,320],f16> -> !torch.vtensor<[154,320],f16>
  %3376 = torch.aten._unsafe_view %3375, %232 : !torch.vtensor<[154,320],f16>, !torch.list<int> -> !torch.vtensor<[2,77,320],f16>
  %3377 = torch.aten.t %49 : !torch.vtensor<[320,768],f16> -> !torch.vtensor<[768,320],f16>
  %3378 = torch.aten._reshape_alias %arg2, %228, %229 : !torch.vtensor<[2,77,768],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[154,768],f16>
  %3379 = torch.aten.mm %3378, %3377 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,320],f16> -> !torch.vtensor<[154,320],f16>
  %3380 = torch.aten._unsafe_view %3379, %232 : !torch.vtensor<[154,320],f16>, !torch.list<int> -> !torch.vtensor<[2,77,320],f16>
  %3381 = torch.aten._reshape_alias %3372, %176, %177 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16>
  %3382 = torch.aten.permute %3381, %179 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16>
  %3383 = torch.aten.clone %3382, %int0 : !torch.vtensor<[2,8,4096,40],f16>, !torch.int -> !torch.vtensor<[2,8,4096,40],f16>
  %3384 = torch.aten._unsafe_view %3383, %182 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
  %3385 = torch.aten._reshape_alias %3376, %242, %243 : !torch.vtensor<[2,77,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,77,8,40],f16>
  %3386 = torch.aten.permute %3385, %179 : !torch.vtensor<[2,77,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,40],f16>
  %3387 = torch.aten.clone %3386, %int0 : !torch.vtensor<[2,8,77,40],f16>, !torch.int -> !torch.vtensor<[2,8,77,40],f16>
  %3388 = torch.aten._unsafe_view %3387, %247 : !torch.vtensor<[2,8,77,40],f16>, !torch.list<int> -> !torch.vtensor<[16,77,40],f16>
  %3389 = torch.aten._reshape_alias %3380, %242, %243 : !torch.vtensor<[2,77,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,77,8,40],f16>
  %3390 = torch.aten.permute %3389, %179 : !torch.vtensor<[2,77,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,40],f16>
  %3391 = torch.aten.clone %3390, %int0 : !torch.vtensor<[2,8,77,40],f16>, !torch.int -> !torch.vtensor<[2,8,77,40],f16>
  %3392 = torch.aten._unsafe_view %3391, %247 : !torch.vtensor<[2,8,77,40],f16>, !torch.list<int> -> !torch.vtensor<[16,77,40],f16>
  %3393 = torch.aten.transpose.int %3388, %int-1, %int-2 : !torch.vtensor<[16,77,40],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,40,77],f16>
  %3394 = torch.aten.expand %3384, %182, %false : !torch.vtensor<[16,4096,40],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,4096,40],f16>
  %3395 = torch.aten._reshape_alias %3394, %182, %194 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
  %3396 = torch.aten.expand %3393, %256, %false : !torch.vtensor<[16,40,77],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,40,77],f16>
  %3397 = torch.aten._reshape_alias %3396, %256, %258 : !torch.vtensor<[16,40,77],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,40,77],f16>
  %3398 = torch.aten.bmm %3395, %3397 : !torch.vtensor<[16,4096,40],f16>, !torch.vtensor<[16,40,77],f16> -> !torch.vtensor<[16,4096,77],f16>
  %3399 = torch.aten._unsafe_view %3398, %261 : !torch.vtensor<[16,4096,77],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,77],f16>
  %3400 = torch.aten.mul.Tensor %3399, %2 : !torch.vtensor<[16,4096,77],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,4096,77],f16>
  %3401 = torch.aten._softmax %3400, %int-1, %false : !torch.vtensor<[16,4096,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,4096,77],f16>
  %3402 = torch.aten.expand %3401, %261, %false : !torch.vtensor<[16,4096,77],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,4096,77],f16>
  %3403 = torch.aten._reshape_alias %3402, %261, %266 : !torch.vtensor<[16,4096,77],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,4096,77],f16>
  %3404 = torch.aten.expand %3392, %247, %false : !torch.vtensor<[16,77,40],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,77,40],f16>
  %3405 = torch.aten._reshape_alias %3404, %247, %269 : !torch.vtensor<[16,77,40],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,77,40],f16>
  %3406 = torch.aten.bmm %3403, %3405 : !torch.vtensor<[16,4096,77],f16>, !torch.vtensor<[16,77,40],f16> -> !torch.vtensor<[16,4096,40],f16>
  %3407 = torch.aten._unsafe_view %3406, %182 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
  %3408 = torch.aten._reshape_alias %3407, %212, %213 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16>
  %3409 = torch.aten.permute %3408, %179 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16>
  %3410 = torch.aten.clone %3409, %int0 : !torch.vtensor<[2,4096,8,40],f16>, !torch.int -> !torch.vtensor<[2,4096,8,40],f16>
  %3411 = torch.aten._unsafe_view %3410, %158 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
  %3412 = torch.aten.t %50 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
  %3413 = torch.aten.view %3411, %163 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
  %3414 = torch.aten.addmm %55, %3413, %3412, %int1, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[8192,320],f16>
  %3415 = torch.aten.view %3414, %158 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
  %3416 = torch.aten.add.Tensor %3415, %3368, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16>
  %result0_139, %result1_140, %result2_141 = torch.aten.native_layer_norm %3416, %161, %55, %55, %float1.000000e-05 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.vtensor<[320],f16>, !torch.vtensor<[320],f16>, !torch.float -> !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,1],f32>, !torch.vtensor<[2,4096,1],f32>
  %3417 = torch.aten.t %51 : !torch.vtensor<[2560,320],f16> -> !torch.vtensor<[320,2560],f16>
  %3418 = torch.aten.view %result0_139, %163 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
  %3419 = torch.aten.addmm %52, %3418, %3417, %int1, %int1 : !torch.vtensor<[2560],f16>, !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,2560],f16>, !torch.int, !torch.int -> !torch.vtensor<[8192,2560],f16>
  %3420 = torch.aten.view %3419, %285 : !torch.vtensor<[8192,2560],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,2560],f16>
  %3421 = torch.aten.slice.Tensor %3420, %int-1, %int0, %int1280, %int1 : !torch.vtensor<[2,4096,2560],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,4096,1280],f16>
  %3422 = torch.aten.slice.Tensor %3420, %int-1, %int1280, %int2560, %int1 : !torch.vtensor<[2,4096,2560],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,4096,1280],f16>
  %3423 = torch.aten.gelu %3422, %str_0 : !torch.vtensor<[2,4096,1280],f16>, !torch.str -> !torch.vtensor<[2,4096,1280],f16>
  %3424 = torch.aten.mul.Tensor %3421, %3423 : !torch.vtensor<[2,4096,1280],f16>, !torch.vtensor<[2,4096,1280],f16> -> !torch.vtensor<[2,4096,1280],f16>
  %3425 = torch.aten.t %53 : !torch.vtensor<[320,1280],f16> -> !torch.vtensor<[1280,320],f16>
  %3426 = torch.aten.view %3424, %292 : !torch.vtensor<[2,4096,1280],f16>, !torch.list<int> -> !torch.vtensor<[8192,1280],f16>
  %3427 = torch.aten.addmm %55, %3426, %3425, %int1, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[8192,1280],f16>, !torch.vtensor<[1280,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[8192,320],f16>
  %3428 = torch.aten.view %3427, %158 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
  %3429 = torch.aten.add.Tensor %3428, %3416, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16>
  %3430 = torch.aten._reshape_alias %3429, %297, %298 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,64,64,320],f16>
  %3431 = torch.aten.permute %3430, %300 : !torch.vtensor<[2,64,64,320],f16>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f16>
  %3432 = torch.aten._convolution %3431, %54, %55, %87, %88, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[320,320,1,1],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,320,64,64],f16>
  %3433 = torch.aten.add.Tensor %3432, %3301, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f16>
  %3434 = torch.aten.clone %3433, %int0 : !torch.vtensor<[2,320,64,64],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f16>
  %3435 = torch.aten.view %3434, %90 : !torch.vtensor<[2,320,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,32,10,4096],f16>
  %3436 = torch.aten._to_copy %3435, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,32,10,4096],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,32,10,4096],f32>
  %3437 = torch.aten.var.correction %3436, %93, %int0, %true : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>
  %3438 = torch.aten.mean.dim %3436, %93, %true, %none : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
  %3439 = torch.aten.add.Tensor %3437, %5, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
  %3440 = torch.aten.rsqrt %3439 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
  %3441 = torch.aten.sub.Tensor %3435, %3438, %int1 : !torch.vtensor<[2,32,10,4096],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,10,4096],f32>
  %3442 = torch.aten.mul.Tensor %3441, %3440 : !torch.vtensor<[2,32,10,4096],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,10,4096],f32>
  %3443 = torch.aten.view %3442, %100 : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f32>
  %3444 = torch.aten.unsqueeze %55, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16>
  %3445 = torch.aten.unsqueeze %3444, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16>
  %3446 = torch.aten.mul.Tensor %3443, %3445 : !torch.vtensor<[2,320,64,64],f32>, !torch.vtensor<[320,1,1],f16> -> !torch.vtensor<[2,320,64,64],f32>
  %3447 = torch.aten.unsqueeze %55, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16>
  %3448 = torch.aten.unsqueeze %3447, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16>
  %3449 = torch.aten.add.Tensor %3446, %3448, %int1 : !torch.vtensor<[2,320,64,64],f32>, !torch.vtensor<[320,1,1],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f32>
  %3450 = torch.aten._to_copy %3449, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,320,64,64],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,320,64,64],f16>
  %3451 = torch.aten.silu %3450 : !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16>
  %3452 = torch.aten._convolution %3451, %56, %57, %87, %87, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[4,320,3,3],f16>, !torch.vtensor<[4],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,4,64,64],f16>
  return %3452 : !torch.vtensor<[2,4,64,64],f16>
 }