Skip to content

Instantly share code, notes, and snippets.

@pashu123
Created October 4, 2022 14:34
Show Gist options
  • Save pashu123/84a3a2130e6db9f97c670caf42256208 to your computer and use it in GitHub Desktop.
// -----// IR Dump After DropShapeCalculations (torch-drop-shape-calculations) //----- //
func.func @forward(%arg0: !torch.vtensor<[2,4,64,64],f16>, %arg1: !torch.vtensor<[1],f16>, %arg2: !torch.vtensor<[2,77,768],f16>) -> !torch.vtensor<[2,4,64,64],f16> {
%int160 = torch.constant.int 160
%float1.600000e02 = torch.constant.float 1.600000e+02
%str = torch.constant.str "AssertionError: "
%int4 = torch.constant.int 4
%float0.000000e00 = torch.constant.float 0.000000e+00
%0 = torch.vtensor.literal(dense<0.079056941504209485> : tensor<f64>) : !torch.vtensor<[],f64>
%1 = torch.vtensor.literal(dense<0.11180339887498948> : tensor<f64>) : !torch.vtensor<[],f64>
%2 = torch.vtensor.literal(dense<0.15811388300841897> : tensor<f64>) : !torch.vtensor<[],f64>
%3 = torch.vtensor.literal(dense<9.9999999999999995E-7> : tensor<f64>) : !torch.vtensor<[],f64>
%4 = torch.vtensor.literal(dense<1.000000e+00> : tensor<f64>) : !torch.vtensor<[],f64>
%5 = torch.vtensor.literal(dense<1.000000e-05> : tensor<f64>) : !torch.vtensor<[],f64>
%6 = torch.vtensor.literal(dense<1> : tensor<si64>) : !torch.vtensor<[],si64>
%7 = torch.vtensor.literal(dense<160> : tensor<si64>) : !torch.vtensor<[],si64>
%8 = torch.vtensor.literal(dense<-9.2103403719761836> : tensor<f64>) : !torch.vtensor<[],f64>
%9 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x320xf16>) : !torch.vtensor<[1280,320],f16>
%10 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x4x3x3xf16>) : !torch.vtensor<[320,4,3,3],f16>
%11 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x320x3x3xf16>) : !torch.vtensor<[640,320,3,3],f16>
%12 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x320x1x1xf16>) : !torch.vtensor<[640,320,1,1],f16>
%13 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x640x3x3xf16>) : !torch.vtensor<[1280,640,3,3],f16>
%14 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x640x1x1xf16>) : !torch.vtensor<[1280,640,1,1],f16>
%15 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x2560x3x3xf16>) : !torch.vtensor<[1280,2560,3,3],f16>
%16 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x2560x1x1xf16>) : !torch.vtensor<[1280,2560,1,1],f16>
%17 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1920x3x3xf16>) : !torch.vtensor<[1280,1920,3,3],f16>
%18 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1920x1x1xf16>) : !torch.vtensor<[1280,1920,1,1],f16>
%19 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x768xf16>) : !torch.vtensor<[1280,768],f16>
%20 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16>
%21 = torch.vtensor.literal(dense_resource<__elided__> : tensor<10240x1280xf16>) : !torch.vtensor<[10240,1280],f16>
%22 = torch.vtensor.literal(dense_resource<__elided__> : tensor<10240xf16>) : !torch.vtensor<[10240],f16>
%23 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x5120xf16>) : !torch.vtensor<[1280,5120],f16>
%24 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x1x1xf16>) : !torch.vtensor<[1280,1280,1,1],f16>
%25 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x3x3xf16>) : !torch.vtensor<[1280,1280,3,3],f16>
%26 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1920xf16>) : !torch.vtensor<[1920],f16>
%27 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x1920x3x3xf16>) : !torch.vtensor<[640,1920,3,3],f16>
%28 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x1920x1x1xf16>) : !torch.vtensor<[640,1920,1,1],f16>
%29 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16>
%30 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x1280x3x3xf16>) : !torch.vtensor<[640,1280,3,3],f16>
%31 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x1280x1x1xf16>) : !torch.vtensor<[640,1280,1,1],f16>
%32 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x960x3x3xf16>) : !torch.vtensor<[640,960,3,3],f16>
%33 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x1280xf16>) : !torch.vtensor<[640,1280],f16>
%34 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x960x1x1xf16>) : !torch.vtensor<[640,960,1,1],f16>
%35 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x768xf16>) : !torch.vtensor<[640,768],f16>
%36 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xf16>) : !torch.vtensor<[640,640],f16>
%37 = torch.vtensor.literal(dense_resource<__elided__> : tensor<5120x640xf16>) : !torch.vtensor<[5120,640],f16>
%38 = torch.vtensor.literal(dense_resource<__elided__> : tensor<5120xf16>) : !torch.vtensor<[5120],f16>
%39 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x2560xf16>) : !torch.vtensor<[640,2560],f16>
%40 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640x1x1xf16>) : !torch.vtensor<[640,640,1,1],f16>
%41 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640x3x3xf16>) : !torch.vtensor<[640,640,3,3],f16>
%42 = torch.vtensor.literal(dense_resource<__elided__> : tensor<960xf16>) : !torch.vtensor<[960],f16>
%43 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x960x3x3xf16>) : !torch.vtensor<[320,960,3,3],f16>
%44 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x960x1x1xf16>) : !torch.vtensor<[320,960,1,1],f16>
%45 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16>
%46 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x640x3x3xf16>) : !torch.vtensor<[320,640,3,3],f16>
%47 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320x3x3xf16>) : !torch.vtensor<[320,320,3,3],f16>
%48 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x640x1x1xf16>) : !torch.vtensor<[320,640,1,1],f16>
%49 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x768xf16>) : !torch.vtensor<[320,768],f16>
%50 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xf16>) : !torch.vtensor<[320,320],f16>
%51 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560x320xf16>) : !torch.vtensor<[2560,320],f16>
%52 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560xf16>) : !torch.vtensor<[2560],f16>
%53 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x1280xf16>) : !torch.vtensor<[320,1280],f16>
%54 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320x1x1xf16>) : !torch.vtensor<[320,320,1,1],f16>
%55 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16>
%56 = torch.vtensor.literal(dense_resource<__elided__> : tensor<4x320x3x3xf16>) : !torch.vtensor<[4,320,3,3],f16>
%57 = torch.vtensor.literal(dense<[-1.393320e-03, -1.588820e-03, -2.624990e-04, -2.531050e-03]> : tensor<4xf16>) : !torch.vtensor<[4],f16>
%int2 = torch.constant.int 2
%false = torch.constant.bool false
%int0 = torch.constant.int 0
%int6 = torch.constant.int 6
%none = torch.constant.none
%int9223372036854775807 = torch.constant.int 9223372036854775807
%int1 = torch.constant.int 1
%int-1 = torch.constant.int -1
%int5 = torch.constant.int 5
%true = torch.constant.bool true
%int32 = torch.constant.int 32
%int10 = torch.constant.int 10
%int4096 = torch.constant.int 4096
%int3 = torch.constant.int 3
%int320 = torch.constant.int 320
%int64 = torch.constant.int 64
%int1310720 = torch.constant.int 1310720
%float1.000000e-05 = torch.constant.float 1.000000e-05
%int8192 = torch.constant.int 8192
%int8 = torch.constant.int 8
%int40 = torch.constant.int 40
%int16 = torch.constant.int 16
%int-2 = torch.constant.int -2
%int163840 = torch.constant.int 163840
%int16777216 = torch.constant.int 16777216
%int154 = torch.constant.int 154
%int768 = torch.constant.int 768
%int77 = torch.constant.int 77
%int24640 = torch.constant.int 24640
%int3080 = torch.constant.int 3080
%int315392 = torch.constant.int 315392
%int2560 = torch.constant.int 2560
%int1280 = torch.constant.int 1280
%str_0 = torch.constant.str "none"
%int20480 = torch.constant.int 20480
%int1024 = torch.constant.int 1024
%int20 = torch.constant.int 20
%int640 = torch.constant.int 640
%int655360 = torch.constant.int 655360
%int2048 = torch.constant.int 2048
%int80 = torch.constant.int 80
%int81920 = torch.constant.int 81920
%int1048576 = torch.constant.int 1048576
%int49280 = torch.constant.int 49280
%int6160 = torch.constant.int 6160
%int78848 = torch.constant.int 78848
%int5120 = torch.constant.int 5120
%int256 = torch.constant.int 256
%int327680 = torch.constant.int 327680
%int512 = torch.constant.int 512
%int40960 = torch.constant.int 40960
%int65536 = torch.constant.int 65536
%int98560 = torch.constant.int 98560
%int12320 = torch.constant.int 12320
%int19712 = torch.constant.int 19712
%int10240 = torch.constant.int 10240
%int128 = torch.constant.int 128
%int4928 = torch.constant.int 4928
%float2.000000e00 = torch.constant.float 2.000000e+00
%int60 = torch.constant.int 60
%int1920 = torch.constant.int 1920
%int30 = torch.constant.int 30
%int960 = torch.constant.int 960
%58 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%59 = torch.aten.expand %arg1, %58, %false : !torch.vtensor<[1],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[2],f16>
%cuda3A0 = torch.constant.device "cuda:0"
%60 = torch.aten.arange.start %int0, %int160, %int6, %none, %cuda3A0, %false : !torch.int, !torch.int, !torch.int, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[160],f32>
%61 = torch.aten.mul.Tensor %60, %8 : !torch.vtensor<[160],f32>, !torch.vtensor<[],f64> -> !torch.vtensor<[160],f32>
%62 = torch.aten.div.Tensor %61, %7 : !torch.vtensor<[160],f32>, !torch.vtensor<[],si64> -> !torch.vtensor<[160],f32>
%63 = torch.aten.exp %62 : !torch.vtensor<[160],f32> -> !torch.vtensor<[160],f32>
%64 = torch.aten.slice.Tensor %59, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2],f16>
%65 = torch.aten.unsqueeze %64, %int1 : !torch.vtensor<[2],f16>, !torch.int -> !torch.vtensor<[2,1],f16>
%66 = torch.aten._to_copy %65, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,1],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1],f32>
%67 = torch.aten.unsqueeze %63, %int0 : !torch.vtensor<[160],f32>, !torch.int -> !torch.vtensor<[1,160],f32>
%68 = torch.aten.slice.Tensor %67, %int1, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[1,160],f32>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,160],f32>
%69 = torch.aten.mul.Tensor %66, %68 : !torch.vtensor<[2,1],f32>, !torch.vtensor<[1,160],f32> -> !torch.vtensor<[2,160],f32>
%70 = torch.aten.mul.Tensor %69, %6 : !torch.vtensor<[2,160],f32>, !torch.vtensor<[],si64> -> !torch.vtensor<[2,160],f32>
%71 = torch.aten.sin %70 : !torch.vtensor<[2,160],f32> -> !torch.vtensor<[2,160],f32>
%72 = torch.aten.cos %70 : !torch.vtensor<[2,160],f32> -> !torch.vtensor<[2,160],f32>
%73 = torch.prim.ListConstruct %71, %72 : (!torch.vtensor<[2,160],f32>, !torch.vtensor<[2,160],f32>) -> !torch.list<vtensor>
%74 = torch.aten.cat %73, %int-1 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[2,320],f32>
%75 = torch.aten.slice.Tensor %74, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,320],f32>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,320],f32>
%76 = torch.aten.slice.Tensor %75, %int1, %int160, %int9223372036854775807, %int1 : !torch.vtensor<[2,320],f32>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,160],f32>
%77 = torch.aten.slice.Tensor %74, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,320],f32>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,320],f32>
%78 = torch.aten.slice.Tensor %77, %int1, %int0, %int160, %int1 : !torch.vtensor<[2,320],f32>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,160],f32>
%79 = torch.prim.ListConstruct %76, %78 : (!torch.vtensor<[2,160],f32>, !torch.vtensor<[2,160],f32>) -> !torch.list<vtensor>
%80 = torch.aten.cat %79, %int-1 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[2,320],f32>
%81 = torch.aten._to_copy %80, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,320],f16>
%82 = torch.aten.t %9 : !torch.vtensor<[1280,320],f16> -> !torch.vtensor<[320,1280],f16>
%83 = torch.aten.addmm %29, %81, %82, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[2,320],f16>, !torch.vtensor<[320,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
%84 = torch.aten.silu %83 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%85 = torch.aten.t %20 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%86 = torch.aten.addmm %29, %84, %85, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
%87 = torch.prim.ListConstruct %int1, %int1 : (!torch.int, !torch.int) -> !torch.list<int>
%88 = torch.prim.ListConstruct %int0, %int0 : (!torch.int, !torch.int) -> !torch.list<int>
%89 = torch.aten._convolution %arg0, %10, %55, %87, %87, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,4,64,64],f16>, !torch.vtensor<[320,4,3,3],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,320,64,64],f16>
%90 = torch.prim.ListConstruct %int2, %int32, %int10, %int4096 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%91 = torch.aten.view %89, %90 : !torch.vtensor<[2,320,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,32,10,4096],f16>
%92 = torch.aten._to_copy %91, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,32,10,4096],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,32,10,4096],f32>
%93 = torch.prim.ListConstruct %int2, %int3 : (!torch.int, !torch.int) -> !torch.list<int>
%94 = torch.aten.var.correction %92, %93, %int0, %true : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>
%95 = torch.aten.mean.dim %92, %93, %true, %none : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%96 = torch.aten.add.Tensor %94, %5, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%97 = torch.aten.rsqrt %96 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%98 = torch.aten.sub.Tensor %91, %95, %int1 : !torch.vtensor<[2,32,10,4096],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,10,4096],f32>
%99 = torch.aten.mul.Tensor %98, %97 : !torch.vtensor<[2,32,10,4096],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,10,4096],f32>
%100 = torch.prim.ListConstruct %int2, %int320, %int64, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%101 = torch.aten.view %99, %100 : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f32>
%102 = torch.aten.unsqueeze %55, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16>
%103 = torch.aten.unsqueeze %102, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16>
%104 = torch.aten.mul.Tensor %101, %103 : !torch.vtensor<[2,320,64,64],f32>, !torch.vtensor<[320,1,1],f16> -> !torch.vtensor<[2,320,64,64],f32>
%105 = torch.aten.unsqueeze %55, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16>
%106 = torch.aten.unsqueeze %105, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16>
%107 = torch.aten.add.Tensor %104, %106, %int1 : !torch.vtensor<[2,320,64,64],f32>, !torch.vtensor<[320,1,1],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f32>
%108 = torch.aten._to_copy %107, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,320,64,64],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,320,64,64],f16>
%109 = torch.aten.silu %108 : !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16>
%110 = torch.aten._convolution %109, %47, %55, %87, %87, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[320,320,3,3],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,320,64,64],f16>
%111 = torch.aten.silu %86 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%112 = torch.aten.t %53 : !torch.vtensor<[320,1280],f16> -> !torch.vtensor<[1280,320],f16>
%113 = torch.aten.addmm %55, %111, %112, %int1, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[2,320],f16>
%114 = torch.aten.slice.Tensor %113, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,320],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,320],f16>
%115 = torch.aten.slice.Tensor %114, %int1, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,320],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,320],f16>
%116 = torch.aten.unsqueeze %115, %int2 : !torch.vtensor<[2,320],f16>, !torch.int -> !torch.vtensor<[2,320,1],f16>
%117 = torch.aten.unsqueeze %116, %int3 : !torch.vtensor<[2,320,1],f16>, !torch.int -> !torch.vtensor<[2,320,1,1],f16>
%118 = torch.aten.add.Tensor %110, %117, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,1,1],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f16>
%119 = torch.aten.view %118, %90 : !torch.vtensor<[2,320,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,32,10,4096],f16>
%120 = torch.aten._to_copy %119, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,32,10,4096],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,32,10,4096],f32>
%121 = torch.aten.var.correction %120, %93, %int0, %true : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>
%122 = torch.aten.mean.dim %120, %93, %true, %none : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%123 = torch.aten.add.Tensor %121, %5, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%124 = torch.aten.rsqrt %123 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%125 = torch.aten.sub.Tensor %119, %122, %int1 : !torch.vtensor<[2,32,10,4096],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,10,4096],f32>
%126 = torch.aten.mul.Tensor %125, %124 : !torch.vtensor<[2,32,10,4096],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,10,4096],f32>
%127 = torch.aten.view %126, %100 : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f32>
%128 = torch.aten.unsqueeze %55, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16>
%129 = torch.aten.unsqueeze %128, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16>
%130 = torch.aten.mul.Tensor %127, %129 : !torch.vtensor<[2,320,64,64],f32>, !torch.vtensor<[320,1,1],f16> -> !torch.vtensor<[2,320,64,64],f32>
%131 = torch.aten.unsqueeze %55, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16>
%132 = torch.aten.unsqueeze %131, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16>
%133 = torch.aten.add.Tensor %130, %132, %int1 : !torch.vtensor<[2,320,64,64],f32>, !torch.vtensor<[320,1,1],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f32>
%134 = torch.aten._to_copy %133, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,320,64,64],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,320,64,64],f16>
%135 = torch.aten.silu %134 : !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16>
%136 = torch.aten._convolution %135, %47, %55, %87, %87, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[320,320,3,3],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,320,64,64],f16>
%137 = torch.aten.add.Tensor %89, %136, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f16>
%138 = torch.aten.div.Tensor %137, %4 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[2,320,64,64],f16>
%139 = torch.aten.view %138, %90 : !torch.vtensor<[2,320,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,32,10,4096],f16>
%140 = torch.aten._to_copy %139, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,32,10,4096],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,32,10,4096],f32>
%141 = torch.aten.var.correction %140, %93, %int0, %true : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>
%142 = torch.aten.mean.dim %140, %93, %true, %none : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%143 = torch.aten.add.Tensor %141, %3, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%144 = torch.aten.rsqrt %143 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%145 = torch.aten.sub.Tensor %139, %142, %int1 : !torch.vtensor<[2,32,10,4096],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,10,4096],f32>
%146 = torch.aten.mul.Tensor %145, %144 : !torch.vtensor<[2,32,10,4096],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,10,4096],f32>
%147 = torch.aten.view %146, %100 : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f32>
%148 = torch.aten.unsqueeze %55, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16>
%149 = torch.aten.unsqueeze %148, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16>
%150 = torch.aten.mul.Tensor %147, %149 : !torch.vtensor<[2,320,64,64],f32>, !torch.vtensor<[320,1,1],f16> -> !torch.vtensor<[2,320,64,64],f32>
%151 = torch.aten.unsqueeze %55, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16>
%152 = torch.aten.unsqueeze %151, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16>
%153 = torch.aten.add.Tensor %150, %152, %int1 : !torch.vtensor<[2,320,64,64],f32>, !torch.vtensor<[320,1,1],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f32>
%154 = torch.aten._to_copy %153, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,320,64,64],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,320,64,64],f16>
%155 = torch.aten._convolution %154, %54, %55, %87, %88, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[320,320,1,1],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,320,64,64],f16>
%156 = torch.prim.ListConstruct %int0, %int2, %int3, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%157 = torch.aten.permute %155, %156 : !torch.vtensor<[2,320,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,64,64,320],f16>
%158 = torch.prim.ListConstruct %int2, %int4096, %int320 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%159 = torch.prim.ListConstruct %int1310720, %int1, %int4096 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%160 = torch.aten._reshape_alias %157, %158, %159 : !torch.vtensor<[2,64,64,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%161 = torch.prim.ListConstruct %int320 : (!torch.int) -> !torch.list<int>
%result0, %result1, %result2 = torch.aten.native_layer_norm %160, %161, %55, %55, %float1.000000e-05 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.vtensor<[320],f16>, !torch.vtensor<[320],f16>, !torch.float -> !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,1],f32>, !torch.vtensor<[2,4096,1],f32>
%162 = torch.aten.t %50 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
%163 = torch.prim.ListConstruct %int8192, %int320 : (!torch.int, !torch.int) -> !torch.list<int>
%164 = torch.prim.ListConstruct %int320, %int1 : (!torch.int, !torch.int) -> !torch.list<int>
%165 = torch.aten._reshape_alias %result0, %163, %164 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
%166 = torch.aten.mm %165, %162 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16>
%167 = torch.aten._unsafe_view %166, %158 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%168 = torch.aten.t %50 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
%169 = torch.aten._reshape_alias %result0, %163, %164 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
%170 = torch.aten.mm %169, %168 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16>
%171 = torch.aten._unsafe_view %170, %158 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%172 = torch.aten.t %50 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
%173 = torch.aten._reshape_alias %result0, %163, %164 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
%174 = torch.aten.mm %173, %172 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16>
%175 = torch.aten._unsafe_view %174, %158 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%176 = torch.prim.ListConstruct %int2, %int4096, %int8, %int40 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%177 = torch.prim.ListConstruct %int1310720, %int320, %int40, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%178 = torch.aten._reshape_alias %167, %176, %177 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16>
%179 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%180 = torch.aten.permute %178, %179 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16>
%181 = torch.aten.clone %180, %int0 : !torch.vtensor<[2,8,4096,40],f16>, !torch.int -> !torch.vtensor<[2,8,4096,40],f16>
%182 = torch.prim.ListConstruct %int16, %int4096, %int40 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%183 = torch.aten._unsafe_view %181, %182 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%184 = torch.aten._reshape_alias %171, %176, %177 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16>
%185 = torch.aten.permute %184, %179 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16>
%186 = torch.aten.clone %185, %int0 : !torch.vtensor<[2,8,4096,40],f16>, !torch.int -> !torch.vtensor<[2,8,4096,40],f16>
%187 = torch.aten._unsafe_view %186, %182 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%188 = torch.aten._reshape_alias %175, %176, %177 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16>
%189 = torch.aten.permute %188, %179 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16>
%190 = torch.aten.clone %189, %int0 : !torch.vtensor<[2,8,4096,40],f16>, !torch.int -> !torch.vtensor<[2,8,4096,40],f16>
%191 = torch.aten._unsafe_view %190, %182 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%192 = torch.aten.transpose.int %187, %int-1, %int-2 : !torch.vtensor<[16,4096,40],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,40,4096],f16>
%193 = torch.aten.expand %183, %182, %false : !torch.vtensor<[16,4096,40],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,4096,40],f16>
%194 = torch.prim.ListConstruct %int163840, %int40, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%195 = torch.aten._reshape_alias %193, %182, %194 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%196 = torch.prim.ListConstruct %int16, %int40, %int4096 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%197 = torch.aten.expand %192, %196, %false : !torch.vtensor<[16,40,4096],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,40,4096],f16>
%198 = torch.prim.ListConstruct %int163840, %int1, %int40 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%199 = torch.aten._reshape_alias %197, %196, %198 : !torch.vtensor<[16,40,4096],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,40,4096],f16>
%200 = torch.aten.bmm %195, %199 : !torch.vtensor<[16,4096,40],f16>, !torch.vtensor<[16,40,4096],f16> -> !torch.vtensor<[16,4096,4096],f16>
%201 = torch.prim.ListConstruct %int16, %int4096, %int4096 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%202 = torch.aten._unsafe_view %200, %201 : !torch.vtensor<[16,4096,4096],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,4096],f16>
%203 = torch.aten.mul.Tensor %202, %2 : !torch.vtensor<[16,4096,4096],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,4096,4096],f16>
%204 = torch.aten._softmax %203, %int-1, %false : !torch.vtensor<[16,4096,4096],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,4096,4096],f16>
%205 = torch.aten.expand %204, %201, %false : !torch.vtensor<[16,4096,4096],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,4096,4096],f16>
%206 = torch.prim.ListConstruct %int16777216, %int4096, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%207 = torch.aten._reshape_alias %205, %201, %206 : !torch.vtensor<[16,4096,4096],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,4096,4096],f16>
%208 = torch.aten.expand %191, %182, %false : !torch.vtensor<[16,4096,40],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,4096,40],f16>
%209 = torch.aten._reshape_alias %208, %182, %194 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%210 = torch.aten.bmm %207, %209 : !torch.vtensor<[16,4096,4096],f16>, !torch.vtensor<[16,4096,40],f16> -> !torch.vtensor<[16,4096,40],f16>
%211 = torch.aten._unsafe_view %210, %182 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%212 = torch.prim.ListConstruct %int2, %int8, %int4096, %int40 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%213 = torch.prim.ListConstruct %int1310720, %int163840, %int40, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%214 = torch.aten._reshape_alias %211, %212, %213 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16>
%215 = torch.aten.permute %214, %179 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16>
%216 = torch.aten.clone %215, %int0 : !torch.vtensor<[2,4096,8,40],f16>, !torch.int -> !torch.vtensor<[2,4096,8,40],f16>
%217 = torch.aten._unsafe_view %216, %158 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%218 = torch.aten.t %50 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
%219 = torch.aten.view %217, %163 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
%220 = torch.aten.addmm %55, %219, %218, %int1, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[8192,320],f16>
%221 = torch.aten.view %220, %158 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%222 = torch.aten.add.Tensor %221, %160, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16>
%result0_1, %result1_2, %result2_3 = torch.aten.native_layer_norm %222, %161, %55, %55, %float1.000000e-05 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.vtensor<[320],f16>, !torch.vtensor<[320],f16>, !torch.float -> !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,1],f32>, !torch.vtensor<[2,4096,1],f32>
%223 = torch.aten.t %50 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
%224 = torch.aten._reshape_alias %result0_1, %163, %164 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
%225 = torch.aten.mm %224, %223 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16>
%226 = torch.aten._unsafe_view %225, %158 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%227 = torch.aten.t %49 : !torch.vtensor<[320,768],f16> -> !torch.vtensor<[768,320],f16>
%228 = torch.prim.ListConstruct %int154, %int768 : (!torch.int, !torch.int) -> !torch.list<int>
%229 = torch.prim.ListConstruct %int768, %int1 : (!torch.int, !torch.int) -> !torch.list<int>
%230 = torch.aten._reshape_alias %arg2, %228, %229 : !torch.vtensor<[2,77,768],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[154,768],f16>
%231 = torch.aten.mm %230, %227 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,320],f16> -> !torch.vtensor<[154,320],f16>
%232 = torch.prim.ListConstruct %int2, %int77, %int320 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%233 = torch.aten._unsafe_view %231, %232 : !torch.vtensor<[154,320],f16>, !torch.list<int> -> !torch.vtensor<[2,77,320],f16>
%234 = torch.aten.t %49 : !torch.vtensor<[320,768],f16> -> !torch.vtensor<[768,320],f16>
%235 = torch.aten._reshape_alias %arg2, %228, %229 : !torch.vtensor<[2,77,768],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[154,768],f16>
%236 = torch.aten.mm %235, %234 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,320],f16> -> !torch.vtensor<[154,320],f16>
%237 = torch.aten._unsafe_view %236, %232 : !torch.vtensor<[154,320],f16>, !torch.list<int> -> !torch.vtensor<[2,77,320],f16>
%238 = torch.aten._reshape_alias %226, %176, %177 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16>
%239 = torch.aten.permute %238, %179 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16>
%240 = torch.aten.clone %239, %int0 : !torch.vtensor<[2,8,4096,40],f16>, !torch.int -> !torch.vtensor<[2,8,4096,40],f16>
%241 = torch.aten._unsafe_view %240, %182 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%242 = torch.prim.ListConstruct %int2, %int77, %int8, %int40 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%243 = torch.prim.ListConstruct %int24640, %int320, %int40, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%244 = torch.aten._reshape_alias %233, %242, %243 : !torch.vtensor<[2,77,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,77,8,40],f16>
%245 = torch.aten.permute %244, %179 : !torch.vtensor<[2,77,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,40],f16>
%246 = torch.aten.clone %245, %int0 : !torch.vtensor<[2,8,77,40],f16>, !torch.int -> !torch.vtensor<[2,8,77,40],f16>
%247 = torch.prim.ListConstruct %int16, %int77, %int40 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%248 = torch.aten._unsafe_view %246, %247 : !torch.vtensor<[2,8,77,40],f16>, !torch.list<int> -> !torch.vtensor<[16,77,40],f16>
%249 = torch.aten._reshape_alias %237, %242, %243 : !torch.vtensor<[2,77,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,77,8,40],f16>
%250 = torch.aten.permute %249, %179 : !torch.vtensor<[2,77,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,40],f16>
%251 = torch.aten.clone %250, %int0 : !torch.vtensor<[2,8,77,40],f16>, !torch.int -> !torch.vtensor<[2,8,77,40],f16>
%252 = torch.aten._unsafe_view %251, %247 : !torch.vtensor<[2,8,77,40],f16>, !torch.list<int> -> !torch.vtensor<[16,77,40],f16>
%253 = torch.aten.transpose.int %248, %int-1, %int-2 : !torch.vtensor<[16,77,40],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,40,77],f16>
%254 = torch.aten.expand %241, %182, %false : !torch.vtensor<[16,4096,40],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,4096,40],f16>
%255 = torch.aten._reshape_alias %254, %182, %194 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%256 = torch.prim.ListConstruct %int16, %int40, %int77 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%257 = torch.aten.expand %253, %256, %false : !torch.vtensor<[16,40,77],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,40,77],f16>
%258 = torch.prim.ListConstruct %int3080, %int1, %int40 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%259 = torch.aten._reshape_alias %257, %256, %258 : !torch.vtensor<[16,40,77],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,40,77],f16>
%260 = torch.aten.bmm %255, %259 : !torch.vtensor<[16,4096,40],f16>, !torch.vtensor<[16,40,77],f16> -> !torch.vtensor<[16,4096,77],f16>
%261 = torch.prim.ListConstruct %int16, %int4096, %int77 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%262 = torch.aten._unsafe_view %260, %261 : !torch.vtensor<[16,4096,77],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,77],f16>
%263 = torch.aten.mul.Tensor %262, %2 : !torch.vtensor<[16,4096,77],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,4096,77],f16>
%264 = torch.aten._softmax %263, %int-1, %false : !torch.vtensor<[16,4096,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,4096,77],f16>
%265 = torch.aten.expand %264, %261, %false : !torch.vtensor<[16,4096,77],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,4096,77],f16>
%266 = torch.prim.ListConstruct %int315392, %int77, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%267 = torch.aten._reshape_alias %265, %261, %266 : !torch.vtensor<[16,4096,77],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,4096,77],f16>
%268 = torch.aten.expand %252, %247, %false : !torch.vtensor<[16,77,40],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,77,40],f16>
%269 = torch.prim.ListConstruct %int3080, %int40, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%270 = torch.aten._reshape_alias %268, %247, %269 : !torch.vtensor<[16,77,40],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,77,40],f16>
%271 = torch.aten.bmm %267, %270 : !torch.vtensor<[16,4096,77],f16>, !torch.vtensor<[16,77,40],f16> -> !torch.vtensor<[16,4096,40],f16>
%272 = torch.aten._unsafe_view %271, %182 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%273 = torch.aten._reshape_alias %272, %212, %213 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16>
%274 = torch.aten.permute %273, %179 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16>
%275 = torch.aten.clone %274, %int0 : !torch.vtensor<[2,4096,8,40],f16>, !torch.int -> !torch.vtensor<[2,4096,8,40],f16>
%276 = torch.aten._unsafe_view %275, %158 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%277 = torch.aten.t %50 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
%278 = torch.aten.view %276, %163 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
%279 = torch.aten.addmm %55, %278, %277, %int1, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[8192,320],f16>
%280 = torch.aten.view %279, %158 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%281 = torch.aten.add.Tensor %280, %222, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16>
%result0_4, %result1_5, %result2_6 = torch.aten.native_layer_norm %281, %161, %55, %55, %float1.000000e-05 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.vtensor<[320],f16>, !torch.vtensor<[320],f16>, !torch.float -> !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,1],f32>, !torch.vtensor<[2,4096,1],f32>
%282 = torch.aten.t %51 : !torch.vtensor<[2560,320],f16> -> !torch.vtensor<[320,2560],f16>
%283 = torch.aten.view %result0_4, %163 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
%284 = torch.aten.addmm %52, %283, %282, %int1, %int1 : !torch.vtensor<[2560],f16>, !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,2560],f16>, !torch.int, !torch.int -> !torch.vtensor<[8192,2560],f16>
%285 = torch.prim.ListConstruct %int2, %int4096, %int2560 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%286 = torch.aten.view %284, %285 : !torch.vtensor<[8192,2560],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,2560],f16>
%287 = torch.aten.slice.Tensor %286, %int-1, %int0, %int1280, %int1 : !torch.vtensor<[2,4096,2560],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,4096,1280],f16>
%288 = torch.aten.slice.Tensor %286, %int-1, %int1280, %int2560, %int1 : !torch.vtensor<[2,4096,2560],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,4096,1280],f16>
%289 = torch.aten.gelu %288, %str_0 : !torch.vtensor<[2,4096,1280],f16>, !torch.str -> !torch.vtensor<[2,4096,1280],f16>
%290 = torch.aten.mul.Tensor %287, %289 : !torch.vtensor<[2,4096,1280],f16>, !torch.vtensor<[2,4096,1280],f16> -> !torch.vtensor<[2,4096,1280],f16>
%291 = torch.aten.t %53 : !torch.vtensor<[320,1280],f16> -> !torch.vtensor<[1280,320],f16>
%292 = torch.prim.ListConstruct %int8192, %int1280 : (!torch.int, !torch.int) -> !torch.list<int>
%293 = torch.aten.view %290, %292 : !torch.vtensor<[2,4096,1280],f16>, !torch.list<int> -> !torch.vtensor<[8192,1280],f16>
%294 = torch.aten.addmm %55, %293, %291, %int1, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[8192,1280],f16>, !torch.vtensor<[1280,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[8192,320],f16>
%295 = torch.aten.view %294, %158 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%296 = torch.aten.add.Tensor %295, %281, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16>
%297 = torch.prim.ListConstruct %int2, %int64, %int64, %int320 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%298 = torch.prim.ListConstruct %int1310720, %int20480, %int320, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%299 = torch.aten._reshape_alias %296, %297, %298 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,64,64,320],f16>
%300 = torch.prim.ListConstruct %int0, %int3, %int1, %int2 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%301 = torch.aten.permute %299, %300 : !torch.vtensor<[2,64,64,320],f16>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f16>
%302 = torch.aten._convolution %301, %54, %55, %87, %88, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[320,320,1,1],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,320,64,64],f16>
%303 = torch.aten.add.Tensor %302, %138, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f16>
%304 = torch.aten.clone %303, %int0 : !torch.vtensor<[2,320,64,64],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f16>
%305 = torch.aten.view %304, %90 : !torch.vtensor<[2,320,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,32,10,4096],f16>
%306 = torch.aten._to_copy %305, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,32,10,4096],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,32,10,4096],f32>
%307 = torch.aten.var.correction %306, %93, %int0, %true : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>
%308 = torch.aten.mean.dim %306, %93, %true, %none : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%309 = torch.aten.add.Tensor %307, %5, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%310 = torch.aten.rsqrt %309 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%311 = torch.aten.sub.Tensor %305, %308, %int1 : !torch.vtensor<[2,32,10,4096],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,10,4096],f32>
%312 = torch.aten.mul.Tensor %311, %310 : !torch.vtensor<[2,32,10,4096],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,10,4096],f32>
%313 = torch.aten.view %312, %100 : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f32>
%314 = torch.aten.unsqueeze %55, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16>
%315 = torch.aten.unsqueeze %314, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16>
%316 = torch.aten.mul.Tensor %313, %315 : !torch.vtensor<[2,320,64,64],f32>, !torch.vtensor<[320,1,1],f16> -> !torch.vtensor<[2,320,64,64],f32>
%317 = torch.aten.unsqueeze %55, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16>
%318 = torch.aten.unsqueeze %317, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16>
%319 = torch.aten.add.Tensor %316, %318, %int1 : !torch.vtensor<[2,320,64,64],f32>, !torch.vtensor<[320,1,1],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f32>
%320 = torch.aten._to_copy %319, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,320,64,64],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,320,64,64],f16>
%321 = torch.aten.silu %320 : !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16>
%322 = torch.aten._convolution %321, %47, %55, %87, %87, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[320,320,3,3],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,320,64,64],f16>
%323 = torch.aten.silu %86 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%324 = torch.aten.t %53 : !torch.vtensor<[320,1280],f16> -> !torch.vtensor<[1280,320],f16>
%325 = torch.aten.addmm %55, %323, %324, %int1, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[2,320],f16>
%326 = torch.aten.slice.Tensor %325, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,320],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,320],f16>
%327 = torch.aten.slice.Tensor %326, %int1, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,320],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,320],f16>
%328 = torch.aten.unsqueeze %327, %int2 : !torch.vtensor<[2,320],f16>, !torch.int -> !torch.vtensor<[2,320,1],f16>
%329 = torch.aten.unsqueeze %328, %int3 : !torch.vtensor<[2,320,1],f16>, !torch.int -> !torch.vtensor<[2,320,1,1],f16>
%330 = torch.aten.add.Tensor %322, %329, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,1,1],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f16>
%331 = torch.aten.view %330, %90 : !torch.vtensor<[2,320,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,32,10,4096],f16>
%332 = torch.aten._to_copy %331, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,32,10,4096],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,32,10,4096],f32>
%333 = torch.aten.var.correction %332, %93, %int0, %true : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>
%334 = torch.aten.mean.dim %332, %93, %true, %none : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%335 = torch.aten.add.Tensor %333, %5, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%336 = torch.aten.rsqrt %335 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%337 = torch.aten.sub.Tensor %331, %334, %int1 : !torch.vtensor<[2,32,10,4096],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,10,4096],f32>
%338 = torch.aten.mul.Tensor %337, %336 : !torch.vtensor<[2,32,10,4096],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,10,4096],f32>
%339 = torch.aten.view %338, %100 : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f32>
%340 = torch.aten.unsqueeze %55, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16>
%341 = torch.aten.unsqueeze %340, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16>
%342 = torch.aten.mul.Tensor %339, %341 : !torch.vtensor<[2,320,64,64],f32>, !torch.vtensor<[320,1,1],f16> -> !torch.vtensor<[2,320,64,64],f32>
%343 = torch.aten.unsqueeze %55, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16>
%344 = torch.aten.unsqueeze %343, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16>
%345 = torch.aten.add.Tensor %342, %344, %int1 : !torch.vtensor<[2,320,64,64],f32>, !torch.vtensor<[320,1,1],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f32>
%346 = torch.aten._to_copy %345, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,320,64,64],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,320,64,64],f16>
%347 = torch.aten.silu %346 : !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16>
%348 = torch.aten._convolution %347, %47, %55, %87, %87, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[320,320,3,3],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,320,64,64],f16>
%349 = torch.aten.add.Tensor %303, %348, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f16>
%350 = torch.aten.div.Tensor %349, %4 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[2,320,64,64],f16>
%351 = torch.aten.clone %350, %int0 : !torch.vtensor<[2,320,64,64],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f16>
%352 = torch.aten.view %351, %90 : !torch.vtensor<[2,320,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,32,10,4096],f16>
%353 = torch.aten._to_copy %352, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,32,10,4096],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,32,10,4096],f32>
%354 = torch.aten.var.correction %353, %93, %int0, %true : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>
%355 = torch.aten.mean.dim %353, %93, %true, %none : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%356 = torch.aten.add.Tensor %354, %3, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%357 = torch.aten.rsqrt %356 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%358 = torch.aten.sub.Tensor %352, %355, %int1 : !torch.vtensor<[2,32,10,4096],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,10,4096],f32>
%359 = torch.aten.mul.Tensor %358, %357 : !torch.vtensor<[2,32,10,4096],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,10,4096],f32>
%360 = torch.aten.view %359, %100 : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f32>
%361 = torch.aten.unsqueeze %55, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16>
%362 = torch.aten.unsqueeze %361, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16>
%363 = torch.aten.mul.Tensor %360, %362 : !torch.vtensor<[2,320,64,64],f32>, !torch.vtensor<[320,1,1],f16> -> !torch.vtensor<[2,320,64,64],f32>
%364 = torch.aten.unsqueeze %55, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16>
%365 = torch.aten.unsqueeze %364, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16>
%366 = torch.aten.add.Tensor %363, %365, %int1 : !torch.vtensor<[2,320,64,64],f32>, !torch.vtensor<[320,1,1],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f32>
%367 = torch.aten._to_copy %366, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,320,64,64],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,320,64,64],f16>
%368 = torch.aten._convolution %367, %54, %55, %87, %88, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[320,320,1,1],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,320,64,64],f16>
%369 = torch.aten.permute %368, %156 : !torch.vtensor<[2,320,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,64,64,320],f16>
%370 = torch.aten._reshape_alias %369, %158, %159 : !torch.vtensor<[2,64,64,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%result0_7, %result1_8, %result2_9 = torch.aten.native_layer_norm %370, %161, %55, %55, %float1.000000e-05 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.vtensor<[320],f16>, !torch.vtensor<[320],f16>, !torch.float -> !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,1],f32>, !torch.vtensor<[2,4096,1],f32>
%371 = torch.aten.t %50 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
%372 = torch.aten._reshape_alias %result0_7, %163, %164 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
%373 = torch.aten.mm %372, %371 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16>
%374 = torch.aten._unsafe_view %373, %158 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%375 = torch.aten.t %50 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
%376 = torch.aten._reshape_alias %result0_7, %163, %164 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
%377 = torch.aten.mm %376, %375 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16>
%378 = torch.aten._unsafe_view %377, %158 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%379 = torch.aten.t %50 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
%380 = torch.aten._reshape_alias %result0_7, %163, %164 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
%381 = torch.aten.mm %380, %379 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16>
%382 = torch.aten._unsafe_view %381, %158 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%383 = torch.aten._reshape_alias %374, %176, %177 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16>
%384 = torch.aten.permute %383, %179 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16>
%385 = torch.aten.clone %384, %int0 : !torch.vtensor<[2,8,4096,40],f16>, !torch.int -> !torch.vtensor<[2,8,4096,40],f16>
%386 = torch.aten._unsafe_view %385, %182 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%387 = torch.aten._reshape_alias %378, %176, %177 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16>
%388 = torch.aten.permute %387, %179 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16>
%389 = torch.aten.clone %388, %int0 : !torch.vtensor<[2,8,4096,40],f16>, !torch.int -> !torch.vtensor<[2,8,4096,40],f16>
%390 = torch.aten._unsafe_view %389, %182 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%391 = torch.aten._reshape_alias %382, %176, %177 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16>
%392 = torch.aten.permute %391, %179 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16>
%393 = torch.aten.clone %392, %int0 : !torch.vtensor<[2,8,4096,40],f16>, !torch.int -> !torch.vtensor<[2,8,4096,40],f16>
%394 = torch.aten._unsafe_view %393, %182 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%395 = torch.aten.transpose.int %390, %int-1, %int-2 : !torch.vtensor<[16,4096,40],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,40,4096],f16>
%396 = torch.aten.expand %386, %182, %false : !torch.vtensor<[16,4096,40],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,4096,40],f16>
%397 = torch.aten._reshape_alias %396, %182, %194 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%398 = torch.aten.expand %395, %196, %false : !torch.vtensor<[16,40,4096],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,40,4096],f16>
%399 = torch.aten._reshape_alias %398, %196, %198 : !torch.vtensor<[16,40,4096],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,40,4096],f16>
%400 = torch.aten.bmm %397, %399 : !torch.vtensor<[16,4096,40],f16>, !torch.vtensor<[16,40,4096],f16> -> !torch.vtensor<[16,4096,4096],f16>
%401 = torch.aten._unsafe_view %400, %201 : !torch.vtensor<[16,4096,4096],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,4096],f16>
%402 = torch.aten.mul.Tensor %401, %2 : !torch.vtensor<[16,4096,4096],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,4096,4096],f16>
%403 = torch.aten._softmax %402, %int-1, %false : !torch.vtensor<[16,4096,4096],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,4096,4096],f16>
%404 = torch.aten.expand %403, %201, %false : !torch.vtensor<[16,4096,4096],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,4096,4096],f16>
%405 = torch.aten._reshape_alias %404, %201, %206 : !torch.vtensor<[16,4096,4096],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,4096,4096],f16>
%406 = torch.aten.expand %394, %182, %false : !torch.vtensor<[16,4096,40],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,4096,40],f16>
%407 = torch.aten._reshape_alias %406, %182, %194 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%408 = torch.aten.bmm %405, %407 : !torch.vtensor<[16,4096,4096],f16>, !torch.vtensor<[16,4096,40],f16> -> !torch.vtensor<[16,4096,40],f16>
%409 = torch.aten._unsafe_view %408, %182 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%410 = torch.aten._reshape_alias %409, %212, %213 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16>
%411 = torch.aten.permute %410, %179 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16>
%412 = torch.aten.clone %411, %int0 : !torch.vtensor<[2,4096,8,40],f16>, !torch.int -> !torch.vtensor<[2,4096,8,40],f16>
%413 = torch.aten._unsafe_view %412, %158 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%414 = torch.aten.t %50 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
%415 = torch.aten.view %413, %163 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
%416 = torch.aten.addmm %55, %415, %414, %int1, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[8192,320],f16>
%417 = torch.aten.view %416, %158 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%418 = torch.aten.add.Tensor %417, %370, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16>
%result0_10, %result1_11, %result2_12 = torch.aten.native_layer_norm %418, %161, %55, %55, %float1.000000e-05 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.vtensor<[320],f16>, !torch.vtensor<[320],f16>, !torch.float -> !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,1],f32>, !torch.vtensor<[2,4096,1],f32>
%419 = torch.aten.t %50 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
%420 = torch.aten._reshape_alias %result0_10, %163, %164 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
%421 = torch.aten.mm %420, %419 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16>
%422 = torch.aten._unsafe_view %421, %158 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%423 = torch.aten.t %49 : !torch.vtensor<[320,768],f16> -> !torch.vtensor<[768,320],f16>
%424 = torch.aten._reshape_alias %arg2, %228, %229 : !torch.vtensor<[2,77,768],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[154,768],f16>
%425 = torch.aten.mm %424, %423 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,320],f16> -> !torch.vtensor<[154,320],f16>
%426 = torch.aten._unsafe_view %425, %232 : !torch.vtensor<[154,320],f16>, !torch.list<int> -> !torch.vtensor<[2,77,320],f16>
%427 = torch.aten.t %49 : !torch.vtensor<[320,768],f16> -> !torch.vtensor<[768,320],f16>
%428 = torch.aten._reshape_alias %arg2, %228, %229 : !torch.vtensor<[2,77,768],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[154,768],f16>
%429 = torch.aten.mm %428, %427 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,320],f16> -> !torch.vtensor<[154,320],f16>
%430 = torch.aten._unsafe_view %429, %232 : !torch.vtensor<[154,320],f16>, !torch.list<int> -> !torch.vtensor<[2,77,320],f16>
%431 = torch.aten._reshape_alias %422, %176, %177 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16>
%432 = torch.aten.permute %431, %179 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16>
%433 = torch.aten.clone %432, %int0 : !torch.vtensor<[2,8,4096,40],f16>, !torch.int -> !torch.vtensor<[2,8,4096,40],f16>
%434 = torch.aten._unsafe_view %433, %182 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%435 = torch.aten._reshape_alias %426, %242, %243 : !torch.vtensor<[2,77,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,77,8,40],f16>
%436 = torch.aten.permute %435, %179 : !torch.vtensor<[2,77,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,40],f16>
%437 = torch.aten.clone %436, %int0 : !torch.vtensor<[2,8,77,40],f16>, !torch.int -> !torch.vtensor<[2,8,77,40],f16>
%438 = torch.aten._unsafe_view %437, %247 : !torch.vtensor<[2,8,77,40],f16>, !torch.list<int> -> !torch.vtensor<[16,77,40],f16>
%439 = torch.aten._reshape_alias %430, %242, %243 : !torch.vtensor<[2,77,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,77,8,40],f16>
%440 = torch.aten.permute %439, %179 : !torch.vtensor<[2,77,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,40],f16>
%441 = torch.aten.clone %440, %int0 : !torch.vtensor<[2,8,77,40],f16>, !torch.int -> !torch.vtensor<[2,8,77,40],f16>
%442 = torch.aten._unsafe_view %441, %247 : !torch.vtensor<[2,8,77,40],f16>, !torch.list<int> -> !torch.vtensor<[16,77,40],f16>
%443 = torch.aten.transpose.int %438, %int-1, %int-2 : !torch.vtensor<[16,77,40],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,40,77],f16>
%444 = torch.aten.expand %434, %182, %false : !torch.vtensor<[16,4096,40],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,4096,40],f16>
%445 = torch.aten._reshape_alias %444, %182, %194 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%446 = torch.aten.expand %443, %256, %false : !torch.vtensor<[16,40,77],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,40,77],f16>
%447 = torch.aten._reshape_alias %446, %256, %258 : !torch.vtensor<[16,40,77],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,40,77],f16>
%448 = torch.aten.bmm %445, %447 : !torch.vtensor<[16,4096,40],f16>, !torch.vtensor<[16,40,77],f16> -> !torch.vtensor<[16,4096,77],f16>
%449 = torch.aten._unsafe_view %448, %261 : !torch.vtensor<[16,4096,77],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,77],f16>
%450 = torch.aten.mul.Tensor %449, %2 : !torch.vtensor<[16,4096,77],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,4096,77],f16>
%451 = torch.aten._softmax %450, %int-1, %false : !torch.vtensor<[16,4096,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,4096,77],f16>
%452 = torch.aten.expand %451, %261, %false : !torch.vtensor<[16,4096,77],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,4096,77],f16>
%453 = torch.aten._reshape_alias %452, %261, %266 : !torch.vtensor<[16,4096,77],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,4096,77],f16>
%454 = torch.aten.expand %442, %247, %false : !torch.vtensor<[16,77,40],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,77,40],f16>
%455 = torch.aten._reshape_alias %454, %247, %269 : !torch.vtensor<[16,77,40],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,77,40],f16>
%456 = torch.aten.bmm %453, %455 : !torch.vtensor<[16,4096,77],f16>, !torch.vtensor<[16,77,40],f16> -> !torch.vtensor<[16,4096,40],f16>
%457 = torch.aten._unsafe_view %456, %182 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%458 = torch.aten._reshape_alias %457, %212, %213 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16>
%459 = torch.aten.permute %458, %179 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16>
%460 = torch.aten.clone %459, %int0 : !torch.vtensor<[2,4096,8,40],f16>, !torch.int -> !torch.vtensor<[2,4096,8,40],f16>
%461 = torch.aten._unsafe_view %460, %158 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%462 = torch.aten.t %50 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
%463 = torch.aten.view %461, %163 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
%464 = torch.aten.addmm %55, %463, %462, %int1, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[8192,320],f16>
%465 = torch.aten.view %464, %158 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%466 = torch.aten.add.Tensor %465, %418, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16>
%result0_13, %result1_14, %result2_15 = torch.aten.native_layer_norm %466, %161, %55, %55, %float1.000000e-05 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.vtensor<[320],f16>, !torch.vtensor<[320],f16>, !torch.float -> !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,1],f32>, !torch.vtensor<[2,4096,1],f32>
%467 = torch.aten.t %51 : !torch.vtensor<[2560,320],f16> -> !torch.vtensor<[320,2560],f16>
%468 = torch.aten.view %result0_13, %163 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
%469 = torch.aten.addmm %52, %468, %467, %int1, %int1 : !torch.vtensor<[2560],f16>, !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,2560],f16>, !torch.int, !torch.int -> !torch.vtensor<[8192,2560],f16>
%470 = torch.aten.view %469, %285 : !torch.vtensor<[8192,2560],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,2560],f16>
%471 = torch.aten.slice.Tensor %470, %int-1, %int0, %int1280, %int1 : !torch.vtensor<[2,4096,2560],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,4096,1280],f16>
%472 = torch.aten.slice.Tensor %470, %int-1, %int1280, %int2560, %int1 : !torch.vtensor<[2,4096,2560],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,4096,1280],f16>
%473 = torch.aten.gelu %472, %str_0 : !torch.vtensor<[2,4096,1280],f16>, !torch.str -> !torch.vtensor<[2,4096,1280],f16>
%474 = torch.aten.mul.Tensor %471, %473 : !torch.vtensor<[2,4096,1280],f16>, !torch.vtensor<[2,4096,1280],f16> -> !torch.vtensor<[2,4096,1280],f16>
%475 = torch.aten.t %53 : !torch.vtensor<[320,1280],f16> -> !torch.vtensor<[1280,320],f16>
%476 = torch.aten.view %474, %292 : !torch.vtensor<[2,4096,1280],f16>, !torch.list<int> -> !torch.vtensor<[8192,1280],f16>
%477 = torch.aten.addmm %55, %476, %475, %int1, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[8192,1280],f16>, !torch.vtensor<[1280,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[8192,320],f16>
%478 = torch.aten.view %477, %158 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%479 = torch.aten.add.Tensor %478, %466, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16>
%480 = torch.aten._reshape_alias %479, %297, %298 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,64,64,320],f16>
%481 = torch.aten.permute %480, %300 : !torch.vtensor<[2,64,64,320],f16>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f16>
%482 = torch.aten._convolution %481, %54, %55, %87, %88, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[320,320,1,1],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,320,64,64],f16>
%483 = torch.aten.add.Tensor %482, %350, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f16>
%484 = torch.prim.ListConstruct %int2, %int2 : (!torch.int, !torch.int) -> !torch.list<int>
%485 = torch.aten._convolution %483, %47, %55, %484, %87, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[320,320,3,3],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,320,32,32],f16>
%486 = torch.aten.clone %485, %int0 : !torch.vtensor<[2,320,32,32],f16>, !torch.int -> !torch.vtensor<[2,320,32,32],f16>
%487 = torch.prim.ListConstruct %int2, %int32, %int10, %int1024 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%488 = torch.aten.view %486, %487 : !torch.vtensor<[2,320,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,32,10,1024],f16>
%489 = torch.aten._to_copy %488, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,32,10,1024],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,32,10,1024],f32>
%490 = torch.aten.var.correction %489, %93, %int0, %true : !torch.vtensor<[2,32,10,1024],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>
%491 = torch.aten.mean.dim %489, %93, %true, %none : !torch.vtensor<[2,32,10,1024],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%492 = torch.aten.add.Tensor %490, %5, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%493 = torch.aten.rsqrt %492 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%494 = torch.aten.sub.Tensor %488, %491, %int1 : !torch.vtensor<[2,32,10,1024],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,10,1024],f32>
%495 = torch.aten.mul.Tensor %494, %493 : !torch.vtensor<[2,32,10,1024],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,10,1024],f32>
%496 = torch.prim.ListConstruct %int2, %int320, %int32, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%497 = torch.aten.view %495, %496 : !torch.vtensor<[2,32,10,1024],f32>, !torch.list<int> -> !torch.vtensor<[2,320,32,32],f32>
%498 = torch.aten.unsqueeze %55, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16>
%499 = torch.aten.unsqueeze %498, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16>
%500 = torch.aten.mul.Tensor %497, %499 : !torch.vtensor<[2,320,32,32],f32>, !torch.vtensor<[320,1,1],f16> -> !torch.vtensor<[2,320,32,32],f32>
%501 = torch.aten.unsqueeze %55, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16>
%502 = torch.aten.unsqueeze %501, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16>
%503 = torch.aten.add.Tensor %500, %502, %int1 : !torch.vtensor<[2,320,32,32],f32>, !torch.vtensor<[320,1,1],f16>, !torch.int -> !torch.vtensor<[2,320,32,32],f32>
%504 = torch.aten._to_copy %503, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,320,32,32],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,320,32,32],f16>
%505 = torch.aten.silu %504 : !torch.vtensor<[2,320,32,32],f16> -> !torch.vtensor<[2,320,32,32],f16>
%506 = torch.aten._convolution %505, %11, %45, %87, %87, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,320,32,32],f16>, !torch.vtensor<[640,320,3,3],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,640,32,32],f16>
%507 = torch.aten.silu %86 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%508 = torch.aten.t %33 : !torch.vtensor<[640,1280],f16> -> !torch.vtensor<[1280,640],f16>
%509 = torch.aten.addmm %45, %507, %508, %int1, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[2,640],f16>
%510 = torch.aten.slice.Tensor %509, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,640],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,640],f16>
%511 = torch.aten.slice.Tensor %510, %int1, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,640],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,640],f16>
%512 = torch.aten.unsqueeze %511, %int2 : !torch.vtensor<[2,640],f16>, !torch.int -> !torch.vtensor<[2,640,1],f16>
%513 = torch.aten.unsqueeze %512, %int3 : !torch.vtensor<[2,640,1],f16>, !torch.int -> !torch.vtensor<[2,640,1,1],f16>
%514 = torch.aten.add.Tensor %506, %513, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,1,1],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
%515 = torch.prim.ListConstruct %int2, %int32, %int20, %int1024 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%516 = torch.aten.view %514, %515 : !torch.vtensor<[2,640,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,32,20,1024],f16>
%517 = torch.aten._to_copy %516, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,32,20,1024],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,32,20,1024],f32>
%518 = torch.aten.var.correction %517, %93, %int0, %true : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>
%519 = torch.aten.mean.dim %517, %93, %true, %none : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%520 = torch.aten.add.Tensor %518, %5, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%521 = torch.aten.rsqrt %520 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%522 = torch.aten.sub.Tensor %516, %519, %int1 : !torch.vtensor<[2,32,20,1024],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,20,1024],f32>
%523 = torch.aten.mul.Tensor %522, %521 : !torch.vtensor<[2,32,20,1024],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,20,1024],f32>
%524 = torch.prim.ListConstruct %int2, %int640, %int32, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%525 = torch.aten.view %523, %524 : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f32>
%526 = torch.aten.unsqueeze %45, %int-1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640,1],f16>
%527 = torch.aten.unsqueeze %526, %int-1 : !torch.vtensor<[640,1],f16>, !torch.int -> !torch.vtensor<[640,1,1],f16>
%528 = torch.aten.mul.Tensor %525, %527 : !torch.vtensor<[2,640,32,32],f32>, !torch.vtensor<[640,1,1],f16> -> !torch.vtensor<[2,640,32,32],f32>
%529 = torch.aten.unsqueeze %45, %int-1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640,1],f16>
%530 = torch.aten.unsqueeze %529, %int-1 : !torch.vtensor<[640,1],f16>, !torch.int -> !torch.vtensor<[640,1,1],f16>
%531 = torch.aten.add.Tensor %528, %530, %int1 : !torch.vtensor<[2,640,32,32],f32>, !torch.vtensor<[640,1,1],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f32>
%532 = torch.aten._to_copy %531, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,640,32,32],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,640,32,32],f16>
%533 = torch.aten.silu %532 : !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16>
%534 = torch.aten._convolution %533, %41, %45, %87, %87, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[640,640,3,3],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,640,32,32],f16>
%535 = torch.aten._convolution %485, %12, %45, %87, %88, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,320,32,32],f16>, !torch.vtensor<[640,320,1,1],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,640,32,32],f16>
%536 = torch.aten.add.Tensor %535, %534, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
%537 = torch.aten.div.Tensor %536, %4 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[2,640,32,32],f16>
%538 = torch.aten.clone %537, %int0 : !torch.vtensor<[2,640,32,32],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
%539 = torch.aten.view %538, %515 : !torch.vtensor<[2,640,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,32,20,1024],f16>
%540 = torch.aten._to_copy %539, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,32,20,1024],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,32,20,1024],f32>
%541 = torch.aten.var.correction %540, %93, %int0, %true : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>
%542 = torch.aten.mean.dim %540, %93, %true, %none : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%543 = torch.aten.add.Tensor %541, %3, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%544 = torch.aten.rsqrt %543 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%545 = torch.aten.sub.Tensor %539, %542, %int1 : !torch.vtensor<[2,32,20,1024],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,20,1024],f32>
%546 = torch.aten.mul.Tensor %545, %544 : !torch.vtensor<[2,32,20,1024],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,20,1024],f32>
%547 = torch.aten.view %546, %524 : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f32>
%548 = torch.aten.unsqueeze %45, %int-1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640,1],f16>
%549 = torch.aten.unsqueeze %548, %int-1 : !torch.vtensor<[640,1],f16>, !torch.int -> !torch.vtensor<[640,1,1],f16>
%550 = torch.aten.mul.Tensor %547, %549 : !torch.vtensor<[2,640,32,32],f32>, !torch.vtensor<[640,1,1],f16> -> !torch.vtensor<[2,640,32,32],f32>
%551 = torch.aten.unsqueeze %45, %int-1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640,1],f16>
%552 = torch.aten.unsqueeze %551, %int-1 : !torch.vtensor<[640,1],f16>, !torch.int -> !torch.vtensor<[640,1,1],f16>
%553 = torch.aten.add.Tensor %550, %552, %int1 : !torch.vtensor<[2,640,32,32],f32>, !torch.vtensor<[640,1,1],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f32>
%554 = torch.aten._to_copy %553, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,640,32,32],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,640,32,32],f16>
%555 = torch.aten._convolution %554, %40, %45, %87, %88, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[640,640,1,1],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,640,32,32],f16>
%556 = torch.aten.permute %555, %156 : !torch.vtensor<[2,640,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,32,32,640],f16>
%557 = torch.prim.ListConstruct %int2, %int1024, %int640 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%558 = torch.prim.ListConstruct %int655360, %int1, %int1024 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%559 = torch.aten._reshape_alias %556, %557, %558 : !torch.vtensor<[2,32,32,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%560 = torch.prim.ListConstruct %int640 : (!torch.int) -> !torch.list<int>
%result0_16, %result1_17, %result2_18 = torch.aten.native_layer_norm %559, %560, %45, %45, %float1.000000e-05 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.vtensor<[640],f16>, !torch.vtensor<[640],f16>, !torch.float -> !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,1],f32>, !torch.vtensor<[2,1024,1],f32>
%561 = torch.aten.t %36 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
%562 = torch.prim.ListConstruct %int2048, %int640 : (!torch.int, !torch.int) -> !torch.list<int>
%563 = torch.prim.ListConstruct %int640, %int1 : (!torch.int, !torch.int) -> !torch.list<int>
%564 = torch.aten._reshape_alias %result0_16, %562, %563 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
%565 = torch.aten.mm %564, %561 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16>
%566 = torch.aten._unsafe_view %565, %557 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%567 = torch.aten.t %36 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
%568 = torch.aten._reshape_alias %result0_16, %562, %563 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
%569 = torch.aten.mm %568, %567 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16>
%570 = torch.aten._unsafe_view %569, %557 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%571 = torch.aten.t %36 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
%572 = torch.aten._reshape_alias %result0_16, %562, %563 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
%573 = torch.aten.mm %572, %571 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16>
%574 = torch.aten._unsafe_view %573, %557 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%575 = torch.prim.ListConstruct %int2, %int1024, %int8, %int80 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%576 = torch.prim.ListConstruct %int655360, %int640, %int80, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%577 = torch.aten._reshape_alias %566, %575, %576 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16>
%578 = torch.aten.permute %577, %179 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16>
%579 = torch.aten.clone %578, %int0 : !torch.vtensor<[2,8,1024,80],f16>, !torch.int -> !torch.vtensor<[2,8,1024,80],f16>
%580 = torch.prim.ListConstruct %int16, %int1024, %int80 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%581 = torch.aten._unsafe_view %579, %580 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%582 = torch.aten._reshape_alias %570, %575, %576 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16>
%583 = torch.aten.permute %582, %179 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16>
%584 = torch.aten.clone %583, %int0 : !torch.vtensor<[2,8,1024,80],f16>, !torch.int -> !torch.vtensor<[2,8,1024,80],f16>
%585 = torch.aten._unsafe_view %584, %580 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%586 = torch.aten._reshape_alias %574, %575, %576 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16>
%587 = torch.aten.permute %586, %179 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16>
%588 = torch.aten.clone %587, %int0 : !torch.vtensor<[2,8,1024,80],f16>, !torch.int -> !torch.vtensor<[2,8,1024,80],f16>
%589 = torch.aten._unsafe_view %588, %580 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%590 = torch.aten.transpose.int %585, %int-1, %int-2 : !torch.vtensor<[16,1024,80],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,80,1024],f16>
%591 = torch.aten.expand %581, %580, %false : !torch.vtensor<[16,1024,80],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,1024,80],f16>
%592 = torch.prim.ListConstruct %int81920, %int80, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%593 = torch.aten._reshape_alias %591, %580, %592 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%594 = torch.prim.ListConstruct %int16, %int80, %int1024 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%595 = torch.aten.expand %590, %594, %false : !torch.vtensor<[16,80,1024],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,80,1024],f16>
%596 = torch.prim.ListConstruct %int81920, %int1, %int80 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%597 = torch.aten._reshape_alias %595, %594, %596 : !torch.vtensor<[16,80,1024],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,80,1024],f16>
%598 = torch.aten.bmm %593, %597 : !torch.vtensor<[16,1024,80],f16>, !torch.vtensor<[16,80,1024],f16> -> !torch.vtensor<[16,1024,1024],f16>
%599 = torch.prim.ListConstruct %int16, %int1024, %int1024 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%600 = torch.aten._unsafe_view %598, %599 : !torch.vtensor<[16,1024,1024],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,1024],f16>
%601 = torch.aten.mul.Tensor %600, %1 : !torch.vtensor<[16,1024,1024],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,1024,1024],f16>
%602 = torch.aten._softmax %601, %int-1, %false : !torch.vtensor<[16,1024,1024],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,1024,1024],f16>
%603 = torch.aten.expand %602, %599, %false : !torch.vtensor<[16,1024,1024],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,1024,1024],f16>
%604 = torch.prim.ListConstruct %int1048576, %int1024, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%605 = torch.aten._reshape_alias %603, %599, %604 : !torch.vtensor<[16,1024,1024],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,1024,1024],f16>
%606 = torch.aten.expand %589, %580, %false : !torch.vtensor<[16,1024,80],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,1024,80],f16>
%607 = torch.aten._reshape_alias %606, %580, %592 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%608 = torch.aten.bmm %605, %607 : !torch.vtensor<[16,1024,1024],f16>, !torch.vtensor<[16,1024,80],f16> -> !torch.vtensor<[16,1024,80],f16>
%609 = torch.aten._unsafe_view %608, %580 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%610 = torch.prim.ListConstruct %int2, %int8, %int1024, %int80 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%611 = torch.prim.ListConstruct %int655360, %int81920, %int80, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%612 = torch.aten._reshape_alias %609, %610, %611 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16>
%613 = torch.aten.permute %612, %179 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16>
%614 = torch.aten.clone %613, %int0 : !torch.vtensor<[2,1024,8,80],f16>, !torch.int -> !torch.vtensor<[2,1024,8,80],f16>
%615 = torch.aten._unsafe_view %614, %557 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%616 = torch.aten.t %36 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
%617 = torch.aten.view %615, %562 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
%618 = torch.aten.addmm %45, %617, %616, %int1, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[2048,640],f16>
%619 = torch.aten.view %618, %557 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%620 = torch.aten.add.Tensor %619, %559, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16>
%result0_19, %result1_20, %result2_21 = torch.aten.native_layer_norm %620, %560, %45, %45, %float1.000000e-05 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.vtensor<[640],f16>, !torch.vtensor<[640],f16>, !torch.float -> !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,1],f32>, !torch.vtensor<[2,1024,1],f32>
%621 = torch.aten.t %36 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
%622 = torch.aten._reshape_alias %result0_19, %562, %563 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
%623 = torch.aten.mm %622, %621 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16>
%624 = torch.aten._unsafe_view %623, %557 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%625 = torch.aten.t %35 : !torch.vtensor<[640,768],f16> -> !torch.vtensor<[768,640],f16>
%626 = torch.aten._reshape_alias %arg2, %228, %229 : !torch.vtensor<[2,77,768],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[154,768],f16>
%627 = torch.aten.mm %626, %625 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,640],f16> -> !torch.vtensor<[154,640],f16>
%628 = torch.prim.ListConstruct %int2, %int77, %int640 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%629 = torch.aten._unsafe_view %627, %628 : !torch.vtensor<[154,640],f16>, !torch.list<int> -> !torch.vtensor<[2,77,640],f16>
%630 = torch.aten.t %35 : !torch.vtensor<[640,768],f16> -> !torch.vtensor<[768,640],f16>
%631 = torch.aten._reshape_alias %arg2, %228, %229 : !torch.vtensor<[2,77,768],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[154,768],f16>
%632 = torch.aten.mm %631, %630 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,640],f16> -> !torch.vtensor<[154,640],f16>
%633 = torch.aten._unsafe_view %632, %628 : !torch.vtensor<[154,640],f16>, !torch.list<int> -> !torch.vtensor<[2,77,640],f16>
%634 = torch.aten._reshape_alias %624, %575, %576 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16>
%635 = torch.aten.permute %634, %179 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16>
%636 = torch.aten.clone %635, %int0 : !torch.vtensor<[2,8,1024,80],f16>, !torch.int -> !torch.vtensor<[2,8,1024,80],f16>
%637 = torch.aten._unsafe_view %636, %580 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%638 = torch.prim.ListConstruct %int2, %int77, %int8, %int80 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%639 = torch.prim.ListConstruct %int49280, %int640, %int80, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%640 = torch.aten._reshape_alias %629, %638, %639 : !torch.vtensor<[2,77,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,77,8,80],f16>
%641 = torch.aten.permute %640, %179 : !torch.vtensor<[2,77,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,80],f16>
%642 = torch.aten.clone %641, %int0 : !torch.vtensor<[2,8,77,80],f16>, !torch.int -> !torch.vtensor<[2,8,77,80],f16>
%643 = torch.prim.ListConstruct %int16, %int77, %int80 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%644 = torch.aten._unsafe_view %642, %643 : !torch.vtensor<[2,8,77,80],f16>, !torch.list<int> -> !torch.vtensor<[16,77,80],f16>
%645 = torch.aten._reshape_alias %633, %638, %639 : !torch.vtensor<[2,77,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,77,8,80],f16>
%646 = torch.aten.permute %645, %179 : !torch.vtensor<[2,77,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,80],f16>
%647 = torch.aten.clone %646, %int0 : !torch.vtensor<[2,8,77,80],f16>, !torch.int -> !torch.vtensor<[2,8,77,80],f16>
%648 = torch.aten._unsafe_view %647, %643 : !torch.vtensor<[2,8,77,80],f16>, !torch.list<int> -> !torch.vtensor<[16,77,80],f16>
%649 = torch.aten.transpose.int %644, %int-1, %int-2 : !torch.vtensor<[16,77,80],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,80,77],f16>
%650 = torch.aten.expand %637, %580, %false : !torch.vtensor<[16,1024,80],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,1024,80],f16>
%651 = torch.aten._reshape_alias %650, %580, %592 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%652 = torch.prim.ListConstruct %int16, %int80, %int77 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%653 = torch.aten.expand %649, %652, %false : !torch.vtensor<[16,80,77],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,80,77],f16>
%654 = torch.prim.ListConstruct %int6160, %int1, %int80 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%655 = torch.aten._reshape_alias %653, %652, %654 : !torch.vtensor<[16,80,77],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,80,77],f16>
%656 = torch.aten.bmm %651, %655 : !torch.vtensor<[16,1024,80],f16>, !torch.vtensor<[16,80,77],f16> -> !torch.vtensor<[16,1024,77],f16>
%657 = torch.prim.ListConstruct %int16, %int1024, %int77 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%658 = torch.aten._unsafe_view %656, %657 : !torch.vtensor<[16,1024,77],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,77],f16>
%659 = torch.aten.mul.Tensor %658, %1 : !torch.vtensor<[16,1024,77],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,1024,77],f16>
%660 = torch.aten._softmax %659, %int-1, %false : !torch.vtensor<[16,1024,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,1024,77],f16>
%661 = torch.aten.expand %660, %657, %false : !torch.vtensor<[16,1024,77],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,1024,77],f16>
%662 = torch.prim.ListConstruct %int78848, %int77, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%663 = torch.aten._reshape_alias %661, %657, %662 : !torch.vtensor<[16,1024,77],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,1024,77],f16>
%664 = torch.aten.expand %648, %643, %false : !torch.vtensor<[16,77,80],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,77,80],f16>
%665 = torch.prim.ListConstruct %int6160, %int80, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%666 = torch.aten._reshape_alias %664, %643, %665 : !torch.vtensor<[16,77,80],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,77,80],f16>
%667 = torch.aten.bmm %663, %666 : !torch.vtensor<[16,1024,77],f16>, !torch.vtensor<[16,77,80],f16> -> !torch.vtensor<[16,1024,80],f16>
%668 = torch.aten._unsafe_view %667, %580 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%669 = torch.aten._reshape_alias %668, %610, %611 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16>
%670 = torch.aten.permute %669, %179 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16>
%671 = torch.aten.clone %670, %int0 : !torch.vtensor<[2,1024,8,80],f16>, !torch.int -> !torch.vtensor<[2,1024,8,80],f16>
%672 = torch.aten._unsafe_view %671, %557 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%673 = torch.aten.t %36 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
%674 = torch.aten.view %672, %562 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
%675 = torch.aten.addmm %45, %674, %673, %int1, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[2048,640],f16>
%676 = torch.aten.view %675, %557 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%677 = torch.aten.add.Tensor %676, %620, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16>
%result0_22, %result1_23, %result2_24 = torch.aten.native_layer_norm %677, %560, %45, %45, %float1.000000e-05 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.vtensor<[640],f16>, !torch.vtensor<[640],f16>, !torch.float -> !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,1],f32>, !torch.vtensor<[2,1024,1],f32>
%678 = torch.aten.t %37 : !torch.vtensor<[5120,640],f16> -> !torch.vtensor<[640,5120],f16>
%679 = torch.aten.view %result0_22, %562 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
%680 = torch.aten.addmm %38, %679, %678, %int1, %int1 : !torch.vtensor<[5120],f16>, !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,5120],f16>, !torch.int, !torch.int -> !torch.vtensor<[2048,5120],f16>
%681 = torch.prim.ListConstruct %int2, %int1024, %int5120 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%682 = torch.aten.view %680, %681 : !torch.vtensor<[2048,5120],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,5120],f16>
%683 = torch.aten.slice.Tensor %682, %int-1, %int0, %int2560, %int1 : !torch.vtensor<[2,1024,5120],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1024,2560],f16>
%684 = torch.aten.slice.Tensor %682, %int-1, %int2560, %int5120, %int1 : !torch.vtensor<[2,1024,5120],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1024,2560],f16>
%685 = torch.aten.gelu %684, %str_0 : !torch.vtensor<[2,1024,2560],f16>, !torch.str -> !torch.vtensor<[2,1024,2560],f16>
%686 = torch.aten.mul.Tensor %683, %685 : !torch.vtensor<[2,1024,2560],f16>, !torch.vtensor<[2,1024,2560],f16> -> !torch.vtensor<[2,1024,2560],f16>
%687 = torch.aten.t %39 : !torch.vtensor<[640,2560],f16> -> !torch.vtensor<[2560,640],f16>
%688 = torch.prim.ListConstruct %int2048, %int2560 : (!torch.int, !torch.int) -> !torch.list<int>
%689 = torch.aten.view %686, %688 : !torch.vtensor<[2,1024,2560],f16>, !torch.list<int> -> !torch.vtensor<[2048,2560],f16>
%690 = torch.aten.addmm %45, %689, %687, %int1, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2048,2560],f16>, !torch.vtensor<[2560,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[2048,640],f16>
%691 = torch.aten.view %690, %557 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%692 = torch.aten.add.Tensor %691, %677, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16>
%693 = torch.prim.ListConstruct %int2, %int32, %int32, %int640 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%694 = torch.prim.ListConstruct %int655360, %int20480, %int640, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%695 = torch.aten._reshape_alias %692, %693, %694 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,32,640],f16>
%696 = torch.aten.permute %695, %300 : !torch.vtensor<[2,32,32,640],f16>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f16>
%697 = torch.aten._convolution %696, %40, %45, %87, %88, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[640,640,1,1],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,640,32,32],f16>
%698 = torch.aten.add.Tensor %697, %537, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
%699 = torch.aten.clone %698, %int0 : !torch.vtensor<[2,640,32,32],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
%700 = torch.aten.view %699, %515 : !torch.vtensor<[2,640,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,32,20,1024],f16>
%701 = torch.aten._to_copy %700, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,32,20,1024],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,32,20,1024],f32>
%702 = torch.aten.var.correction %701, %93, %int0, %true : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>
%703 = torch.aten.mean.dim %701, %93, %true, %none : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%704 = torch.aten.add.Tensor %702, %5, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%705 = torch.aten.rsqrt %704 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%706 = torch.aten.sub.Tensor %700, %703, %int1 : !torch.vtensor<[2,32,20,1024],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,20,1024],f32>
%707 = torch.aten.mul.Tensor %706, %705 : !torch.vtensor<[2,32,20,1024],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,20,1024],f32>
%708 = torch.aten.view %707, %524 : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f32>
%709 = torch.aten.unsqueeze %45, %int-1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640,1],f16>
%710 = torch.aten.unsqueeze %709, %int-1 : !torch.vtensor<[640,1],f16>, !torch.int -> !torch.vtensor<[640,1,1],f16>
%711 = torch.aten.mul.Tensor %708, %710 : !torch.vtensor<[2,640,32,32],f32>, !torch.vtensor<[640,1,1],f16> -> !torch.vtensor<[2,640,32,32],f32>
%712 = torch.aten.unsqueeze %45, %int-1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640,1],f16>
%713 = torch.aten.unsqueeze %712, %int-1 : !torch.vtensor<[640,1],f16>, !torch.int -> !torch.vtensor<[640,1,1],f16>
%714 = torch.aten.add.Tensor %711, %713, %int1 : !torch.vtensor<[2,640,32,32],f32>, !torch.vtensor<[640,1,1],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f32>
%715 = torch.aten._to_copy %714, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,640,32,32],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,640,32,32],f16>
%716 = torch.aten.silu %715 : !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16>
%717 = torch.aten._convolution %716, %41, %45, %87, %87, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[640,640,3,3],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,640,32,32],f16>
%718 = torch.aten.silu %86 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%719 = torch.aten.t %33 : !torch.vtensor<[640,1280],f16> -> !torch.vtensor<[1280,640],f16>
%720 = torch.aten.addmm %45, %718, %719, %int1, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[2,640],f16>
%721 = torch.aten.slice.Tensor %720, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,640],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,640],f16>
%722 = torch.aten.slice.Tensor %721, %int1, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,640],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,640],f16>
%723 = torch.aten.unsqueeze %722, %int2 : !torch.vtensor<[2,640],f16>, !torch.int -> !torch.vtensor<[2,640,1],f16>
%724 = torch.aten.unsqueeze %723, %int3 : !torch.vtensor<[2,640,1],f16>, !torch.int -> !torch.vtensor<[2,640,1,1],f16>
%725 = torch.aten.add.Tensor %717, %724, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,1,1],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
%726 = torch.aten.view %725, %515 : !torch.vtensor<[2,640,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,32,20,1024],f16>
%727 = torch.aten._to_copy %726, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,32,20,1024],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,32,20,1024],f32>
%728 = torch.aten.var.correction %727, %93, %int0, %true : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>
%729 = torch.aten.mean.dim %727, %93, %true, %none : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%730 = torch.aten.add.Tensor %728, %5, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%731 = torch.aten.rsqrt %730 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%732 = torch.aten.sub.Tensor %726, %729, %int1 : !torch.vtensor<[2,32,20,1024],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,20,1024],f32>
%733 = torch.aten.mul.Tensor %732, %731 : !torch.vtensor<[2,32,20,1024],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,20,1024],f32>
%734 = torch.aten.view %733, %524 : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f32>
%735 = torch.aten.unsqueeze %45, %int-1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640,1],f16>
%736 = torch.aten.unsqueeze %735, %int-1 : !torch.vtensor<[640,1],f16>, !torch.int -> !torch.vtensor<[640,1,1],f16>
%737 = torch.aten.mul.Tensor %734, %736 : !torch.vtensor<[2,640,32,32],f32>, !torch.vtensor<[640,1,1],f16> -> !torch.vtensor<[2,640,32,32],f32>
%738 = torch.aten.unsqueeze %45, %int-1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640,1],f16>
%739 = torch.aten.unsqueeze %738, %int-1 : !torch.vtensor<[640,1],f16>, !torch.int -> !torch.vtensor<[640,1,1],f16>
%740 = torch.aten.add.Tensor %737, %739, %int1 : !torch.vtensor<[2,640,32,32],f32>, !torch.vtensor<[640,1,1],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f32>
%741 = torch.aten._to_copy %740, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,640,32,32],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,640,32,32],f16>
%742 = torch.aten.silu %741 : !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16>
%743 = torch.aten._convolution %742, %41, %45, %87, %87, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[640,640,3,3],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,640,32,32],f16>
%744 = torch.aten.add.Tensor %698, %743, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
%745 = torch.aten.div.Tensor %744, %4 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[2,640,32,32],f16>
%746 = torch.aten.clone %745, %int0 : !torch.vtensor<[2,640,32,32],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
%747 = torch.aten.view %746, %515 : !torch.vtensor<[2,640,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,32,20,1024],f16>
%748 = torch.aten._to_copy %747, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,32,20,1024],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,32,20,1024],f32>
%749 = torch.aten.var.correction %748, %93, %int0, %true : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>
%750 = torch.aten.mean.dim %748, %93, %true, %none : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%751 = torch.aten.add.Tensor %749, %3, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%752 = torch.aten.rsqrt %751 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%753 = torch.aten.sub.Tensor %747, %750, %int1 : !torch.vtensor<[2,32,20,1024],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,20,1024],f32>
%754 = torch.aten.mul.Tensor %753, %752 : !torch.vtensor<[2,32,20,1024],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,20,1024],f32>
%755 = torch.aten.view %754, %524 : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f32>
%756 = torch.aten.unsqueeze %45, %int-1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640,1],f16>
%757 = torch.aten.unsqueeze %756, %int-1 : !torch.vtensor<[640,1],f16>, !torch.int -> !torch.vtensor<[640,1,1],f16>
%758 = torch.aten.mul.Tensor %755, %757 : !torch.vtensor<[2,640,32,32],f32>, !torch.vtensor<[640,1,1],f16> -> !torch.vtensor<[2,640,32,32],f32>
%759 = torch.aten.unsqueeze %45, %int-1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640,1],f16>
%760 = torch.aten.unsqueeze %759, %int-1 : !torch.vtensor<[640,1],f16>, !torch.int -> !torch.vtensor<[640,1,1],f16>
%761 = torch.aten.add.Tensor %758, %760, %int1 : !torch.vtensor<[2,640,32,32],f32>, !torch.vtensor<[640,1,1],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f32>
%762 = torch.aten._to_copy %761, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,640,32,32],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,640,32,32],f16>
%763 = torch.aten._convolution %762, %40, %45, %87, %88, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[640,640,1,1],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,640,32,32],f16>
%764 = torch.aten.permute %763, %156 : !torch.vtensor<[2,640,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,32,32,640],f16>
%765 = torch.aten._reshape_alias %764, %557, %558 : !torch.vtensor<[2,32,32,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%result0_25, %result1_26, %result2_27 = torch.aten.native_layer_norm %765, %560, %45, %45, %float1.000000e-05 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.vtensor<[640],f16>, !torch.vtensor<[640],f16>, !torch.float -> !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,1],f32>, !torch.vtensor<[2,1024,1],f32>
%766 = torch.aten.t %36 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
%767 = torch.aten._reshape_alias %result0_25, %562, %563 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
%768 = torch.aten.mm %767, %766 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16>
%769 = torch.aten._unsafe_view %768, %557 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%770 = torch.aten.t %36 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
%771 = torch.aten._reshape_alias %result0_25, %562, %563 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
%772 = torch.aten.mm %771, %770 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16>
%773 = torch.aten._unsafe_view %772, %557 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%774 = torch.aten.t %36 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
%775 = torch.aten._reshape_alias %result0_25, %562, %563 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
%776 = torch.aten.mm %775, %774 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16>
%777 = torch.aten._unsafe_view %776, %557 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%778 = torch.aten._reshape_alias %769, %575, %576 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16>
%779 = torch.aten.permute %778, %179 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16>
%780 = torch.aten.clone %779, %int0 : !torch.vtensor<[2,8,1024,80],f16>, !torch.int -> !torch.vtensor<[2,8,1024,80],f16>
%781 = torch.aten._unsafe_view %780, %580 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%782 = torch.aten._reshape_alias %773, %575, %576 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16>
%783 = torch.aten.permute %782, %179 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16>
%784 = torch.aten.clone %783, %int0 : !torch.vtensor<[2,8,1024,80],f16>, !torch.int -> !torch.vtensor<[2,8,1024,80],f16>
%785 = torch.aten._unsafe_view %784, %580 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%786 = torch.aten._reshape_alias %777, %575, %576 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16>
%787 = torch.aten.permute %786, %179 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16>
%788 = torch.aten.clone %787, %int0 : !torch.vtensor<[2,8,1024,80],f16>, !torch.int -> !torch.vtensor<[2,8,1024,80],f16>
%789 = torch.aten._unsafe_view %788, %580 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%790 = torch.aten.transpose.int %785, %int-1, %int-2 : !torch.vtensor<[16,1024,80],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,80,1024],f16>
%791 = torch.aten.expand %781, %580, %false : !torch.vtensor<[16,1024,80],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,1024,80],f16>
%792 = torch.aten._reshape_alias %791, %580, %592 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%793 = torch.aten.expand %790, %594, %false : !torch.vtensor<[16,80,1024],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,80,1024],f16>
%794 = torch.aten._reshape_alias %793, %594, %596 : !torch.vtensor<[16,80,1024],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,80,1024],f16>
%795 = torch.aten.bmm %792, %794 : !torch.vtensor<[16,1024,80],f16>, !torch.vtensor<[16,80,1024],f16> -> !torch.vtensor<[16,1024,1024],f16>
%796 = torch.aten._unsafe_view %795, %599 : !torch.vtensor<[16,1024,1024],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,1024],f16>
%797 = torch.aten.mul.Tensor %796, %1 : !torch.vtensor<[16,1024,1024],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,1024,1024],f16>
%798 = torch.aten._softmax %797, %int-1, %false : !torch.vtensor<[16,1024,1024],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,1024,1024],f16>
%799 = torch.aten.expand %798, %599, %false : !torch.vtensor<[16,1024,1024],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,1024,1024],f16>
%800 = torch.aten._reshape_alias %799, %599, %604 : !torch.vtensor<[16,1024,1024],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,1024,1024],f16>
%801 = torch.aten.expand %789, %580, %false : !torch.vtensor<[16,1024,80],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,1024,80],f16>
%802 = torch.aten._reshape_alias %801, %580, %592 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%803 = torch.aten.bmm %800, %802 : !torch.vtensor<[16,1024,1024],f16>, !torch.vtensor<[16,1024,80],f16> -> !torch.vtensor<[16,1024,80],f16>
%804 = torch.aten._unsafe_view %803, %580 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%805 = torch.aten._reshape_alias %804, %610, %611 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16>
%806 = torch.aten.permute %805, %179 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16>
%807 = torch.aten.clone %806, %int0 : !torch.vtensor<[2,1024,8,80],f16>, !torch.int -> !torch.vtensor<[2,1024,8,80],f16>
%808 = torch.aten._unsafe_view %807, %557 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%809 = torch.aten.t %36 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
%810 = torch.aten.view %808, %562 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
%811 = torch.aten.addmm %45, %810, %809, %int1, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[2048,640],f16>
%812 = torch.aten.view %811, %557 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%813 = torch.aten.add.Tensor %812, %765, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16>
%result0_28, %result1_29, %result2_30 = torch.aten.native_layer_norm %813, %560, %45, %45, %float1.000000e-05 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.vtensor<[640],f16>, !torch.vtensor<[640],f16>, !torch.float -> !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,1],f32>, !torch.vtensor<[2,1024,1],f32>
%814 = torch.aten.t %36 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
%815 = torch.aten._reshape_alias %result0_28, %562, %563 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
%816 = torch.aten.mm %815, %814 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16>
%817 = torch.aten._unsafe_view %816, %557 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%818 = torch.aten.t %35 : !torch.vtensor<[640,768],f16> -> !torch.vtensor<[768,640],f16>
%819 = torch.aten._reshape_alias %arg2, %228, %229 : !torch.vtensor<[2,77,768],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[154,768],f16>
%820 = torch.aten.mm %819, %818 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,640],f16> -> !torch.vtensor<[154,640],f16>
%821 = torch.aten._unsafe_view %820, %628 : !torch.vtensor<[154,640],f16>, !torch.list<int> -> !torch.vtensor<[2,77,640],f16>
%822 = torch.aten.t %35 : !torch.vtensor<[640,768],f16> -> !torch.vtensor<[768,640],f16>
%823 = torch.aten._reshape_alias %arg2, %228, %229 : !torch.vtensor<[2,77,768],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[154,768],f16>
%824 = torch.aten.mm %823, %822 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,640],f16> -> !torch.vtensor<[154,640],f16>
%825 = torch.aten._unsafe_view %824, %628 : !torch.vtensor<[154,640],f16>, !torch.list<int> -> !torch.vtensor<[2,77,640],f16>
%826 = torch.aten._reshape_alias %817, %575, %576 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16>
%827 = torch.aten.permute %826, %179 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16>
%828 = torch.aten.clone %827, %int0 : !torch.vtensor<[2,8,1024,80],f16>, !torch.int -> !torch.vtensor<[2,8,1024,80],f16>
%829 = torch.aten._unsafe_view %828, %580 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%830 = torch.aten._reshape_alias %821, %638, %639 : !torch.vtensor<[2,77,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,77,8,80],f16>
%831 = torch.aten.permute %830, %179 : !torch.vtensor<[2,77,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,80],f16>
%832 = torch.aten.clone %831, %int0 : !torch.vtensor<[2,8,77,80],f16>, !torch.int -> !torch.vtensor<[2,8,77,80],f16>
%833 = torch.aten._unsafe_view %832, %643 : !torch.vtensor<[2,8,77,80],f16>, !torch.list<int> -> !torch.vtensor<[16,77,80],f16>
%834 = torch.aten._reshape_alias %825, %638, %639 : !torch.vtensor<[2,77,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,77,8,80],f16>
%835 = torch.aten.permute %834, %179 : !torch.vtensor<[2,77,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,80],f16>
%836 = torch.aten.clone %835, %int0 : !torch.vtensor<[2,8,77,80],f16>, !torch.int -> !torch.vtensor<[2,8,77,80],f16>
%837 = torch.aten._unsafe_view %836, %643 : !torch.vtensor<[2,8,77,80],f16>, !torch.list<int> -> !torch.vtensor<[16,77,80],f16>
%838 = torch.aten.transpose.int %833, %int-1, %int-2 : !torch.vtensor<[16,77,80],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,80,77],f16>
%839 = torch.aten.expand %829, %580, %false : !torch.vtensor<[16,1024,80],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,1024,80],f16>
%840 = torch.aten._reshape_alias %839, %580, %592 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%841 = torch.aten.expand %838, %652, %false : !torch.vtensor<[16,80,77],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,80,77],f16>
%842 = torch.aten._reshape_alias %841, %652, %654 : !torch.vtensor<[16,80,77],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,80,77],f16>
%843 = torch.aten.bmm %840, %842 : !torch.vtensor<[16,1024,80],f16>, !torch.vtensor<[16,80,77],f16> -> !torch.vtensor<[16,1024,77],f16>
%844 = torch.aten._unsafe_view %843, %657 : !torch.vtensor<[16,1024,77],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,77],f16>
%845 = torch.aten.mul.Tensor %844, %1 : !torch.vtensor<[16,1024,77],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,1024,77],f16>
%846 = torch.aten._softmax %845, %int-1, %false : !torch.vtensor<[16,1024,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,1024,77],f16>
%847 = torch.aten.expand %846, %657, %false : !torch.vtensor<[16,1024,77],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,1024,77],f16>
%848 = torch.aten._reshape_alias %847, %657, %662 : !torch.vtensor<[16,1024,77],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,1024,77],f16>
%849 = torch.aten.expand %837, %643, %false : !torch.vtensor<[16,77,80],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,77,80],f16>
%850 = torch.aten._reshape_alias %849, %643, %665 : !torch.vtensor<[16,77,80],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,77,80],f16>
%851 = torch.aten.bmm %848, %850 : !torch.vtensor<[16,1024,77],f16>, !torch.vtensor<[16,77,80],f16> -> !torch.vtensor<[16,1024,80],f16>
%852 = torch.aten._unsafe_view %851, %580 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%853 = torch.aten._reshape_alias %852, %610, %611 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16>
%854 = torch.aten.permute %853, %179 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16>
%855 = torch.aten.clone %854, %int0 : !torch.vtensor<[2,1024,8,80],f16>, !torch.int -> !torch.vtensor<[2,1024,8,80],f16>
%856 = torch.aten._unsafe_view %855, %557 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%857 = torch.aten.t %36 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
%858 = torch.aten.view %856, %562 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
%859 = torch.aten.addmm %45, %858, %857, %int1, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[2048,640],f16>
%860 = torch.aten.view %859, %557 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%861 = torch.aten.add.Tensor %860, %813, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16>
%result0_31, %result1_32, %result2_33 = torch.aten.native_layer_norm %861, %560, %45, %45, %float1.000000e-05 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.vtensor<[640],f16>, !torch.vtensor<[640],f16>, !torch.float -> !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,1],f32>, !torch.vtensor<[2,1024,1],f32>
%862 = torch.aten.t %37 : !torch.vtensor<[5120,640],f16> -> !torch.vtensor<[640,5120],f16>
%863 = torch.aten.view %result0_31, %562 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
%864 = torch.aten.addmm %38, %863, %862, %int1, %int1 : !torch.vtensor<[5120],f16>, !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,5120],f16>, !torch.int, !torch.int -> !torch.vtensor<[2048,5120],f16>
%865 = torch.aten.view %864, %681 : !torch.vtensor<[2048,5120],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,5120],f16>
%866 = torch.aten.slice.Tensor %865, %int-1, %int0, %int2560, %int1 : !torch.vtensor<[2,1024,5120],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1024,2560],f16>
%867 = torch.aten.slice.Tensor %865, %int-1, %int2560, %int5120, %int1 : !torch.vtensor<[2,1024,5120],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1024,2560],f16>
%868 = torch.aten.gelu %867, %str_0 : !torch.vtensor<[2,1024,2560],f16>, !torch.str -> !torch.vtensor<[2,1024,2560],f16>
%869 = torch.aten.mul.Tensor %866, %868 : !torch.vtensor<[2,1024,2560],f16>, !torch.vtensor<[2,1024,2560],f16> -> !torch.vtensor<[2,1024,2560],f16>
%870 = torch.aten.t %39 : !torch.vtensor<[640,2560],f16> -> !torch.vtensor<[2560,640],f16>
%871 = torch.aten.view %869, %688 : !torch.vtensor<[2,1024,2560],f16>, !torch.list<int> -> !torch.vtensor<[2048,2560],f16>
%872 = torch.aten.addmm %45, %871, %870, %int1, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2048,2560],f16>, !torch.vtensor<[2560,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[2048,640],f16>
%873 = torch.aten.view %872, %557 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%874 = torch.aten.add.Tensor %873, %861, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16>
%875 = torch.aten._reshape_alias %874, %693, %694 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,32,640],f16>
%876 = torch.aten.permute %875, %300 : !torch.vtensor<[2,32,32,640],f16>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f16>
%877 = torch.aten._convolution %876, %40, %45, %87, %88, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[640,640,1,1],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,640,32,32],f16>
%878 = torch.aten.add.Tensor %877, %745, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
%879 = torch.aten._convolution %878, %41, %45, %484, %87, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[640,640,3,3],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,640,16,16],f16>
%880 = torch.aten.clone %879, %int0 : !torch.vtensor<[2,640,16,16],f16>, !torch.int -> !torch.vtensor<[2,640,16,16],f16>
%881 = torch.prim.ListConstruct %int2, %int32, %int20, %int256 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%882 = torch.aten.view %880, %881 : !torch.vtensor<[2,640,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,32,20,256],f16>
%883 = torch.aten._to_copy %882, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,32,20,256],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,32,20,256],f32>
%884 = torch.aten.var.correction %883, %93, %int0, %true : !torch.vtensor<[2,32,20,256],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>
%885 = torch.aten.mean.dim %883, %93, %true, %none : !torch.vtensor<[2,32,20,256],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%886 = torch.aten.add.Tensor %884, %5, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%887 = torch.aten.rsqrt %886 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%888 = torch.aten.sub.Tensor %882, %885, %int1 : !torch.vtensor<[2,32,20,256],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,20,256],f32>
%889 = torch.aten.mul.Tensor %888, %887 : !torch.vtensor<[2,32,20,256],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,20,256],f32>
%890 = torch.prim.ListConstruct %int2, %int640, %int16, %int16 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%891 = torch.aten.view %889, %890 : !torch.vtensor<[2,32,20,256],f32>, !torch.list<int> -> !torch.vtensor<[2,640,16,16],f32>
%892 = torch.aten.unsqueeze %45, %int-1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640,1],f16>
%893 = torch.aten.unsqueeze %892, %int-1 : !torch.vtensor<[640,1],f16>, !torch.int -> !torch.vtensor<[640,1,1],f16>
%894 = torch.aten.mul.Tensor %891, %893 : !torch.vtensor<[2,640,16,16],f32>, !torch.vtensor<[640,1,1],f16> -> !torch.vtensor<[2,640,16,16],f32>
%895 = torch.aten.unsqueeze %45, %int-1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640,1],f16>
%896 = torch.aten.unsqueeze %895, %int-1 : !torch.vtensor<[640,1],f16>, !torch.int -> !torch.vtensor<[640,1,1],f16>
%897 = torch.aten.add.Tensor %894, %896, %int1 : !torch.vtensor<[2,640,16,16],f32>, !torch.vtensor<[640,1,1],f16>, !torch.int -> !torch.vtensor<[2,640,16,16],f32>
%898 = torch.aten._to_copy %897, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,640,16,16],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,640,16,16],f16>
%899 = torch.aten.silu %898 : !torch.vtensor<[2,640,16,16],f16> -> !torch.vtensor<[2,640,16,16],f16>
%900 = torch.aten._convolution %899, %13, %29, %87, %87, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,640,16,16],f16>, !torch.vtensor<[1280,640,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,16,16],f16>
%901 = torch.aten.silu %86 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%902 = torch.aten.t %20 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%903 = torch.aten.addmm %29, %901, %902, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
%904 = torch.aten.slice.Tensor %903, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,1280],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
%905 = torch.aten.slice.Tensor %904, %int1, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,1280],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
%906 = torch.aten.unsqueeze %905, %int2 : !torch.vtensor<[2,1280],f16>, !torch.int -> !torch.vtensor<[2,1280,1],f16>
%907 = torch.aten.unsqueeze %906, %int3 : !torch.vtensor<[2,1280,1],f16>, !torch.int -> !torch.vtensor<[2,1280,1,1],f16>
%908 = torch.aten.add.Tensor %900, %907, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
%909 = torch.prim.ListConstruct %int2, %int32, %int40, %int256 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%910 = torch.aten.view %908, %909 : !torch.vtensor<[2,1280,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,32,40,256],f16>
%911 = torch.aten._to_copy %910, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,32,40,256],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,32,40,256],f32>
%912 = torch.aten.var.correction %911, %93, %int0, %true : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>
%913 = torch.aten.mean.dim %911, %93, %true, %none : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%914 = torch.aten.add.Tensor %912, %5, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%915 = torch.aten.rsqrt %914 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%916 = torch.aten.sub.Tensor %910, %913, %int1 : !torch.vtensor<[2,32,40,256],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,256],f32>
%917 = torch.aten.mul.Tensor %916, %915 : !torch.vtensor<[2,32,40,256],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,256],f32>
%918 = torch.prim.ListConstruct %int2, %int1280, %int16, %int16 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%919 = torch.aten.view %917, %918 : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f32>
%920 = torch.aten.unsqueeze %29, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%921 = torch.aten.unsqueeze %920, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%922 = torch.aten.mul.Tensor %919, %921 : !torch.vtensor<[2,1280,16,16],f32>, !torch.vtensor<[1280,1,1],f16> -> !torch.vtensor<[2,1280,16,16],f32>
%923 = torch.aten.unsqueeze %29, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%924 = torch.aten.unsqueeze %923, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%925 = torch.aten.add.Tensor %922, %924, %int1 : !torch.vtensor<[2,1280,16,16],f32>, !torch.vtensor<[1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f32>
%926 = torch.aten._to_copy %925, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,1280,16,16],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,16,16],f16>
%927 = torch.aten.silu %926 : !torch.vtensor<[2,1280,16,16],f16> -> !torch.vtensor<[2,1280,16,16],f16>
%928 = torch.aten._convolution %927, %25, %29, %87, %87, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,16,16],f16>
%929 = torch.aten._convolution %879, %14, %29, %87, %88, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,640,16,16],f16>, !torch.vtensor<[1280,640,1,1],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,16,16],f16>
%930 = torch.aten.add.Tensor %929, %928, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
%931 = torch.aten.div.Tensor %930, %4 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[2,1280,16,16],f16>
%932 = torch.aten.clone %931, %int0 : !torch.vtensor<[2,1280,16,16],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
%933 = torch.aten.view %932, %909 : !torch.vtensor<[2,1280,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,32,40,256],f16>
%934 = torch.aten._to_copy %933, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,32,40,256],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,32,40,256],f32>
%935 = torch.aten.var.correction %934, %93, %int0, %true : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>
%936 = torch.aten.mean.dim %934, %93, %true, %none : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%937 = torch.aten.add.Tensor %935, %3, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%938 = torch.aten.rsqrt %937 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%939 = torch.aten.sub.Tensor %933, %936, %int1 : !torch.vtensor<[2,32,40,256],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,256],f32>
%940 = torch.aten.mul.Tensor %939, %938 : !torch.vtensor<[2,32,40,256],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,256],f32>
%941 = torch.aten.view %940, %918 : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f32>
%942 = torch.aten.unsqueeze %29, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%943 = torch.aten.unsqueeze %942, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%944 = torch.aten.mul.Tensor %941, %943 : !torch.vtensor<[2,1280,16,16],f32>, !torch.vtensor<[1280,1,1],f16> -> !torch.vtensor<[2,1280,16,16],f32>
%945 = torch.aten.unsqueeze %29, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%946 = torch.aten.unsqueeze %945, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%947 = torch.aten.add.Tensor %944, %946, %int1 : !torch.vtensor<[2,1280,16,16],f32>, !torch.vtensor<[1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f32>
%948 = torch.aten._to_copy %947, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,1280,16,16],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,16,16],f16>
%949 = torch.aten._convolution %948, %24, %29, %87, %88, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[1280,1280,1,1],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,16,16],f16>
%950 = torch.aten.permute %949, %156 : !torch.vtensor<[2,1280,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,16,16,1280],f16>
%951 = torch.prim.ListConstruct %int2, %int256, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%952 = torch.prim.ListConstruct %int327680, %int1, %int256 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%953 = torch.aten._reshape_alias %950, %951, %952 : !torch.vtensor<[2,16,16,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%954 = torch.prim.ListConstruct %int1280 : (!torch.int) -> !torch.list<int>
%result0_34, %result1_35, %result2_36 = torch.aten.native_layer_norm %953, %954, %29, %29, %float1.000000e-05 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.vtensor<[1280],f16>, !torch.vtensor<[1280],f16>, !torch.float -> !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1],f32>, !torch.vtensor<[2,256,1],f32>
%955 = torch.aten.t %20 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%956 = torch.prim.ListConstruct %int512, %int1280 : (!torch.int, !torch.int) -> !torch.list<int>
%957 = torch.prim.ListConstruct %int1280, %int1 : (!torch.int, !torch.int) -> !torch.list<int>
%958 = torch.aten._reshape_alias %result0_34, %956, %957 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
%959 = torch.aten.mm %958, %955 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16>
%960 = torch.aten._unsafe_view %959, %951 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%961 = torch.aten.t %20 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%962 = torch.aten._reshape_alias %result0_34, %956, %957 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
%963 = torch.aten.mm %962, %961 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16>
%964 = torch.aten._unsafe_view %963, %951 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%965 = torch.aten.t %20 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%966 = torch.aten._reshape_alias %result0_34, %956, %957 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
%967 = torch.aten.mm %966, %965 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16>
%968 = torch.aten._unsafe_view %967, %951 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%969 = torch.prim.ListConstruct %int2, %int256, %int8, %int160 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%970 = torch.prim.ListConstruct %int327680, %int1280, %int160, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%971 = torch.aten._reshape_alias %960, %969, %970 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16>
%972 = torch.aten.permute %971, %179 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16>
%973 = torch.aten.clone %972, %int0 : !torch.vtensor<[2,8,256,160],f16>, !torch.int -> !torch.vtensor<[2,8,256,160],f16>
%974 = torch.prim.ListConstruct %int16, %int256, %int160 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%975 = torch.aten._unsafe_view %973, %974 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%976 = torch.aten._reshape_alias %964, %969, %970 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16>
%977 = torch.aten.permute %976, %179 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16>
%978 = torch.aten.clone %977, %int0 : !torch.vtensor<[2,8,256,160],f16>, !torch.int -> !torch.vtensor<[2,8,256,160],f16>
%979 = torch.aten._unsafe_view %978, %974 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%980 = torch.aten._reshape_alias %968, %969, %970 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16>
%981 = torch.aten.permute %980, %179 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16>
%982 = torch.aten.clone %981, %int0 : !torch.vtensor<[2,8,256,160],f16>, !torch.int -> !torch.vtensor<[2,8,256,160],f16>
%983 = torch.aten._unsafe_view %982, %974 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%984 = torch.aten.transpose.int %979, %int-1, %int-2 : !torch.vtensor<[16,256,160],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,160,256],f16>
%985 = torch.aten.expand %975, %974, %false : !torch.vtensor<[16,256,160],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,256,160],f16>
%986 = torch.prim.ListConstruct %int40960, %int160, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%987 = torch.aten._reshape_alias %985, %974, %986 : !torch.vtensor<[16,256,160],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%988 = torch.prim.ListConstruct %int16, %int160, %int256 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%989 = torch.aten.expand %984, %988, %false : !torch.vtensor<[16,160,256],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,160,256],f16>
%990 = torch.prim.ListConstruct %int40960, %int1, %int160 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%991 = torch.aten._reshape_alias %989, %988, %990 : !torch.vtensor<[16,160,256],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,160,256],f16>
%992 = torch.aten.bmm %987, %991 : !torch.vtensor<[16,256,160],f16>, !torch.vtensor<[16,160,256],f16> -> !torch.vtensor<[16,256,256],f16>
%993 = torch.prim.ListConstruct %int16, %int256, %int256 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%994 = torch.aten._unsafe_view %992, %993 : !torch.vtensor<[16,256,256],f16>, !torch.list<int> -> !torch.vtensor<[16,256,256],f16>
%995 = torch.aten.mul.Tensor %994, %0 : !torch.vtensor<[16,256,256],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,256,256],f16>
%996 = torch.aten._softmax %995, %int-1, %false : !torch.vtensor<[16,256,256],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,256,256],f16>
%997 = torch.aten.expand %996, %993, %false : !torch.vtensor<[16,256,256],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,256,256],f16>
%998 = torch.prim.ListConstruct %int65536, %int256, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%999 = torch.aten._reshape_alias %997, %993, %998 : !torch.vtensor<[16,256,256],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,256,256],f16>
%1000 = torch.aten.expand %983, %974, %false : !torch.vtensor<[16,256,160],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,256,160],f16>
%1001 = torch.aten._reshape_alias %1000, %974, %986 : !torch.vtensor<[16,256,160],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%1002 = torch.aten.bmm %999, %1001 : !torch.vtensor<[16,256,256],f16>, !torch.vtensor<[16,256,160],f16> -> !torch.vtensor<[16,256,160],f16>
%1003 = torch.aten._unsafe_view %1002, %974 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%1004 = torch.prim.ListConstruct %int2, %int8, %int256, %int160 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1005 = torch.prim.ListConstruct %int327680, %int40960, %int160, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1006 = torch.aten._reshape_alias %1003, %1004, %1005 : !torch.vtensor<[16,256,160],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16>
%1007 = torch.aten.permute %1006, %179 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16>
%1008 = torch.aten.clone %1007, %int0 : !torch.vtensor<[2,256,8,160],f16>, !torch.int -> !torch.vtensor<[2,256,8,160],f16>
%1009 = torch.aten._unsafe_view %1008, %951 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%1010 = torch.aten.t %20 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%1011 = torch.aten.view %1009, %956 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
%1012 = torch.aten.addmm %29, %1011, %1010, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[512,1280],f16>
%1013 = torch.aten.view %1012, %951 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%1014 = torch.aten.add.Tensor %1013, %953, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16>
%result0_37, %result1_38, %result2_39 = torch.aten.native_layer_norm %1014, %954, %29, %29, %float1.000000e-05 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.vtensor<[1280],f16>, !torch.vtensor<[1280],f16>, !torch.float -> !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1],f32>, !torch.vtensor<[2,256,1],f32>
%1015 = torch.aten.t %20 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%1016 = torch.aten._reshape_alias %result0_37, %956, %957 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
%1017 = torch.aten.mm %1016, %1015 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16>
%1018 = torch.aten._unsafe_view %1017, %951 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%1019 = torch.aten.t %19 : !torch.vtensor<[1280,768],f16> -> !torch.vtensor<[768,1280],f16>
%1020 = torch.aten._reshape_alias %arg2, %228, %229 : !torch.vtensor<[2,77,768],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[154,768],f16>
%1021 = torch.aten.mm %1020, %1019 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,1280],f16> -> !torch.vtensor<[154,1280],f16>
%1022 = torch.prim.ListConstruct %int2, %int77, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1023 = torch.aten._unsafe_view %1021, %1022 : !torch.vtensor<[154,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,77,1280],f16>
%1024 = torch.aten.t %19 : !torch.vtensor<[1280,768],f16> -> !torch.vtensor<[768,1280],f16>
%1025 = torch.aten._reshape_alias %arg2, %228, %229 : !torch.vtensor<[2,77,768],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[154,768],f16>
%1026 = torch.aten.mm %1025, %1024 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,1280],f16> -> !torch.vtensor<[154,1280],f16>
%1027 = torch.aten._unsafe_view %1026, %1022 : !torch.vtensor<[154,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,77,1280],f16>
%1028 = torch.aten._reshape_alias %1018, %969, %970 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16>
%1029 = torch.aten.permute %1028, %179 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16>
%1030 = torch.aten.clone %1029, %int0 : !torch.vtensor<[2,8,256,160],f16>, !torch.int -> !torch.vtensor<[2,8,256,160],f16>
%1031 = torch.aten._unsafe_view %1030, %974 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%1032 = torch.prim.ListConstruct %int2, %int77, %int8, %int160 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1033 = torch.prim.ListConstruct %int98560, %int1280, %int160, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1034 = torch.aten._reshape_alias %1023, %1032, %1033 : !torch.vtensor<[2,77,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,77,8,160],f16>
%1035 = torch.aten.permute %1034, %179 : !torch.vtensor<[2,77,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,160],f16>
%1036 = torch.aten.clone %1035, %int0 : !torch.vtensor<[2,8,77,160],f16>, !torch.int -> !torch.vtensor<[2,8,77,160],f16>
%1037 = torch.prim.ListConstruct %int16, %int77, %int160 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1038 = torch.aten._unsafe_view %1036, %1037 : !torch.vtensor<[2,8,77,160],f16>, !torch.list<int> -> !torch.vtensor<[16,77,160],f16>
%1039 = torch.aten._reshape_alias %1027, %1032, %1033 : !torch.vtensor<[2,77,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,77,8,160],f16>
%1040 = torch.aten.permute %1039, %179 : !torch.vtensor<[2,77,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,160],f16>
%1041 = torch.aten.clone %1040, %int0 : !torch.vtensor<[2,8,77,160],f16>, !torch.int -> !torch.vtensor<[2,8,77,160],f16>
%1042 = torch.aten._unsafe_view %1041, %1037 : !torch.vtensor<[2,8,77,160],f16>, !torch.list<int> -> !torch.vtensor<[16,77,160],f16>
%1043 = torch.aten.transpose.int %1038, %int-1, %int-2 : !torch.vtensor<[16,77,160],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,160,77],f16>
%1044 = torch.aten.expand %1031, %974, %false : !torch.vtensor<[16,256,160],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,256,160],f16>
%1045 = torch.aten._reshape_alias %1044, %974, %986 : !torch.vtensor<[16,256,160],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%1046 = torch.prim.ListConstruct %int16, %int160, %int77 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1047 = torch.aten.expand %1043, %1046, %false : !torch.vtensor<[16,160,77],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,160,77],f16>
%1048 = torch.prim.ListConstruct %int12320, %int1, %int160 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1049 = torch.aten._reshape_alias %1047, %1046, %1048 : !torch.vtensor<[16,160,77],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,160,77],f16>
%1050 = torch.aten.bmm %1045, %1049 : !torch.vtensor<[16,256,160],f16>, !torch.vtensor<[16,160,77],f16> -> !torch.vtensor<[16,256,77],f16>
%1051 = torch.prim.ListConstruct %int16, %int256, %int77 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1052 = torch.aten._unsafe_view %1050, %1051 : !torch.vtensor<[16,256,77],f16>, !torch.list<int> -> !torch.vtensor<[16,256,77],f16>
%1053 = torch.aten.mul.Tensor %1052, %0 : !torch.vtensor<[16,256,77],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,256,77],f16>
%1054 = torch.aten._softmax %1053, %int-1, %false : !torch.vtensor<[16,256,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,256,77],f16>
%1055 = torch.aten.expand %1054, %1051, %false : !torch.vtensor<[16,256,77],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,256,77],f16>
%1056 = torch.prim.ListConstruct %int19712, %int77, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1057 = torch.aten._reshape_alias %1055, %1051, %1056 : !torch.vtensor<[16,256,77],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,256,77],f16>
%1058 = torch.aten.expand %1042, %1037, %false : !torch.vtensor<[16,77,160],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,77,160],f16>
%1059 = torch.prim.ListConstruct %int12320, %int160, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1060 = torch.aten._reshape_alias %1058, %1037, %1059 : !torch.vtensor<[16,77,160],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,77,160],f16>
%1061 = torch.aten.bmm %1057, %1060 : !torch.vtensor<[16,256,77],f16>, !torch.vtensor<[16,77,160],f16> -> !torch.vtensor<[16,256,160],f16>
%1062 = torch.aten._unsafe_view %1061, %974 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%1063 = torch.aten._reshape_alias %1062, %1004, %1005 : !torch.vtensor<[16,256,160],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16>
%1064 = torch.aten.permute %1063, %179 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16>
%1065 = torch.aten.clone %1064, %int0 : !torch.vtensor<[2,256,8,160],f16>, !torch.int -> !torch.vtensor<[2,256,8,160],f16>
%1066 = torch.aten._unsafe_view %1065, %951 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%1067 = torch.aten.t %20 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%1068 = torch.aten.view %1066, %956 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
%1069 = torch.aten.addmm %29, %1068, %1067, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[512,1280],f16>
%1070 = torch.aten.view %1069, %951 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%1071 = torch.aten.add.Tensor %1070, %1014, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16>
%result0_40, %result1_41, %result2_42 = torch.aten.native_layer_norm %1071, %954, %29, %29, %float1.000000e-05 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.vtensor<[1280],f16>, !torch.vtensor<[1280],f16>, !torch.float -> !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1],f32>, !torch.vtensor<[2,256,1],f32>
%1072 = torch.aten.t %21 : !torch.vtensor<[10240,1280],f16> -> !torch.vtensor<[1280,10240],f16>
%1073 = torch.aten.view %result0_40, %956 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
%1074 = torch.aten.addmm %22, %1073, %1072, %int1, %int1 : !torch.vtensor<[10240],f16>, !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,10240],f16>, !torch.int, !torch.int -> !torch.vtensor<[512,10240],f16>
%1075 = torch.prim.ListConstruct %int2, %int256, %int10240 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1076 = torch.aten.view %1074, %1075 : !torch.vtensor<[512,10240],f16>, !torch.list<int> -> !torch.vtensor<[2,256,10240],f16>
%1077 = torch.aten.slice.Tensor %1076, %int-1, %int0, %int5120, %int1 : !torch.vtensor<[2,256,10240],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,256,5120],f16>
%1078 = torch.aten.slice.Tensor %1076, %int-1, %int5120, %int10240, %int1 : !torch.vtensor<[2,256,10240],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,256,5120],f16>
%1079 = torch.aten.gelu %1078, %str_0 : !torch.vtensor<[2,256,5120],f16>, !torch.str -> !torch.vtensor<[2,256,5120],f16>
%1080 = torch.aten.mul.Tensor %1077, %1079 : !torch.vtensor<[2,256,5120],f16>, !torch.vtensor<[2,256,5120],f16> -> !torch.vtensor<[2,256,5120],f16>
%1081 = torch.aten.t %23 : !torch.vtensor<[1280,5120],f16> -> !torch.vtensor<[5120,1280],f16>
%1082 = torch.prim.ListConstruct %int512, %int5120 : (!torch.int, !torch.int) -> !torch.list<int>
%1083 = torch.aten.view %1080, %1082 : !torch.vtensor<[2,256,5120],f16>, !torch.list<int> -> !torch.vtensor<[512,5120],f16>
%1084 = torch.aten.addmm %29, %1083, %1081, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[512,5120],f16>, !torch.vtensor<[5120,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[512,1280],f16>
%1085 = torch.aten.view %1084, %951 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%1086 = torch.aten.add.Tensor %1085, %1071, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16>
%1087 = torch.prim.ListConstruct %int2, %int16, %int16, %int1280 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1088 = torch.prim.ListConstruct %int327680, %int20480, %int1280, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1089 = torch.aten._reshape_alias %1086, %1087, %1088 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,16,16,1280],f16>
%1090 = torch.aten.permute %1089, %300 : !torch.vtensor<[2,16,16,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f16>
%1091 = torch.aten._convolution %1090, %24, %29, %87, %88, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[1280,1280,1,1],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,16,16],f16>
%1092 = torch.aten.add.Tensor %1091, %931, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
%1093 = torch.aten.clone %1092, %int0 : !torch.vtensor<[2,1280,16,16],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
%1094 = torch.aten.view %1093, %909 : !torch.vtensor<[2,1280,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,32,40,256],f16>
%1095 = torch.aten._to_copy %1094, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,32,40,256],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,32,40,256],f32>
%1096 = torch.aten.var.correction %1095, %93, %int0, %true : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>
%1097 = torch.aten.mean.dim %1095, %93, %true, %none : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%1098 = torch.aten.add.Tensor %1096, %5, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%1099 = torch.aten.rsqrt %1098 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%1100 = torch.aten.sub.Tensor %1094, %1097, %int1 : !torch.vtensor<[2,32,40,256],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,256],f32>
%1101 = torch.aten.mul.Tensor %1100, %1099 : !torch.vtensor<[2,32,40,256],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,256],f32>
%1102 = torch.aten.view %1101, %918 : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f32>
%1103 = torch.aten.unsqueeze %29, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%1104 = torch.aten.unsqueeze %1103, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%1105 = torch.aten.mul.Tensor %1102, %1104 : !torch.vtensor<[2,1280,16,16],f32>, !torch.vtensor<[1280,1,1],f16> -> !torch.vtensor<[2,1280,16,16],f32>
%1106 = torch.aten.unsqueeze %29, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%1107 = torch.aten.unsqueeze %1106, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%1108 = torch.aten.add.Tensor %1105, %1107, %int1 : !torch.vtensor<[2,1280,16,16],f32>, !torch.vtensor<[1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f32>
%1109 = torch.aten._to_copy %1108, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,1280,16,16],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,16,16],f16>
%1110 = torch.aten.silu %1109 : !torch.vtensor<[2,1280,16,16],f16> -> !torch.vtensor<[2,1280,16,16],f16>
%1111 = torch.aten._convolution %1110, %25, %29, %87, %87, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,16,16],f16>
%1112 = torch.aten.silu %86 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%1113 = torch.aten.t %20 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%1114 = torch.aten.addmm %29, %1112, %1113, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
%1115 = torch.aten.slice.Tensor %1114, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,1280],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
%1116 = torch.aten.slice.Tensor %1115, %int1, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,1280],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
%1117 = torch.aten.unsqueeze %1116, %int2 : !torch.vtensor<[2,1280],f16>, !torch.int -> !torch.vtensor<[2,1280,1],f16>
%1118 = torch.aten.unsqueeze %1117, %int3 : !torch.vtensor<[2,1280,1],f16>, !torch.int -> !torch.vtensor<[2,1280,1,1],f16>
%1119 = torch.aten.add.Tensor %1111, %1118, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
%1120 = torch.aten.view %1119, %909 : !torch.vtensor<[2,1280,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,32,40,256],f16>
%1121 = torch.aten._to_copy %1120, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,32,40,256],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,32,40,256],f32>
%1122 = torch.aten.var.correction %1121, %93, %int0, %true : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>
%1123 = torch.aten.mean.dim %1121, %93, %true, %none : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%1124 = torch.aten.add.Tensor %1122, %5, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%1125 = torch.aten.rsqrt %1124 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%1126 = torch.aten.sub.Tensor %1120, %1123, %int1 : !torch.vtensor<[2,32,40,256],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,256],f32>
%1127 = torch.aten.mul.Tensor %1126, %1125 : !torch.vtensor<[2,32,40,256],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,256],f32>
%1128 = torch.aten.view %1127, %918 : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f32>
%1129 = torch.aten.unsqueeze %29, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%1130 = torch.aten.unsqueeze %1129, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%1131 = torch.aten.mul.Tensor %1128, %1130 : !torch.vtensor<[2,1280,16,16],f32>, !torch.vtensor<[1280,1,1],f16> -> !torch.vtensor<[2,1280,16,16],f32>
%1132 = torch.aten.unsqueeze %29, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%1133 = torch.aten.unsqueeze %1132, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%1134 = torch.aten.add.Tensor %1131, %1133, %int1 : !torch.vtensor<[2,1280,16,16],f32>, !torch.vtensor<[1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f32>
%1135 = torch.aten._to_copy %1134, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,1280,16,16],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,16,16],f16>
%1136 = torch.aten.silu %1135 : !torch.vtensor<[2,1280,16,16],f16> -> !torch.vtensor<[2,1280,16,16],f16>
%1137 = torch.aten._convolution %1136, %25, %29, %87, %87, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,16,16],f16>
%1138 = torch.aten.add.Tensor %1092, %1137, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
%1139 = torch.aten.div.Tensor %1138, %4 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[2,1280,16,16],f16>
%1140 = torch.aten.clone %1139, %int0 : !torch.vtensor<[2,1280,16,16],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
%1141 = torch.aten.view %1140, %909 : !torch.vtensor<[2,1280,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,32,40,256],f16>
%1142 = torch.aten._to_copy %1141, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,32,40,256],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,32,40,256],f32>
%1143 = torch.aten.var.correction %1142, %93, %int0, %true : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>
%1144 = torch.aten.mean.dim %1142, %93, %true, %none : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%1145 = torch.aten.add.Tensor %1143, %3, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%1146 = torch.aten.rsqrt %1145 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%1147 = torch.aten.sub.Tensor %1141, %1144, %int1 : !torch.vtensor<[2,32,40,256],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,256],f32>
%1148 = torch.aten.mul.Tensor %1147, %1146 : !torch.vtensor<[2,32,40,256],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,256],f32>
%1149 = torch.aten.view %1148, %918 : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f32>
%1150 = torch.aten.unsqueeze %29, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%1151 = torch.aten.unsqueeze %1150, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%1152 = torch.aten.mul.Tensor %1149, %1151 : !torch.vtensor<[2,1280,16,16],f32>, !torch.vtensor<[1280,1,1],f16> -> !torch.vtensor<[2,1280,16,16],f32>
%1153 = torch.aten.unsqueeze %29, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%1154 = torch.aten.unsqueeze %1153, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%1155 = torch.aten.add.Tensor %1152, %1154, %int1 : !torch.vtensor<[2,1280,16,16],f32>, !torch.vtensor<[1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f32>
%1156 = torch.aten._to_copy %1155, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,1280,16,16],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,16,16],f16>
%1157 = torch.aten._convolution %1156, %24, %29, %87, %88, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[1280,1280,1,1],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,16,16],f16>
%1158 = torch.aten.permute %1157, %156 : !torch.vtensor<[2,1280,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,16,16,1280],f16>
%1159 = torch.aten._reshape_alias %1158, %951, %952 : !torch.vtensor<[2,16,16,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%result0_43, %result1_44, %result2_45 = torch.aten.native_layer_norm %1159, %954, %29, %29, %float1.000000e-05 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.vtensor<[1280],f16>, !torch.vtensor<[1280],f16>, !torch.float -> !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1],f32>, !torch.vtensor<[2,256,1],f32>
%1160 = torch.aten.t %20 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%1161 = torch.aten._reshape_alias %result0_43, %956, %957 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
%1162 = torch.aten.mm %1161, %1160 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16>
%1163 = torch.aten._unsafe_view %1162, %951 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%1164 = torch.aten.t %20 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%1165 = torch.aten._reshape_alias %result0_43, %956, %957 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
%1166 = torch.aten.mm %1165, %1164 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16>
%1167 = torch.aten._unsafe_view %1166, %951 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%1168 = torch.aten.t %20 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%1169 = torch.aten._reshape_alias %result0_43, %956, %957 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
%1170 = torch.aten.mm %1169, %1168 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16>
%1171 = torch.aten._unsafe_view %1170, %951 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%1172 = torch.aten._reshape_alias %1163, %969, %970 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16>
%1173 = torch.aten.permute %1172, %179 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16>
%1174 = torch.aten.clone %1173, %int0 : !torch.vtensor<[2,8,256,160],f16>, !torch.int -> !torch.vtensor<[2,8,256,160],f16>
%1175 = torch.aten._unsafe_view %1174, %974 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%1176 = torch.aten._reshape_alias %1167, %969, %970 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16>
%1177 = torch.aten.permute %1176, %179 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16>
%1178 = torch.aten.clone %1177, %int0 : !torch.vtensor<[2,8,256,160],f16>, !torch.int -> !torch.vtensor<[2,8,256,160],f16>
%1179 = torch.aten._unsafe_view %1178, %974 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%1180 = torch.aten._reshape_alias %1171, %969, %970 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16>
%1181 = torch.aten.permute %1180, %179 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16>
%1182 = torch.aten.clone %1181, %int0 : !torch.vtensor<[2,8,256,160],f16>, !torch.int -> !torch.vtensor<[2,8,256,160],f16>
%1183 = torch.aten._unsafe_view %1182, %974 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%1184 = torch.aten.transpose.int %1179, %int-1, %int-2 : !torch.vtensor<[16,256,160],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,160,256],f16>
%1185 = torch.aten.expand %1175, %974, %false : !torch.vtensor<[16,256,160],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,256,160],f16>
%1186 = torch.aten._reshape_alias %1185, %974, %986 : !torch.vtensor<[16,256,160],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%1187 = torch.aten.expand %1184, %988, %false : !torch.vtensor<[16,160,256],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,160,256],f16>
%1188 = torch.aten._reshape_alias %1187, %988, %990 : !torch.vtensor<[16,160,256],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,160,256],f16>
%1189 = torch.aten.bmm %1186, %1188 : !torch.vtensor<[16,256,160],f16>, !torch.vtensor<[16,160,256],f16> -> !torch.vtensor<[16,256,256],f16>
%1190 = torch.aten._unsafe_view %1189, %993 : !torch.vtensor<[16,256,256],f16>, !torch.list<int> -> !torch.vtensor<[16,256,256],f16>
%1191 = torch.aten.mul.Tensor %1190, %0 : !torch.vtensor<[16,256,256],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,256,256],f16>
%1192 = torch.aten._softmax %1191, %int-1, %false : !torch.vtensor<[16,256,256],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,256,256],f16>
%1193 = torch.aten.expand %1192, %993, %false : !torch.vtensor<[16,256,256],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,256,256],f16>
%1194 = torch.aten._reshape_alias %1193, %993, %998 : !torch.vtensor<[16,256,256],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,256,256],f16>
%1195 = torch.aten.expand %1183, %974, %false : !torch.vtensor<[16,256,160],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,256,160],f16>
%1196 = torch.aten._reshape_alias %1195, %974, %986 : !torch.vtensor<[16,256,160],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%1197 = torch.aten.bmm %1194, %1196 : !torch.vtensor<[16,256,256],f16>, !torch.vtensor<[16,256,160],f16> -> !torch.vtensor<[16,256,160],f16>
%1198 = torch.aten._unsafe_view %1197, %974 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%1199 = torch.aten._reshape_alias %1198, %1004, %1005 : !torch.vtensor<[16,256,160],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16>
%1200 = torch.aten.permute %1199, %179 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16>
%1201 = torch.aten.clone %1200, %int0 : !torch.vtensor<[2,256,8,160],f16>, !torch.int -> !torch.vtensor<[2,256,8,160],f16>
%1202 = torch.aten._unsafe_view %1201, %951 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%1203 = torch.aten.t %20 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%1204 = torch.aten.view %1202, %956 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
%1205 = torch.aten.addmm %29, %1204, %1203, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[512,1280],f16>
%1206 = torch.aten.view %1205, %951 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%1207 = torch.aten.add.Tensor %1206, %1159, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16>
%result0_46, %result1_47, %result2_48 = torch.aten.native_layer_norm %1207, %954, %29, %29, %float1.000000e-05 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.vtensor<[1280],f16>, !torch.vtensor<[1280],f16>, !torch.float -> !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1],f32>, !torch.vtensor<[2,256,1],f32>
%1208 = torch.aten.t %20 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%1209 = torch.aten._reshape_alias %result0_46, %956, %957 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
%1210 = torch.aten.mm %1209, %1208 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16>
%1211 = torch.aten._unsafe_view %1210, %951 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%1212 = torch.aten.t %19 : !torch.vtensor<[1280,768],f16> -> !torch.vtensor<[768,1280],f16>
%1213 = torch.aten._reshape_alias %arg2, %228, %229 : !torch.vtensor<[2,77,768],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[154,768],f16>
%1214 = torch.aten.mm %1213, %1212 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,1280],f16> -> !torch.vtensor<[154,1280],f16>
%1215 = torch.aten._unsafe_view %1214, %1022 : !torch.vtensor<[154,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,77,1280],f16>
%1216 = torch.aten.t %19 : !torch.vtensor<[1280,768],f16> -> !torch.vtensor<[768,1280],f16>
%1217 = torch.aten._reshape_alias %arg2, %228, %229 : !torch.vtensor<[2,77,768],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[154,768],f16>
%1218 = torch.aten.mm %1217, %1216 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,1280],f16> -> !torch.vtensor<[154,1280],f16>
%1219 = torch.aten._unsafe_view %1218, %1022 : !torch.vtensor<[154,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,77,1280],f16>
%1220 = torch.aten._reshape_alias %1211, %969, %970 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16>
%1221 = torch.aten.permute %1220, %179 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16>
%1222 = torch.aten.clone %1221, %int0 : !torch.vtensor<[2,8,256,160],f16>, !torch.int -> !torch.vtensor<[2,8,256,160],f16>
%1223 = torch.aten._unsafe_view %1222, %974 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%1224 = torch.aten._reshape_alias %1215, %1032, %1033 : !torch.vtensor<[2,77,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,77,8,160],f16>
%1225 = torch.aten.permute %1224, %179 : !torch.vtensor<[2,77,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,160],f16>
%1226 = torch.aten.clone %1225, %int0 : !torch.vtensor<[2,8,77,160],f16>, !torch.int -> !torch.vtensor<[2,8,77,160],f16>
%1227 = torch.aten._unsafe_view %1226, %1037 : !torch.vtensor<[2,8,77,160],f16>, !torch.list<int> -> !torch.vtensor<[16,77,160],f16>
%1228 = torch.aten._reshape_alias %1219, %1032, %1033 : !torch.vtensor<[2,77,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,77,8,160],f16>
%1229 = torch.aten.permute %1228, %179 : !torch.vtensor<[2,77,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,160],f16>
%1230 = torch.aten.clone %1229, %int0 : !torch.vtensor<[2,8,77,160],f16>, !torch.int -> !torch.vtensor<[2,8,77,160],f16>
%1231 = torch.aten._unsafe_view %1230, %1037 : !torch.vtensor<[2,8,77,160],f16>, !torch.list<int> -> !torch.vtensor<[16,77,160],f16>
%1232 = torch.aten.transpose.int %1227, %int-1, %int-2 : !torch.vtensor<[16,77,160],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,160,77],f16>
%1233 = torch.aten.expand %1223, %974, %false : !torch.vtensor<[16,256,160],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,256,160],f16>
%1234 = torch.aten._reshape_alias %1233, %974, %986 : !torch.vtensor<[16,256,160],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%1235 = torch.aten.expand %1232, %1046, %false : !torch.vtensor<[16,160,77],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,160,77],f16>
%1236 = torch.aten._reshape_alias %1235, %1046, %1048 : !torch.vtensor<[16,160,77],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,160,77],f16>
%1237 = torch.aten.bmm %1234, %1236 : !torch.vtensor<[16,256,160],f16>, !torch.vtensor<[16,160,77],f16> -> !torch.vtensor<[16,256,77],f16>
%1238 = torch.aten._unsafe_view %1237, %1051 : !torch.vtensor<[16,256,77],f16>, !torch.list<int> -> !torch.vtensor<[16,256,77],f16>
%1239 = torch.aten.mul.Tensor %1238, %0 : !torch.vtensor<[16,256,77],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,256,77],f16>
%1240 = torch.aten._softmax %1239, %int-1, %false : !torch.vtensor<[16,256,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,256,77],f16>
%1241 = torch.aten.expand %1240, %1051, %false : !torch.vtensor<[16,256,77],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,256,77],f16>
%1242 = torch.aten._reshape_alias %1241, %1051, %1056 : !torch.vtensor<[16,256,77],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,256,77],f16>
%1243 = torch.aten.expand %1231, %1037, %false : !torch.vtensor<[16,77,160],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,77,160],f16>
%1244 = torch.aten._reshape_alias %1243, %1037, %1059 : !torch.vtensor<[16,77,160],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,77,160],f16>
%1245 = torch.aten.bmm %1242, %1244 : !torch.vtensor<[16,256,77],f16>, !torch.vtensor<[16,77,160],f16> -> !torch.vtensor<[16,256,160],f16>
%1246 = torch.aten._unsafe_view %1245, %974 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%1247 = torch.aten._reshape_alias %1246, %1004, %1005 : !torch.vtensor<[16,256,160],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16>
%1248 = torch.aten.permute %1247, %179 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16>
%1249 = torch.aten.clone %1248, %int0 : !torch.vtensor<[2,256,8,160],f16>, !torch.int -> !torch.vtensor<[2,256,8,160],f16>
%1250 = torch.aten._unsafe_view %1249, %951 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%1251 = torch.aten.t %20 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%1252 = torch.aten.view %1250, %956 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
%1253 = torch.aten.addmm %29, %1252, %1251, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[512,1280],f16>
%1254 = torch.aten.view %1253, %951 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%1255 = torch.aten.add.Tensor %1254, %1207, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16>
%result0_49, %result1_50, %result2_51 = torch.aten.native_layer_norm %1255, %954, %29, %29, %float1.000000e-05 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.vtensor<[1280],f16>, !torch.vtensor<[1280],f16>, !torch.float -> !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1],f32>, !torch.vtensor<[2,256,1],f32>
%1256 = torch.aten.t %21 : !torch.vtensor<[10240,1280],f16> -> !torch.vtensor<[1280,10240],f16>
%1257 = torch.aten.view %result0_49, %956 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
%1258 = torch.aten.addmm %22, %1257, %1256, %int1, %int1 : !torch.vtensor<[10240],f16>, !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,10240],f16>, !torch.int, !torch.int -> !torch.vtensor<[512,10240],f16>
%1259 = torch.aten.view %1258, %1075 : !torch.vtensor<[512,10240],f16>, !torch.list<int> -> !torch.vtensor<[2,256,10240],f16>
%1260 = torch.aten.slice.Tensor %1259, %int-1, %int0, %int5120, %int1 : !torch.vtensor<[2,256,10240],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,256,5120],f16>
%1261 = torch.aten.slice.Tensor %1259, %int-1, %int5120, %int10240, %int1 : !torch.vtensor<[2,256,10240],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,256,5120],f16>
%1262 = torch.aten.gelu %1261, %str_0 : !torch.vtensor<[2,256,5120],f16>, !torch.str -> !torch.vtensor<[2,256,5120],f16>
%1263 = torch.aten.mul.Tensor %1260, %1262 : !torch.vtensor<[2,256,5120],f16>, !torch.vtensor<[2,256,5120],f16> -> !torch.vtensor<[2,256,5120],f16>
%1264 = torch.aten.t %23 : !torch.vtensor<[1280,5120],f16> -> !torch.vtensor<[5120,1280],f16>
%1265 = torch.aten.view %1263, %1082 : !torch.vtensor<[2,256,5120],f16>, !torch.list<int> -> !torch.vtensor<[512,5120],f16>
%1266 = torch.aten.addmm %29, %1265, %1264, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[512,5120],f16>, !torch.vtensor<[5120,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[512,1280],f16>
%1267 = torch.aten.view %1266, %951 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%1268 = torch.aten.add.Tensor %1267, %1255, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16>
%1269 = torch.aten._reshape_alias %1268, %1087, %1088 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,16,16,1280],f16>
%1270 = torch.aten.permute %1269, %300 : !torch.vtensor<[2,16,16,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f16>
%1271 = torch.aten._convolution %1270, %24, %29, %87, %88, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[1280,1280,1,1],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,16,16],f16>
%1272 = torch.aten.add.Tensor %1271, %1139, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
%1273 = torch.aten._convolution %1272, %25, %29, %484, %87, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,8,8],f16>
%1274 = torch.aten.clone %1273, %int0 : !torch.vtensor<[2,1280,8,8],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16>
%1275 = torch.prim.ListConstruct %int2, %int32, %int40, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1276 = torch.aten.view %1274, %1275 : !torch.vtensor<[2,1280,8,8],f16>, !torch.list<int> -> !torch.vtensor<[2,32,40,64],f16>
%1277 = torch.aten._to_copy %1276, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,32,40,64],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,32,40,64],f32>
%1278 = torch.aten.var.correction %1277, %93, %int0, %true : !torch.vtensor<[2,32,40,64],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>
%1279 = torch.aten.mean.dim %1277, %93, %true, %none : !torch.vtensor<[2,32,40,64],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%1280 = torch.aten.add.Tensor %1278, %5, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%1281 = torch.aten.rsqrt %1280 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%1282 = torch.aten.sub.Tensor %1276, %1279, %int1 : !torch.vtensor<[2,32,40,64],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,64],f32>
%1283 = torch.aten.mul.Tensor %1282, %1281 : !torch.vtensor<[2,32,40,64],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,64],f32>
%1284 = torch.prim.ListConstruct %int2, %int1280, %int8, %int8 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1285 = torch.aten.view %1283, %1284 : !torch.vtensor<[2,32,40,64],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,8,8],f32>
%1286 = torch.aten.unsqueeze %29, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%1287 = torch.aten.unsqueeze %1286, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%1288 = torch.aten.mul.Tensor %1285, %1287 : !torch.vtensor<[2,1280,8,8],f32>, !torch.vtensor<[1280,1,1],f16> -> !torch.vtensor<[2,1280,8,8],f32>
%1289 = torch.aten.unsqueeze %29, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%1290 = torch.aten.unsqueeze %1289, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%1291 = torch.aten.add.Tensor %1288, %1290, %int1 : !torch.vtensor<[2,1280,8,8],f32>, !torch.vtensor<[1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f32>
%1292 = torch.aten._to_copy %1291, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,1280,8,8],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,8,8],f16>
%1293 = torch.aten.silu %1292 : !torch.vtensor<[2,1280,8,8],f16> -> !torch.vtensor<[2,1280,8,8],f16>
%1294 = torch.aten._convolution %1293, %25, %29, %87, %87, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,8,8],f16>
%1295 = torch.aten.silu %86 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%1296 = torch.aten.t %20 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%1297 = torch.aten.addmm %29, %1295, %1296, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
%1298 = torch.aten.slice.Tensor %1297, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,1280],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
%1299 = torch.aten.slice.Tensor %1298, %int1, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,1280],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
%1300 = torch.aten.unsqueeze %1299, %int2 : !torch.vtensor<[2,1280],f16>, !torch.int -> !torch.vtensor<[2,1280,1],f16>
%1301 = torch.aten.unsqueeze %1300, %int3 : !torch.vtensor<[2,1280,1],f16>, !torch.int -> !torch.vtensor<[2,1280,1,1],f16>
%1302 = torch.aten.add.Tensor %1294, %1301, %int1 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16>
%1303 = torch.aten.view %1302, %1275 : !torch.vtensor<[2,1280,8,8],f16>, !torch.list<int> -> !torch.vtensor<[2,32,40,64],f16>
%1304 = torch.aten._to_copy %1303, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,32,40,64],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,32,40,64],f32>
%1305 = torch.aten.var.correction %1304, %93, %int0, %true : !torch.vtensor<[2,32,40,64],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>
%1306 = torch.aten.mean.dim %1304, %93, %true, %none : !torch.vtensor<[2,32,40,64],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%1307 = torch.aten.add.Tensor %1305, %5, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%1308 = torch.aten.rsqrt %1307 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%1309 = torch.aten.sub.Tensor %1303, %1306, %int1 : !torch.vtensor<[2,32,40,64],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,64],f32>
%1310 = torch.aten.mul.Tensor %1309, %1308 : !torch.vtensor<[2,32,40,64],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,64],f32>
%1311 = torch.aten.view %1310, %1284 : !torch.vtensor<[2,32,40,64],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,8,8],f32>
%1312 = torch.aten.unsqueeze %29, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%1313 = torch.aten.unsqueeze %1312, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%1314 = torch.aten.mul.Tensor %1311, %1313 : !torch.vtensor<[2,1280,8,8],f32>, !torch.vtensor<[1280,1,1],f16> -> !torch.vtensor<[2,1280,8,8],f32>
%1315 = torch.aten.unsqueeze %29, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%1316 = torch.aten.unsqueeze %1315, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%1317 = torch.aten.add.Tensor %1314, %1316, %int1 : !torch.vtensor<[2,1280,8,8],f32>, !torch.vtensor<[1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f32>
%1318 = torch.aten._to_copy %1317, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,1280,8,8],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,8,8],f16>
%1319 = torch.aten.silu %1318 : !torch.vtensor<[2,1280,8,8],f16> -> !torch.vtensor<[2,1280,8,8],f16>
%1320 = torch.aten._convolution %1319, %25, %29, %87, %87, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,8,8],f16>
%1321 = torch.aten.add.Tensor %1273, %1320, %int1 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,8,8],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16>
%1322 = torch.aten.div.Tensor %1321, %4 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[2,1280,8,8],f16>
%1323 = torch.aten.clone %1322, %int0 : !torch.vtensor<[2,1280,8,8],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16>
%1324 = torch.aten.view %1323, %1275 : !torch.vtensor<[2,1280,8,8],f16>, !torch.list<int> -> !torch.vtensor<[2,32,40,64],f16>
%1325 = torch.aten._to_copy %1324, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,32,40,64],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,32,40,64],f32>
%1326 = torch.aten.var.correction %1325, %93, %int0, %true : !torch.vtensor<[2,32,40,64],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>
%1327 = torch.aten.mean.dim %1325, %93, %true, %none : !torch.vtensor<[2,32,40,64],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%1328 = torch.aten.add.Tensor %1326, %5, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%1329 = torch.aten.rsqrt %1328 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%1330 = torch.aten.sub.Tensor %1324, %1327, %int1 : !torch.vtensor<[2,32,40,64],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,64],f32>
%1331 = torch.aten.mul.Tensor %1330, %1329 : !torch.vtensor<[2,32,40,64],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,64],f32>
%1332 = torch.aten.view %1331, %1284 : !torch.vtensor<[2,32,40,64],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,8,8],f32>
%1333 = torch.aten.unsqueeze %29, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%1334 = torch.aten.unsqueeze %1333, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%1335 = torch.aten.mul.Tensor %1332, %1334 : !torch.vtensor<[2,1280,8,8],f32>, !torch.vtensor<[1280,1,1],f16> -> !torch.vtensor<[2,1280,8,8],f32>
%1336 = torch.aten.unsqueeze %29, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%1337 = torch.aten.unsqueeze %1336, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%1338 = torch.aten.add.Tensor %1335, %1337, %int1 : !torch.vtensor<[2,1280,8,8],f32>, !torch.vtensor<[1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f32>
%1339 = torch.aten._to_copy %1338, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,1280,8,8],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,8,8],f16>
%1340 = torch.aten.silu %1339 : !torch.vtensor<[2,1280,8,8],f16> -> !torch.vtensor<[2,1280,8,8],f16>
%1341 = torch.aten._convolution %1340, %25, %29, %87, %87, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,8,8],f16>
%1342 = torch.aten.silu %86 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%1343 = torch.aten.t %20 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%1344 = torch.aten.addmm %29, %1342, %1343, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
%1345 = torch.aten.slice.Tensor %1344, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,1280],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
%1346 = torch.aten.slice.Tensor %1345, %int1, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,1280],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
%1347 = torch.aten.unsqueeze %1346, %int2 : !torch.vtensor<[2,1280],f16>, !torch.int -> !torch.vtensor<[2,1280,1],f16>
%1348 = torch.aten.unsqueeze %1347, %int3 : !torch.vtensor<[2,1280,1],f16>, !torch.int -> !torch.vtensor<[2,1280,1,1],f16>
%1349 = torch.aten.add.Tensor %1341, %1348, %int1 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16>
%1350 = torch.aten.view %1349, %1275 : !torch.vtensor<[2,1280,8,8],f16>, !torch.list<int> -> !torch.vtensor<[2,32,40,64],f16>
%1351 = torch.aten._to_copy %1350, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,32,40,64],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,32,40,64],f32>
%1352 = torch.aten.var.correction %1351, %93, %int0, %true : !torch.vtensor<[2,32,40,64],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>
%1353 = torch.aten.mean.dim %1351, %93, %true, %none : !torch.vtensor<[2,32,40,64],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%1354 = torch.aten.add.Tensor %1352, %5, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%1355 = torch.aten.rsqrt %1354 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%1356 = torch.aten.sub.Tensor %1350, %1353, %int1 : !torch.vtensor<[2,32,40,64],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,64],f32>
%1357 = torch.aten.mul.Tensor %1356, %1355 : !torch.vtensor<[2,32,40,64],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,64],f32>
%1358 = torch.aten.view %1357, %1284 : !torch.vtensor<[2,32,40,64],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,8,8],f32>
%1359 = torch.aten.unsqueeze %29, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%1360 = torch.aten.unsqueeze %1359, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%1361 = torch.aten.mul.Tensor %1358, %1360 : !torch.vtensor<[2,1280,8,8],f32>, !torch.vtensor<[1280,1,1],f16> -> !torch.vtensor<[2,1280,8,8],f32>
%1362 = torch.aten.unsqueeze %29, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%1363 = torch.aten.unsqueeze %1362, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%1364 = torch.aten.add.Tensor %1361, %1363, %int1 : !torch.vtensor<[2,1280,8,8],f32>, !torch.vtensor<[1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f32>
%1365 = torch.aten._to_copy %1364, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,1280,8,8],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,8,8],f16>
%1366 = torch.aten.silu %1365 : !torch.vtensor<[2,1280,8,8],f16> -> !torch.vtensor<[2,1280,8,8],f16>
%1367 = torch.aten._convolution %1366, %25, %29, %87, %87, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,8,8],f16>
%1368 = torch.aten.add.Tensor %1322, %1367, %int1 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,8,8],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16>
%1369 = torch.aten.div.Tensor %1368, %4 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[2,1280,8,8],f16>
%1370 = torch.aten.clone %1369, %int0 : !torch.vtensor<[2,1280,8,8],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16>
%1371 = torch.aten.view %1370, %1275 : !torch.vtensor<[2,1280,8,8],f16>, !torch.list<int> -> !torch.vtensor<[2,32,40,64],f16>
%1372 = torch.aten._to_copy %1371, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,32,40,64],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,32,40,64],f32>
%1373 = torch.aten.var.correction %1372, %93, %int0, %true : !torch.vtensor<[2,32,40,64],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>
%1374 = torch.aten.mean.dim %1372, %93, %true, %none : !torch.vtensor<[2,32,40,64],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%1375 = torch.aten.add.Tensor %1373, %5, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%1376 = torch.aten.rsqrt %1375 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%1377 = torch.aten.sub.Tensor %1371, %1374, %int1 : !torch.vtensor<[2,32,40,64],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,64],f32>
%1378 = torch.aten.mul.Tensor %1377, %1376 : !torch.vtensor<[2,32,40,64],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,64],f32>
%1379 = torch.aten.view %1378, %1284 : !torch.vtensor<[2,32,40,64],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,8,8],f32>
%1380 = torch.aten.unsqueeze %29, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%1381 = torch.aten.unsqueeze %1380, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%1382 = torch.aten.mul.Tensor %1379, %1381 : !torch.vtensor<[2,1280,8,8],f32>, !torch.vtensor<[1280,1,1],f16> -> !torch.vtensor<[2,1280,8,8],f32>
%1383 = torch.aten.unsqueeze %29, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%1384 = torch.aten.unsqueeze %1383, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%1385 = torch.aten.add.Tensor %1382, %1384, %int1 : !torch.vtensor<[2,1280,8,8],f32>, !torch.vtensor<[1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f32>
%1386 = torch.aten._to_copy %1385, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,1280,8,8],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,8,8],f16>
%1387 = torch.aten.silu %1386 : !torch.vtensor<[2,1280,8,8],f16> -> !torch.vtensor<[2,1280,8,8],f16>
%1388 = torch.aten._convolution %1387, %25, %29, %87, %87, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,8,8],f16>
%1389 = torch.aten.silu %86 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%1390 = torch.aten.t %20 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%1391 = torch.aten.addmm %29, %1389, %1390, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
%1392 = torch.aten.slice.Tensor %1391, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,1280],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
%1393 = torch.aten.slice.Tensor %1392, %int1, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,1280],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
%1394 = torch.aten.unsqueeze %1393, %int2 : !torch.vtensor<[2,1280],f16>, !torch.int -> !torch.vtensor<[2,1280,1],f16>
%1395 = torch.aten.unsqueeze %1394, %int3 : !torch.vtensor<[2,1280,1],f16>, !torch.int -> !torch.vtensor<[2,1280,1,1],f16>
%1396 = torch.aten.add.Tensor %1388, %1395, %int1 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16>
%1397 = torch.aten.view %1396, %1275 : !torch.vtensor<[2,1280,8,8],f16>, !torch.list<int> -> !torch.vtensor<[2,32,40,64],f16>
%1398 = torch.aten._to_copy %1397, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,32,40,64],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,32,40,64],f32>
%1399 = torch.aten.var.correction %1398, %93, %int0, %true : !torch.vtensor<[2,32,40,64],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>
%1400 = torch.aten.mean.dim %1398, %93, %true, %none : !torch.vtensor<[2,32,40,64],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%1401 = torch.aten.add.Tensor %1399, %5, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%1402 = torch.aten.rsqrt %1401 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%1403 = torch.aten.sub.Tensor %1397, %1400, %int1 : !torch.vtensor<[2,32,40,64],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,64],f32>
%1404 = torch.aten.mul.Tensor %1403, %1402 : !torch.vtensor<[2,32,40,64],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,64],f32>
%1405 = torch.aten.view %1404, %1284 : !torch.vtensor<[2,32,40,64],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,8,8],f32>
%1406 = torch.aten.unsqueeze %29, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%1407 = torch.aten.unsqueeze %1406, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%1408 = torch.aten.mul.Tensor %1405, %1407 : !torch.vtensor<[2,1280,8,8],f32>, !torch.vtensor<[1280,1,1],f16> -> !torch.vtensor<[2,1280,8,8],f32>
%1409 = torch.aten.unsqueeze %29, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%1410 = torch.aten.unsqueeze %1409, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%1411 = torch.aten.add.Tensor %1408, %1410, %int1 : !torch.vtensor<[2,1280,8,8],f32>, !torch.vtensor<[1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f32>
%1412 = torch.aten._to_copy %1411, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,1280,8,8],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,8,8],f16>
%1413 = torch.aten.silu %1412 : !torch.vtensor<[2,1280,8,8],f16> -> !torch.vtensor<[2,1280,8,8],f16>
%1414 = torch.aten._convolution %1413, %25, %29, %87, %87, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,8,8],f16>
%1415 = torch.aten.add.Tensor %1369, %1414, %int1 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,8,8],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16>
%1416 = torch.aten.div.Tensor %1415, %6 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[],si64> -> !torch.vtensor<[2,1280,8,8],f16>
%1417 = torch.aten.clone %1416, %int0 : !torch.vtensor<[2,1280,8,8],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16>
%1418 = torch.aten.view %1417, %1275 : !torch.vtensor<[2,1280,8,8],f16>, !torch.list<int> -> !torch.vtensor<[2,32,40,64],f16>
%1419 = torch.aten._to_copy %1418, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,32,40,64],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,32,40,64],f32>
%1420 = torch.aten.var.correction %1419, %93, %int0, %true : !torch.vtensor<[2,32,40,64],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>
%1421 = torch.aten.mean.dim %1419, %93, %true, %none : !torch.vtensor<[2,32,40,64],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%1422 = torch.aten.add.Tensor %1420, %3, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%1423 = torch.aten.rsqrt %1422 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%1424 = torch.aten.sub.Tensor %1418, %1421, %int1 : !torch.vtensor<[2,32,40,64],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,64],f32>
%1425 = torch.aten.mul.Tensor %1424, %1423 : !torch.vtensor<[2,32,40,64],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,64],f32>
%1426 = torch.aten.view %1425, %1284 : !torch.vtensor<[2,32,40,64],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,8,8],f32>
%1427 = torch.aten.unsqueeze %29, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%1428 = torch.aten.unsqueeze %1427, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%1429 = torch.aten.mul.Tensor %1426, %1428 : !torch.vtensor<[2,1280,8,8],f32>, !torch.vtensor<[1280,1,1],f16> -> !torch.vtensor<[2,1280,8,8],f32>
%1430 = torch.aten.unsqueeze %29, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%1431 = torch.aten.unsqueeze %1430, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%1432 = torch.aten.add.Tensor %1429, %1431, %int1 : !torch.vtensor<[2,1280,8,8],f32>, !torch.vtensor<[1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f32>
%1433 = torch.aten._to_copy %1432, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,1280,8,8],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,8,8],f16>
%1434 = torch.aten._convolution %1433, %24, %29, %87, %88, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[1280,1280,1,1],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,8,8],f16>
%1435 = torch.aten.permute %1434, %156 : !torch.vtensor<[2,1280,8,8],f16>, !torch.list<int> -> !torch.vtensor<[2,8,8,1280],f16>
%1436 = torch.prim.ListConstruct %int2, %int64, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1437 = torch.prim.ListConstruct %int81920, %int1, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1438 = torch.aten._reshape_alias %1435, %1436, %1437 : !torch.vtensor<[2,8,8,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,64,1280],f16>
%result0_52, %result1_53, %result2_54 = torch.aten.native_layer_norm %1438, %954, %29, %29, %float1.000000e-05 : !torch.vtensor<[2,64,1280],f16>, !torch.list<int>, !torch.vtensor<[1280],f16>, !torch.vtensor<[1280],f16>, !torch.float -> !torch.vtensor<[2,64,1280],f16>, !torch.vtensor<[2,64,1],f32>, !torch.vtensor<[2,64,1],f32>
%1439 = torch.aten.t %20 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%1440 = torch.prim.ListConstruct %int128, %int1280 : (!torch.int, !torch.int) -> !torch.list<int>
%1441 = torch.aten._reshape_alias %result0_52, %1440, %957 : !torch.vtensor<[2,64,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[128,1280],f16>
%1442 = torch.aten.mm %1441, %1439 : !torch.vtensor<[128,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[128,1280],f16>
%1443 = torch.aten._unsafe_view %1442, %1436 : !torch.vtensor<[128,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,64,1280],f16>
%1444 = torch.aten.t %20 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%1445 = torch.aten._reshape_alias %result0_52, %1440, %957 : !torch.vtensor<[2,64,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[128,1280],f16>
%1446 = torch.aten.mm %1445, %1444 : !torch.vtensor<[128,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[128,1280],f16>
%1447 = torch.aten._unsafe_view %1446, %1436 : !torch.vtensor<[128,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,64,1280],f16>
%1448 = torch.aten.t %20 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%1449 = torch.aten._reshape_alias %result0_52, %1440, %957 : !torch.vtensor<[2,64,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[128,1280],f16>
%1450 = torch.aten.mm %1449, %1448 : !torch.vtensor<[128,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[128,1280],f16>
%1451 = torch.aten._unsafe_view %1450, %1436 : !torch.vtensor<[128,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,64,1280],f16>
%1452 = torch.prim.ListConstruct %int2, %int64, %int8, %int160 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1453 = torch.prim.ListConstruct %int81920, %int1280, %int160, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1454 = torch.aten._reshape_alias %1443, %1452, %1453 : !torch.vtensor<[2,64,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,64,8,160],f16>
%1455 = torch.aten.permute %1454, %179 : !torch.vtensor<[2,64,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,64,160],f16>
%1456 = torch.aten.clone %1455, %int0 : !torch.vtensor<[2,8,64,160],f16>, !torch.int -> !torch.vtensor<[2,8,64,160],f16>
%1457 = torch.prim.ListConstruct %int16, %int64, %int160 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1458 = torch.aten._unsafe_view %1456, %1457 : !torch.vtensor<[2,8,64,160],f16>, !torch.list<int> -> !torch.vtensor<[16,64,160],f16>
%1459 = torch.aten._reshape_alias %1447, %1452, %1453 : !torch.vtensor<[2,64,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,64,8,160],f16>
%1460 = torch.aten.permute %1459, %179 : !torch.vtensor<[2,64,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,64,160],f16>
%1461 = torch.aten.clone %1460, %int0 : !torch.vtensor<[2,8,64,160],f16>, !torch.int -> !torch.vtensor<[2,8,64,160],f16>
%1462 = torch.aten._unsafe_view %1461, %1457 : !torch.vtensor<[2,8,64,160],f16>, !torch.list<int> -> !torch.vtensor<[16,64,160],f16>
%1463 = torch.aten._reshape_alias %1451, %1452, %1453 : !torch.vtensor<[2,64,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,64,8,160],f16>
%1464 = torch.aten.permute %1463, %179 : !torch.vtensor<[2,64,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,64,160],f16>
%1465 = torch.aten.clone %1464, %int0 : !torch.vtensor<[2,8,64,160],f16>, !torch.int -> !torch.vtensor<[2,8,64,160],f16>
%1466 = torch.aten._unsafe_view %1465, %1457 : !torch.vtensor<[2,8,64,160],f16>, !torch.list<int> -> !torch.vtensor<[16,64,160],f16>
%1467 = torch.aten.transpose.int %1462, %int-1, %int-2 : !torch.vtensor<[16,64,160],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,160,64],f16>
%1468 = torch.aten.expand %1458, %1457, %false : !torch.vtensor<[16,64,160],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,64,160],f16>
%1469 = torch.prim.ListConstruct %int10240, %int160, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1470 = torch.aten._reshape_alias %1468, %1457, %1469 : !torch.vtensor<[16,64,160],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,64,160],f16>
%1471 = torch.prim.ListConstruct %int16, %int160, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1472 = torch.aten.expand %1467, %1471, %false : !torch.vtensor<[16,160,64],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,160,64],f16>
%1473 = torch.prim.ListConstruct %int10240, %int1, %int160 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1474 = torch.aten._reshape_alias %1472, %1471, %1473 : !torch.vtensor<[16,160,64],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,160,64],f16>
%1475 = torch.aten.bmm %1470, %1474 : !torch.vtensor<[16,64,160],f16>, !torch.vtensor<[16,160,64],f16> -> !torch.vtensor<[16,64,64],f16>
%1476 = torch.prim.ListConstruct %int16, %int64, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1477 = torch.aten._unsafe_view %1475, %1476 : !torch.vtensor<[16,64,64],f16>, !torch.list<int> -> !torch.vtensor<[16,64,64],f16>
%1478 = torch.aten.mul.Tensor %1477, %0 : !torch.vtensor<[16,64,64],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,64,64],f16>
%1479 = torch.aten._softmax %1478, %int-1, %false : !torch.vtensor<[16,64,64],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,64,64],f16>
%1480 = torch.aten.expand %1479, %1476, %false : !torch.vtensor<[16,64,64],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,64,64],f16>
%1481 = torch.prim.ListConstruct %int4096, %int64, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1482 = torch.aten._reshape_alias %1480, %1476, %1481 : !torch.vtensor<[16,64,64],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,64,64],f16>
%1483 = torch.aten.expand %1466, %1457, %false : !torch.vtensor<[16,64,160],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,64,160],f16>
%1484 = torch.aten._reshape_alias %1483, %1457, %1469 : !torch.vtensor<[16,64,160],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,64,160],f16>
%1485 = torch.aten.bmm %1482, %1484 : !torch.vtensor<[16,64,64],f16>, !torch.vtensor<[16,64,160],f16> -> !torch.vtensor<[16,64,160],f16>
%1486 = torch.aten._unsafe_view %1485, %1457 : !torch.vtensor<[16,64,160],f16>, !torch.list<int> -> !torch.vtensor<[16,64,160],f16>
%1487 = torch.prim.ListConstruct %int2, %int8, %int64, %int160 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1488 = torch.prim.ListConstruct %int81920, %int10240, %int160, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1489 = torch.aten._reshape_alias %1486, %1487, %1488 : !torch.vtensor<[16,64,160],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,8,64,160],f16>
%1490 = torch.aten.permute %1489, %179 : !torch.vtensor<[2,8,64,160],f16>, !torch.list<int> -> !torch.vtensor<[2,64,8,160],f16>
%1491 = torch.aten.clone %1490, %int0 : !torch.vtensor<[2,64,8,160],f16>, !torch.int -> !torch.vtensor<[2,64,8,160],f16>
%1492 = torch.aten._unsafe_view %1491, %1436 : !torch.vtensor<[2,64,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,64,1280],f16>
%1493 = torch.aten.t %20 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%1494 = torch.aten.view %1492, %1440 : !torch.vtensor<[2,64,1280],f16>, !torch.list<int> -> !torch.vtensor<[128,1280],f16>
%1495 = torch.aten.addmm %29, %1494, %1493, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[128,1280],f16>, !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[128,1280],f16>
%1496 = torch.aten.view %1495, %1436 : !torch.vtensor<[128,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,64,1280],f16>
%1497 = torch.aten.add.Tensor %1496, %1438, %int1 : !torch.vtensor<[2,64,1280],f16>, !torch.vtensor<[2,64,1280],f16>, !torch.int -> !torch.vtensor<[2,64,1280],f16>
%result0_55, %result1_56, %result2_57 = torch.aten.native_layer_norm %1497, %954, %29, %29, %float1.000000e-05 : !torch.vtensor<[2,64,1280],f16>, !torch.list<int>, !torch.vtensor<[1280],f16>, !torch.vtensor<[1280],f16>, !torch.float -> !torch.vtensor<[2,64,1280],f16>, !torch.vtensor<[2,64,1],f32>, !torch.vtensor<[2,64,1],f32>
%1498 = torch.aten.t %20 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%1499 = torch.aten._reshape_alias %result0_55, %1440, %957 : !torch.vtensor<[2,64,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[128,1280],f16>
%1500 = torch.aten.mm %1499, %1498 : !torch.vtensor<[128,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[128,1280],f16>
%1501 = torch.aten._unsafe_view %1500, %1436 : !torch.vtensor<[128,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,64,1280],f16>
%1502 = torch.aten.t %19 : !torch.vtensor<[1280,768],f16> -> !torch.vtensor<[768,1280],f16>
%1503 = torch.aten._reshape_alias %arg2, %228, %229 : !torch.vtensor<[2,77,768],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[154,768],f16>
%1504 = torch.aten.mm %1503, %1502 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,1280],f16> -> !torch.vtensor<[154,1280],f16>
%1505 = torch.aten._unsafe_view %1504, %1022 : !torch.vtensor<[154,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,77,1280],f16>
%1506 = torch.aten.t %19 : !torch.vtensor<[1280,768],f16> -> !torch.vtensor<[768,1280],f16>
%1507 = torch.aten._reshape_alias %arg2, %228, %229 : !torch.vtensor<[2,77,768],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[154,768],f16>
%1508 = torch.aten.mm %1507, %1506 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,1280],f16> -> !torch.vtensor<[154,1280],f16>
%1509 = torch.aten._unsafe_view %1508, %1022 : !torch.vtensor<[154,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,77,1280],f16>
%1510 = torch.aten._reshape_alias %1501, %1452, %1453 : !torch.vtensor<[2,64,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,64,8,160],f16>
%1511 = torch.aten.permute %1510, %179 : !torch.vtensor<[2,64,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,64,160],f16>
%1512 = torch.aten.clone %1511, %int0 : !torch.vtensor<[2,8,64,160],f16>, !torch.int -> !torch.vtensor<[2,8,64,160],f16>
%1513 = torch.aten._unsafe_view %1512, %1457 : !torch.vtensor<[2,8,64,160],f16>, !torch.list<int> -> !torch.vtensor<[16,64,160],f16>
%1514 = torch.aten._reshape_alias %1505, %1032, %1033 : !torch.vtensor<[2,77,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,77,8,160],f16>
%1515 = torch.aten.permute %1514, %179 : !torch.vtensor<[2,77,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,160],f16>
%1516 = torch.aten.clone %1515, %int0 : !torch.vtensor<[2,8,77,160],f16>, !torch.int -> !torch.vtensor<[2,8,77,160],f16>
%1517 = torch.aten._unsafe_view %1516, %1037 : !torch.vtensor<[2,8,77,160],f16>, !torch.list<int> -> !torch.vtensor<[16,77,160],f16>
%1518 = torch.aten._reshape_alias %1509, %1032, %1033 : !torch.vtensor<[2,77,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,77,8,160],f16>
%1519 = torch.aten.permute %1518, %179 : !torch.vtensor<[2,77,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,160],f16>
%1520 = torch.aten.clone %1519, %int0 : !torch.vtensor<[2,8,77,160],f16>, !torch.int -> !torch.vtensor<[2,8,77,160],f16>
%1521 = torch.aten._unsafe_view %1520, %1037 : !torch.vtensor<[2,8,77,160],f16>, !torch.list<int> -> !torch.vtensor<[16,77,160],f16>
%1522 = torch.aten.transpose.int %1517, %int-1, %int-2 : !torch.vtensor<[16,77,160],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,160,77],f16>
%1523 = torch.aten.expand %1513, %1457, %false : !torch.vtensor<[16,64,160],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,64,160],f16>
%1524 = torch.aten._reshape_alias %1523, %1457, %1469 : !torch.vtensor<[16,64,160],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,64,160],f16>
%1525 = torch.aten.expand %1522, %1046, %false : !torch.vtensor<[16,160,77],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,160,77],f16>
%1526 = torch.aten._reshape_alias %1525, %1046, %1048 : !torch.vtensor<[16,160,77],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,160,77],f16>
%1527 = torch.aten.bmm %1524, %1526 : !torch.vtensor<[16,64,160],f16>, !torch.vtensor<[16,160,77],f16> -> !torch.vtensor<[16,64,77],f16>
%1528 = torch.prim.ListConstruct %int16, %int64, %int77 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1529 = torch.aten._unsafe_view %1527, %1528 : !torch.vtensor<[16,64,77],f16>, !torch.list<int> -> !torch.vtensor<[16,64,77],f16>
%1530 = torch.aten.mul.Tensor %1529, %0 : !torch.vtensor<[16,64,77],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,64,77],f16>
%1531 = torch.aten._softmax %1530, %int-1, %false : !torch.vtensor<[16,64,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,64,77],f16>
%1532 = torch.aten.expand %1531, %1528, %false : !torch.vtensor<[16,64,77],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,64,77],f16>
%1533 = torch.prim.ListConstruct %int4928, %int77, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1534 = torch.aten._reshape_alias %1532, %1528, %1533 : !torch.vtensor<[16,64,77],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,64,77],f16>
%1535 = torch.aten.expand %1521, %1037, %false : !torch.vtensor<[16,77,160],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,77,160],f16>
%1536 = torch.aten._reshape_alias %1535, %1037, %1059 : !torch.vtensor<[16,77,160],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,77,160],f16>
%1537 = torch.aten.bmm %1534, %1536 : !torch.vtensor<[16,64,77],f16>, !torch.vtensor<[16,77,160],f16> -> !torch.vtensor<[16,64,160],f16>
%1538 = torch.aten._unsafe_view %1537, %1457 : !torch.vtensor<[16,64,160],f16>, !torch.list<int> -> !torch.vtensor<[16,64,160],f16>
%1539 = torch.aten._reshape_alias %1538, %1487, %1488 : !torch.vtensor<[16,64,160],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,8,64,160],f16>
%1540 = torch.aten.permute %1539, %179 : !torch.vtensor<[2,8,64,160],f16>, !torch.list<int> -> !torch.vtensor<[2,64,8,160],f16>
%1541 = torch.aten.clone %1540, %int0 : !torch.vtensor<[2,64,8,160],f16>, !torch.int -> !torch.vtensor<[2,64,8,160],f16>
%1542 = torch.aten._unsafe_view %1541, %1436 : !torch.vtensor<[2,64,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,64,1280],f16>
%1543 = torch.aten.t %20 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%1544 = torch.aten.view %1542, %1440 : !torch.vtensor<[2,64,1280],f16>, !torch.list<int> -> !torch.vtensor<[128,1280],f16>
%1545 = torch.aten.addmm %29, %1544, %1543, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[128,1280],f16>, !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[128,1280],f16>
%1546 = torch.aten.view %1545, %1436 : !torch.vtensor<[128,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,64,1280],f16>
%1547 = torch.aten.add.Tensor %1546, %1497, %int1 : !torch.vtensor<[2,64,1280],f16>, !torch.vtensor<[2,64,1280],f16>, !torch.int -> !torch.vtensor<[2,64,1280],f16>
%result0_58, %result1_59, %result2_60 = torch.aten.native_layer_norm %1547, %954, %29, %29, %float1.000000e-05 : !torch.vtensor<[2,64,1280],f16>, !torch.list<int>, !torch.vtensor<[1280],f16>, !torch.vtensor<[1280],f16>, !torch.float -> !torch.vtensor<[2,64,1280],f16>, !torch.vtensor<[2,64,1],f32>, !torch.vtensor<[2,64,1],f32>
%1548 = torch.aten.t %21 : !torch.vtensor<[10240,1280],f16> -> !torch.vtensor<[1280,10240],f16>
%1549 = torch.aten.view %result0_58, %1440 : !torch.vtensor<[2,64,1280],f16>, !torch.list<int> -> !torch.vtensor<[128,1280],f16>
%1550 = torch.aten.addmm %22, %1549, %1548, %int1, %int1 : !torch.vtensor<[10240],f16>, !torch.vtensor<[128,1280],f16>, !torch.vtensor<[1280,10240],f16>, !torch.int, !torch.int -> !torch.vtensor<[128,10240],f16>
%1551 = torch.prim.ListConstruct %int2, %int64, %int10240 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1552 = torch.aten.view %1550, %1551 : !torch.vtensor<[128,10240],f16>, !torch.list<int> -> !torch.vtensor<[2,64,10240],f16>
%1553 = torch.aten.slice.Tensor %1552, %int-1, %int0, %int5120, %int1 : !torch.vtensor<[2,64,10240],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,64,5120],f16>
%1554 = torch.aten.slice.Tensor %1552, %int-1, %int5120, %int10240, %int1 : !torch.vtensor<[2,64,10240],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,64,5120],f16>
%1555 = torch.aten.gelu %1554, %str_0 : !torch.vtensor<[2,64,5120],f16>, !torch.str -> !torch.vtensor<[2,64,5120],f16>
%1556 = torch.aten.mul.Tensor %1553, %1555 : !torch.vtensor<[2,64,5120],f16>, !torch.vtensor<[2,64,5120],f16> -> !torch.vtensor<[2,64,5120],f16>
%1557 = torch.aten.t %23 : !torch.vtensor<[1280,5120],f16> -> !torch.vtensor<[5120,1280],f16>
%1558 = torch.prim.ListConstruct %int128, %int5120 : (!torch.int, !torch.int) -> !torch.list<int>
%1559 = torch.aten.view %1556, %1558 : !torch.vtensor<[2,64,5120],f16>, !torch.list<int> -> !torch.vtensor<[128,5120],f16>
%1560 = torch.aten.addmm %29, %1559, %1557, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[128,5120],f16>, !torch.vtensor<[5120,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[128,1280],f16>
%1561 = torch.aten.view %1560, %1436 : !torch.vtensor<[128,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,64,1280],f16>
%1562 = torch.aten.add.Tensor %1561, %1547, %int1 : !torch.vtensor<[2,64,1280],f16>, !torch.vtensor<[2,64,1280],f16>, !torch.int -> !torch.vtensor<[2,64,1280],f16>
%1563 = torch.prim.ListConstruct %int2, %int8, %int8, %int1280 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1564 = torch.prim.ListConstruct %int81920, %int10240, %int1280, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1565 = torch.aten._reshape_alias %1562, %1563, %1564 : !torch.vtensor<[2,64,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,8,8,1280],f16>
%1566 = torch.aten.permute %1565, %300 : !torch.vtensor<[2,8,8,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,8,8],f16>
%1567 = torch.aten._convolution %1566, %24, %29, %87, %88, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[1280,1280,1,1],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,8,8],f16>
%1568 = torch.aten.add.Tensor %1567, %1416, %int1 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,8,8],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16>
%1569 = torch.aten.clone %1568, %int0 : !torch.vtensor<[2,1280,8,8],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16>
%1570 = torch.aten.view %1569, %1275 : !torch.vtensor<[2,1280,8,8],f16>, !torch.list<int> -> !torch.vtensor<[2,32,40,64],f16>
%1571 = torch.aten._to_copy %1570, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,32,40,64],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,32,40,64],f32>
%1572 = torch.aten.var.correction %1571, %93, %int0, %true : !torch.vtensor<[2,32,40,64],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>
%1573 = torch.aten.mean.dim %1571, %93, %true, %none : !torch.vtensor<[2,32,40,64],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%1574 = torch.aten.add.Tensor %1572, %5, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%1575 = torch.aten.rsqrt %1574 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%1576 = torch.aten.sub.Tensor %1570, %1573, %int1 : !torch.vtensor<[2,32,40,64],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,64],f32>
%1577 = torch.aten.mul.Tensor %1576, %1575 : !torch.vtensor<[2,32,40,64],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,64],f32>
%1578 = torch.aten.view %1577, %1284 : !torch.vtensor<[2,32,40,64],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,8,8],f32>
%1579 = torch.aten.unsqueeze %29, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%1580 = torch.aten.unsqueeze %1579, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%1581 = torch.aten.mul.Tensor %1578, %1580 : !torch.vtensor<[2,1280,8,8],f32>, !torch.vtensor<[1280,1,1],f16> -> !torch.vtensor<[2,1280,8,8],f32>
%1582 = torch.aten.unsqueeze %29, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%1583 = torch.aten.unsqueeze %1582, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%1584 = torch.aten.add.Tensor %1581, %1583, %int1 : !torch.vtensor<[2,1280,8,8],f32>, !torch.vtensor<[1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f32>
%1585 = torch.aten._to_copy %1584, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,1280,8,8],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,8,8],f16>
%1586 = torch.aten.silu %1585 : !torch.vtensor<[2,1280,8,8],f16> -> !torch.vtensor<[2,1280,8,8],f16>
%1587 = torch.aten._convolution %1586, %25, %29, %87, %87, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,8,8],f16>
%1588 = torch.aten.silu %86 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%1589 = torch.aten.t %20 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%1590 = torch.aten.addmm %29, %1588, %1589, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
%1591 = torch.aten.slice.Tensor %1590, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,1280],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
%1592 = torch.aten.slice.Tensor %1591, %int1, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,1280],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
%1593 = torch.aten.unsqueeze %1592, %int2 : !torch.vtensor<[2,1280],f16>, !torch.int -> !torch.vtensor<[2,1280,1],f16>
%1594 = torch.aten.unsqueeze %1593, %int3 : !torch.vtensor<[2,1280,1],f16>, !torch.int -> !torch.vtensor<[2,1280,1,1],f16>
%1595 = torch.aten.add.Tensor %1587, %1594, %int1 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16>
%1596 = torch.aten.view %1595, %1275 : !torch.vtensor<[2,1280,8,8],f16>, !torch.list<int> -> !torch.vtensor<[2,32,40,64],f16>
%1597 = torch.aten._to_copy %1596, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,32,40,64],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,32,40,64],f32>
%1598 = torch.aten.var.correction %1597, %93, %int0, %true : !torch.vtensor<[2,32,40,64],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>
%1599 = torch.aten.mean.dim %1597, %93, %true, %none : !torch.vtensor<[2,32,40,64],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%1600 = torch.aten.add.Tensor %1598, %5, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%1601 = torch.aten.rsqrt %1600 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%1602 = torch.aten.sub.Tensor %1596, %1599, %int1 : !torch.vtensor<[2,32,40,64],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,64],f32>
%1603 = torch.aten.mul.Tensor %1602, %1601 : !torch.vtensor<[2,32,40,64],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,64],f32>
%1604 = torch.aten.view %1603, %1284 : !torch.vtensor<[2,32,40,64],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,8,8],f32>
%1605 = torch.aten.unsqueeze %29, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%1606 = torch.aten.unsqueeze %1605, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%1607 = torch.aten.mul.Tensor %1604, %1606 : !torch.vtensor<[2,1280,8,8],f32>, !torch.vtensor<[1280,1,1],f16> -> !torch.vtensor<[2,1280,8,8],f32>
%1608 = torch.aten.unsqueeze %29, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%1609 = torch.aten.unsqueeze %1608, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%1610 = torch.aten.add.Tensor %1607, %1609, %int1 : !torch.vtensor<[2,1280,8,8],f32>, !torch.vtensor<[1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f32>
%1611 = torch.aten._to_copy %1610, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,1280,8,8],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,8,8],f16>
%1612 = torch.aten.silu %1611 : !torch.vtensor<[2,1280,8,8],f16> -> !torch.vtensor<[2,1280,8,8],f16>
%1613 = torch.aten._convolution %1612, %25, %29, %87, %87, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,8,8],f16>
%1614 = torch.aten.add.Tensor %1568, %1613, %int1 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,8,8],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16>
%1615 = torch.aten.div.Tensor %1614, %6 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[],si64> -> !torch.vtensor<[2,1280,8,8],f16>
%1616 = torch.prim.ListConstruct %1615, %1369 : (!torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,8,8],f16>) -> !torch.list<vtensor>
%1617 = torch.aten.cat %1616, %int1 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[2,2560,8,8],f16>
%1618 = torch.aten.clone %1617, %int0 : !torch.vtensor<[2,2560,8,8],f16>, !torch.int -> !torch.vtensor<[2,2560,8,8],f16>
%1619 = torch.prim.ListConstruct %int2, %int32, %int80, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1620 = torch.aten.view %1618, %1619 : !torch.vtensor<[2,2560,8,8],f16>, !torch.list<int> -> !torch.vtensor<[2,32,80,64],f16>
%1621 = torch.aten._to_copy %1620, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,32,80,64],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,32,80,64],f32>
%1622 = torch.aten.var.correction %1621, %93, %int0, %true : !torch.vtensor<[2,32,80,64],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>
%1623 = torch.aten.mean.dim %1621, %93, %true, %none : !torch.vtensor<[2,32,80,64],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%1624 = torch.aten.add.Tensor %1622, %5, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%1625 = torch.aten.rsqrt %1624 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%1626 = torch.aten.sub.Tensor %1620, %1623, %int1 : !torch.vtensor<[2,32,80,64],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,80,64],f32>
%1627 = torch.aten.mul.Tensor %1626, %1625 : !torch.vtensor<[2,32,80,64],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,80,64],f32>
%1628 = torch.prim.ListConstruct %int2, %int2560, %int8, %int8 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1629 = torch.aten.view %1627, %1628 : !torch.vtensor<[2,32,80,64],f32>, !torch.list<int> -> !torch.vtensor<[2,2560,8,8],f32>
%1630 = torch.aten.unsqueeze %52, %int-1 : !torch.vtensor<[2560],f16>, !torch.int -> !torch.vtensor<[2560,1],f16>
%1631 = torch.aten.unsqueeze %1630, %int-1 : !torch.vtensor<[2560,1],f16>, !torch.int -> !torch.vtensor<[2560,1,1],f16>
%1632 = torch.aten.mul.Tensor %1629, %1631 : !torch.vtensor<[2,2560,8,8],f32>, !torch.vtensor<[2560,1,1],f16> -> !torch.vtensor<[2,2560,8,8],f32>
%1633 = torch.aten.unsqueeze %52, %int-1 : !torch.vtensor<[2560],f16>, !torch.int -> !torch.vtensor<[2560,1],f16>
%1634 = torch.aten.unsqueeze %1633, %int-1 : !torch.vtensor<[2560,1],f16>, !torch.int -> !torch.vtensor<[2560,1,1],f16>
%1635 = torch.aten.add.Tensor %1632, %1634, %int1 : !torch.vtensor<[2,2560,8,8],f32>, !torch.vtensor<[2560,1,1],f16>, !torch.int -> !torch.vtensor<[2,2560,8,8],f32>
%1636 = torch.aten._to_copy %1635, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,2560,8,8],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,2560,8,8],f16>
%1637 = torch.aten.silu %1636 : !torch.vtensor<[2,2560,8,8],f16> -> !torch.vtensor<[2,2560,8,8],f16>
%1638 = torch.aten._convolution %1637, %15, %29, %87, %87, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,2560,8,8],f16>, !torch.vtensor<[1280,2560,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,8,8],f16>
%1639 = torch.aten.silu %86 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%1640 = torch.aten.t %20 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%1641 = torch.aten.addmm %29, %1639, %1640, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
%1642 = torch.aten.slice.Tensor %1641, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,1280],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
%1643 = torch.aten.slice.Tensor %1642, %int1, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,1280],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
%1644 = torch.aten.unsqueeze %1643, %int2 : !torch.vtensor<[2,1280],f16>, !torch.int -> !torch.vtensor<[2,1280,1],f16>
%1645 = torch.aten.unsqueeze %1644, %int3 : !torch.vtensor<[2,1280,1],f16>, !torch.int -> !torch.vtensor<[2,1280,1,1],f16>
%1646 = torch.aten.add.Tensor %1638, %1645, %int1 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16>
%1647 = torch.aten.view %1646, %1275 : !torch.vtensor<[2,1280,8,8],f16>, !torch.list<int> -> !torch.vtensor<[2,32,40,64],f16>
%1648 = torch.aten._to_copy %1647, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,32,40,64],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,32,40,64],f32>
%1649 = torch.aten.var.correction %1648, %93, %int0, %true : !torch.vtensor<[2,32,40,64],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>
%1650 = torch.aten.mean.dim %1648, %93, %true, %none : !torch.vtensor<[2,32,40,64],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%1651 = torch.aten.add.Tensor %1649, %5, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%1652 = torch.aten.rsqrt %1651 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%1653 = torch.aten.sub.Tensor %1647, %1650, %int1 : !torch.vtensor<[2,32,40,64],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,64],f32>
%1654 = torch.aten.mul.Tensor %1653, %1652 : !torch.vtensor<[2,32,40,64],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,64],f32>
%1655 = torch.aten.view %1654, %1284 : !torch.vtensor<[2,32,40,64],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,8,8],f32>
%1656 = torch.aten.unsqueeze %29, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%1657 = torch.aten.unsqueeze %1656, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%1658 = torch.aten.mul.Tensor %1655, %1657 : !torch.vtensor<[2,1280,8,8],f32>, !torch.vtensor<[1280,1,1],f16> -> !torch.vtensor<[2,1280,8,8],f32>
%1659 = torch.aten.unsqueeze %29, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%1660 = torch.aten.unsqueeze %1659, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%1661 = torch.aten.add.Tensor %1658, %1660, %int1 : !torch.vtensor<[2,1280,8,8],f32>, !torch.vtensor<[1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f32>
%1662 = torch.aten._to_copy %1661, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,1280,8,8],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,8,8],f16>
%1663 = torch.aten.silu %1662 : !torch.vtensor<[2,1280,8,8],f16> -> !torch.vtensor<[2,1280,8,8],f16>
%1664 = torch.aten._convolution %1663, %25, %29, %87, %87, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,8,8],f16>
%1665 = torch.aten._convolution %1617, %16, %29, %87, %88, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,2560,8,8],f16>, !torch.vtensor<[1280,2560,1,1],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,8,8],f16>
%1666 = torch.aten.add.Tensor %1665, %1664, %int1 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,8,8],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16>
%1667 = torch.aten.div.Tensor %1666, %4 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[2,1280,8,8],f16>
%1668 = torch.prim.ListConstruct %1667, %1322 : (!torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,8,8],f16>) -> !torch.list<vtensor>
%1669 = torch.aten.cat %1668, %int1 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[2,2560,8,8],f16>
%1670 = torch.aten.clone %1669, %int0 : !torch.vtensor<[2,2560,8,8],f16>, !torch.int -> !torch.vtensor<[2,2560,8,8],f16>
%1671 = torch.aten.view %1670, %1619 : !torch.vtensor<[2,2560,8,8],f16>, !torch.list<int> -> !torch.vtensor<[2,32,80,64],f16>
%1672 = torch.aten._to_copy %1671, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,32,80,64],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,32,80,64],f32>
%1673 = torch.aten.var.correction %1672, %93, %int0, %true : !torch.vtensor<[2,32,80,64],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>
%1674 = torch.aten.mean.dim %1672, %93, %true, %none : !torch.vtensor<[2,32,80,64],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%1675 = torch.aten.add.Tensor %1673, %5, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%1676 = torch.aten.rsqrt %1675 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%1677 = torch.aten.sub.Tensor %1671, %1674, %int1 : !torch.vtensor<[2,32,80,64],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,80,64],f32>
%1678 = torch.aten.mul.Tensor %1677, %1676 : !torch.vtensor<[2,32,80,64],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,80,64],f32>
%1679 = torch.aten.view %1678, %1628 : !torch.vtensor<[2,32,80,64],f32>, !torch.list<int> -> !torch.vtensor<[2,2560,8,8],f32>
%1680 = torch.aten.unsqueeze %52, %int-1 : !torch.vtensor<[2560],f16>, !torch.int -> !torch.vtensor<[2560,1],f16>
%1681 = torch.aten.unsqueeze %1680, %int-1 : !torch.vtensor<[2560,1],f16>, !torch.int -> !torch.vtensor<[2560,1,1],f16>
%1682 = torch.aten.mul.Tensor %1679, %1681 : !torch.vtensor<[2,2560,8,8],f32>, !torch.vtensor<[2560,1,1],f16> -> !torch.vtensor<[2,2560,8,8],f32>
%1683 = torch.aten.unsqueeze %52, %int-1 : !torch.vtensor<[2560],f16>, !torch.int -> !torch.vtensor<[2560,1],f16>
%1684 = torch.aten.unsqueeze %1683, %int-1 : !torch.vtensor<[2560,1],f16>, !torch.int -> !torch.vtensor<[2560,1,1],f16>
%1685 = torch.aten.add.Tensor %1682, %1684, %int1 : !torch.vtensor<[2,2560,8,8],f32>, !torch.vtensor<[2560,1,1],f16>, !torch.int -> !torch.vtensor<[2,2560,8,8],f32>
%1686 = torch.aten._to_copy %1685, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,2560,8,8],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,2560,8,8],f16>
%1687 = torch.aten.silu %1686 : !torch.vtensor<[2,2560,8,8],f16> -> !torch.vtensor<[2,2560,8,8],f16>
%1688 = torch.aten._convolution %1687, %15, %29, %87, %87, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,2560,8,8],f16>, !torch.vtensor<[1280,2560,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,8,8],f16>
%1689 = torch.aten.silu %86 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%1690 = torch.aten.t %20 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%1691 = torch.aten.addmm %29, %1689, %1690, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
%1692 = torch.aten.slice.Tensor %1691, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,1280],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
%1693 = torch.aten.slice.Tensor %1692, %int1, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,1280],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
%1694 = torch.aten.unsqueeze %1693, %int2 : !torch.vtensor<[2,1280],f16>, !torch.int -> !torch.vtensor<[2,1280,1],f16>
%1695 = torch.aten.unsqueeze %1694, %int3 : !torch.vtensor<[2,1280,1],f16>, !torch.int -> !torch.vtensor<[2,1280,1,1],f16>
%1696 = torch.aten.add.Tensor %1688, %1695, %int1 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16>
%1697 = torch.aten.view %1696, %1275 : !torch.vtensor<[2,1280,8,8],f16>, !torch.list<int> -> !torch.vtensor<[2,32,40,64],f16>
%1698 = torch.aten._to_copy %1697, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,32,40,64],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,32,40,64],f32>
%1699 = torch.aten.var.correction %1698, %93, %int0, %true : !torch.vtensor<[2,32,40,64],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>
%1700 = torch.aten.mean.dim %1698, %93, %true, %none : !torch.vtensor<[2,32,40,64],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%1701 = torch.aten.add.Tensor %1699, %5, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%1702 = torch.aten.rsqrt %1701 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%1703 = torch.aten.sub.Tensor %1697, %1700, %int1 : !torch.vtensor<[2,32,40,64],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,64],f32>
%1704 = torch.aten.mul.Tensor %1703, %1702 : !torch.vtensor<[2,32,40,64],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,64],f32>
%1705 = torch.aten.view %1704, %1284 : !torch.vtensor<[2,32,40,64],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,8,8],f32>
%1706 = torch.aten.unsqueeze %29, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%1707 = torch.aten.unsqueeze %1706, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%1708 = torch.aten.mul.Tensor %1705, %1707 : !torch.vtensor<[2,1280,8,8],f32>, !torch.vtensor<[1280,1,1],f16> -> !torch.vtensor<[2,1280,8,8],f32>
%1709 = torch.aten.unsqueeze %29, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%1710 = torch.aten.unsqueeze %1709, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%1711 = torch.aten.add.Tensor %1708, %1710, %int1 : !torch.vtensor<[2,1280,8,8],f32>, !torch.vtensor<[1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f32>
%1712 = torch.aten._to_copy %1711, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,1280,8,8],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,8,8],f16>
%1713 = torch.aten.silu %1712 : !torch.vtensor<[2,1280,8,8],f16> -> !torch.vtensor<[2,1280,8,8],f16>
%1714 = torch.aten._convolution %1713, %25, %29, %87, %87, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,8,8],f16>
%1715 = torch.aten._convolution %1669, %16, %29, %87, %88, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,2560,8,8],f16>, !torch.vtensor<[1280,2560,1,1],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,8,8],f16>
%1716 = torch.aten.add.Tensor %1715, %1714, %int1 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,8,8],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16>
%1717 = torch.aten.div.Tensor %1716, %4 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[2,1280,8,8],f16>
%1718 = torch.prim.ListConstruct %1717, %1273 : (!torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,8,8],f16>) -> !torch.list<vtensor>
%1719 = torch.aten.cat %1718, %int1 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[2,2560,8,8],f16>
%1720 = torch.aten.clone %1719, %int0 : !torch.vtensor<[2,2560,8,8],f16>, !torch.int -> !torch.vtensor<[2,2560,8,8],f16>
%1721 = torch.aten.view %1720, %1619 : !torch.vtensor<[2,2560,8,8],f16>, !torch.list<int> -> !torch.vtensor<[2,32,80,64],f16>
%1722 = torch.aten._to_copy %1721, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,32,80,64],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,32,80,64],f32>
%1723 = torch.aten.var.correction %1722, %93, %int0, %true : !torch.vtensor<[2,32,80,64],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>
%1724 = torch.aten.mean.dim %1722, %93, %true, %none : !torch.vtensor<[2,32,80,64],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%1725 = torch.aten.add.Tensor %1723, %5, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%1726 = torch.aten.rsqrt %1725 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%1727 = torch.aten.sub.Tensor %1721, %1724, %int1 : !torch.vtensor<[2,32,80,64],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,80,64],f32>
%1728 = torch.aten.mul.Tensor %1727, %1726 : !torch.vtensor<[2,32,80,64],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,80,64],f32>
%1729 = torch.aten.view %1728, %1628 : !torch.vtensor<[2,32,80,64],f32>, !torch.list<int> -> !torch.vtensor<[2,2560,8,8],f32>
%1730 = torch.aten.unsqueeze %52, %int-1 : !torch.vtensor<[2560],f16>, !torch.int -> !torch.vtensor<[2560,1],f16>
%1731 = torch.aten.unsqueeze %1730, %int-1 : !torch.vtensor<[2560,1],f16>, !torch.int -> !torch.vtensor<[2560,1,1],f16>
%1732 = torch.aten.mul.Tensor %1729, %1731 : !torch.vtensor<[2,2560,8,8],f32>, !torch.vtensor<[2560,1,1],f16> -> !torch.vtensor<[2,2560,8,8],f32>
%1733 = torch.aten.unsqueeze %52, %int-1 : !torch.vtensor<[2560],f16>, !torch.int -> !torch.vtensor<[2560,1],f16>
%1734 = torch.aten.unsqueeze %1733, %int-1 : !torch.vtensor<[2560,1],f16>, !torch.int -> !torch.vtensor<[2560,1,1],f16>
%1735 = torch.aten.add.Tensor %1732, %1734, %int1 : !torch.vtensor<[2,2560,8,8],f32>, !torch.vtensor<[2560,1,1],f16>, !torch.int -> !torch.vtensor<[2,2560,8,8],f32>
%1736 = torch.aten._to_copy %1735, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,2560,8,8],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,2560,8,8],f16>
%1737 = torch.aten.silu %1736 : !torch.vtensor<[2,2560,8,8],f16> -> !torch.vtensor<[2,2560,8,8],f16>
%1738 = torch.aten._convolution %1737, %15, %29, %87, %87, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,2560,8,8],f16>, !torch.vtensor<[1280,2560,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,8,8],f16>
%1739 = torch.aten.silu %86 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%1740 = torch.aten.t %20 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%1741 = torch.aten.addmm %29, %1739, %1740, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
%1742 = torch.aten.slice.Tensor %1741, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,1280],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
%1743 = torch.aten.slice.Tensor %1742, %int1, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,1280],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
%1744 = torch.aten.unsqueeze %1743, %int2 : !torch.vtensor<[2,1280],f16>, !torch.int -> !torch.vtensor<[2,1280,1],f16>
%1745 = torch.aten.unsqueeze %1744, %int3 : !torch.vtensor<[2,1280,1],f16>, !torch.int -> !torch.vtensor<[2,1280,1,1],f16>
%1746 = torch.aten.add.Tensor %1738, %1745, %int1 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16>
%1747 = torch.aten.view %1746, %1275 : !torch.vtensor<[2,1280,8,8],f16>, !torch.list<int> -> !torch.vtensor<[2,32,40,64],f16>
%1748 = torch.aten._to_copy %1747, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,32,40,64],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,32,40,64],f32>
%1749 = torch.aten.var.correction %1748, %93, %int0, %true : !torch.vtensor<[2,32,40,64],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>
%1750 = torch.aten.mean.dim %1748, %93, %true, %none : !torch.vtensor<[2,32,40,64],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%1751 = torch.aten.add.Tensor %1749, %5, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%1752 = torch.aten.rsqrt %1751 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%1753 = torch.aten.sub.Tensor %1747, %1750, %int1 : !torch.vtensor<[2,32,40,64],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,64],f32>
%1754 = torch.aten.mul.Tensor %1753, %1752 : !torch.vtensor<[2,32,40,64],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,64],f32>
%1755 = torch.aten.view %1754, %1284 : !torch.vtensor<[2,32,40,64],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,8,8],f32>
%1756 = torch.aten.unsqueeze %29, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%1757 = torch.aten.unsqueeze %1756, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%1758 = torch.aten.mul.Tensor %1755, %1757 : !torch.vtensor<[2,1280,8,8],f32>, !torch.vtensor<[1280,1,1],f16> -> !torch.vtensor<[2,1280,8,8],f32>
%1759 = torch.aten.unsqueeze %29, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%1760 = torch.aten.unsqueeze %1759, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%1761 = torch.aten.add.Tensor %1758, %1760, %int1 : !torch.vtensor<[2,1280,8,8],f32>, !torch.vtensor<[1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f32>
%1762 = torch.aten._to_copy %1761, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,1280,8,8],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,8,8],f16>
%1763 = torch.aten.silu %1762 : !torch.vtensor<[2,1280,8,8],f16> -> !torch.vtensor<[2,1280,8,8],f16>
%1764 = torch.aten._convolution %1763, %25, %29, %87, %87, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,8,8],f16>
%1765 = torch.aten._convolution %1719, %16, %29, %87, %88, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,2560,8,8],f16>, !torch.vtensor<[1280,2560,1,1],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,8,8],f16>
%1766 = torch.aten.add.Tensor %1765, %1764, %int1 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,8,8],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16>
%1767 = torch.aten.div.Tensor %1766, %4 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[2,1280,8,8],f16>
%1768 = torch.prim.ListConstruct %float2.000000e00, %float2.000000e00 : (!torch.float, !torch.float) -> !torch.list<float>
%1769 = torch.aten.upsample_nearest2d.vec %1767, %none, %1768 : !torch.vtensor<[2,1280,8,8],f16>, !torch.none, !torch.list<float> -> !torch.vtensor<[2,1280,16,16],f16>
%1770 = torch.aten._convolution %1769, %25, %29, %87, %87, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,16,16],f16>
%1771 = torch.prim.ListConstruct %1770, %1272 : (!torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f16>) -> !torch.list<vtensor>
%1772 = torch.aten.cat %1771, %int1 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[2,2560,16,16],f16>
%1773 = torch.aten.clone %1772, %int0 : !torch.vtensor<[2,2560,16,16],f16>, !torch.int -> !torch.vtensor<[2,2560,16,16],f16>
%1774 = torch.prim.ListConstruct %int2, %int32, %int80, %int256 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1775 = torch.aten.view %1773, %1774 : !torch.vtensor<[2,2560,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,32,80,256],f16>
%1776 = torch.aten._to_copy %1775, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,32,80,256],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,32,80,256],f32>
%1777 = torch.aten.var.correction %1776, %93, %int0, %true : !torch.vtensor<[2,32,80,256],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>
%1778 = torch.aten.mean.dim %1776, %93, %true, %none : !torch.vtensor<[2,32,80,256],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%1779 = torch.aten.add.Tensor %1777, %5, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%1780 = torch.aten.rsqrt %1779 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%1781 = torch.aten.sub.Tensor %1775, %1778, %int1 : !torch.vtensor<[2,32,80,256],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,80,256],f32>
%1782 = torch.aten.mul.Tensor %1781, %1780 : !torch.vtensor<[2,32,80,256],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,80,256],f32>
%1783 = torch.prim.ListConstruct %int2, %int2560, %int16, %int16 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1784 = torch.aten.view %1782, %1783 : !torch.vtensor<[2,32,80,256],f32>, !torch.list<int> -> !torch.vtensor<[2,2560,16,16],f32>
%1785 = torch.aten.unsqueeze %52, %int-1 : !torch.vtensor<[2560],f16>, !torch.int -> !torch.vtensor<[2560,1],f16>
%1786 = torch.aten.unsqueeze %1785, %int-1 : !torch.vtensor<[2560,1],f16>, !torch.int -> !torch.vtensor<[2560,1,1],f16>
%1787 = torch.aten.mul.Tensor %1784, %1786 : !torch.vtensor<[2,2560,16,16],f32>, !torch.vtensor<[2560,1,1],f16> -> !torch.vtensor<[2,2560,16,16],f32>
%1788 = torch.aten.unsqueeze %52, %int-1 : !torch.vtensor<[2560],f16>, !torch.int -> !torch.vtensor<[2560,1],f16>
%1789 = torch.aten.unsqueeze %1788, %int-1 : !torch.vtensor<[2560,1],f16>, !torch.int -> !torch.vtensor<[2560,1,1],f16>
%1790 = torch.aten.add.Tensor %1787, %1789, %int1 : !torch.vtensor<[2,2560,16,16],f32>, !torch.vtensor<[2560,1,1],f16>, !torch.int -> !torch.vtensor<[2,2560,16,16],f32>
%1791 = torch.aten._to_copy %1790, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,2560,16,16],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,2560,16,16],f16>
%1792 = torch.aten.silu %1791 : !torch.vtensor<[2,2560,16,16],f16> -> !torch.vtensor<[2,2560,16,16],f16>
%1793 = torch.aten._convolution %1792, %15, %29, %87, %87, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,2560,16,16],f16>, !torch.vtensor<[1280,2560,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,16,16],f16>
%1794 = torch.aten.silu %86 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%1795 = torch.aten.t %20 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%1796 = torch.aten.addmm %29, %1794, %1795, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
%1797 = torch.aten.slice.Tensor %1796, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,1280],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
%1798 = torch.aten.slice.Tensor %1797, %int1, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,1280],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
%1799 = torch.aten.unsqueeze %1798, %int2 : !torch.vtensor<[2,1280],f16>, !torch.int -> !torch.vtensor<[2,1280,1],f16>
%1800 = torch.aten.unsqueeze %1799, %int3 : !torch.vtensor<[2,1280,1],f16>, !torch.int -> !torch.vtensor<[2,1280,1,1],f16>
%1801 = torch.aten.add.Tensor %1793, %1800, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
%1802 = torch.aten.view %1801, %909 : !torch.vtensor<[2,1280,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,32,40,256],f16>
%1803 = torch.aten._to_copy %1802, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,32,40,256],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,32,40,256],f32>
%1804 = torch.aten.var.correction %1803, %93, %int0, %true : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>
%1805 = torch.aten.mean.dim %1803, %93, %true, %none : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%1806 = torch.aten.add.Tensor %1804, %5, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%1807 = torch.aten.rsqrt %1806 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%1808 = torch.aten.sub.Tensor %1802, %1805, %int1 : !torch.vtensor<[2,32,40,256],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,256],f32>
%1809 = torch.aten.mul.Tensor %1808, %1807 : !torch.vtensor<[2,32,40,256],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,256],f32>
%1810 = torch.aten.view %1809, %918 : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f32>
%1811 = torch.aten.unsqueeze %29, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%1812 = torch.aten.unsqueeze %1811, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%1813 = torch.aten.mul.Tensor %1810, %1812 : !torch.vtensor<[2,1280,16,16],f32>, !torch.vtensor<[1280,1,1],f16> -> !torch.vtensor<[2,1280,16,16],f32>
%1814 = torch.aten.unsqueeze %29, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%1815 = torch.aten.unsqueeze %1814, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%1816 = torch.aten.add.Tensor %1813, %1815, %int1 : !torch.vtensor<[2,1280,16,16],f32>, !torch.vtensor<[1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f32>
%1817 = torch.aten._to_copy %1816, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,1280,16,16],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,16,16],f16>
%1818 = torch.aten.silu %1817 : !torch.vtensor<[2,1280,16,16],f16> -> !torch.vtensor<[2,1280,16,16],f16>
%1819 = torch.aten._convolution %1818, %25, %29, %87, %87, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,16,16],f16>
%1820 = torch.aten._convolution %1772, %16, %29, %87, %88, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,2560,16,16],f16>, !torch.vtensor<[1280,2560,1,1],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,16,16],f16>
%1821 = torch.aten.add.Tensor %1820, %1819, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
%1822 = torch.aten.div.Tensor %1821, %4 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[2,1280,16,16],f16>
%1823 = torch.aten.clone %1822, %int0 : !torch.vtensor<[2,1280,16,16],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
%1824 = torch.aten.view %1823, %909 : !torch.vtensor<[2,1280,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,32,40,256],f16>
%1825 = torch.aten._to_copy %1824, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,32,40,256],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,32,40,256],f32>
%1826 = torch.aten.var.correction %1825, %93, %int0, %true : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>
%1827 = torch.aten.mean.dim %1825, %93, %true, %none : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%1828 = torch.aten.add.Tensor %1826, %3, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%1829 = torch.aten.rsqrt %1828 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%1830 = torch.aten.sub.Tensor %1824, %1827, %int1 : !torch.vtensor<[2,32,40,256],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,256],f32>
%1831 = torch.aten.mul.Tensor %1830, %1829 : !torch.vtensor<[2,32,40,256],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,256],f32>
%1832 = torch.aten.view %1831, %918 : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f32>
%1833 = torch.aten.unsqueeze %29, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%1834 = torch.aten.unsqueeze %1833, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%1835 = torch.aten.mul.Tensor %1832, %1834 : !torch.vtensor<[2,1280,16,16],f32>, !torch.vtensor<[1280,1,1],f16> -> !torch.vtensor<[2,1280,16,16],f32>
%1836 = torch.aten.unsqueeze %29, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%1837 = torch.aten.unsqueeze %1836, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%1838 = torch.aten.add.Tensor %1835, %1837, %int1 : !torch.vtensor<[2,1280,16,16],f32>, !torch.vtensor<[1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f32>
%1839 = torch.aten._to_copy %1838, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,1280,16,16],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,16,16],f16>
%1840 = torch.aten._convolution %1839, %24, %29, %87, %88, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[1280,1280,1,1],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,16,16],f16>
%1841 = torch.aten.permute %1840, %156 : !torch.vtensor<[2,1280,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,16,16,1280],f16>
%1842 = torch.aten._reshape_alias %1841, %951, %952 : !torch.vtensor<[2,16,16,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%result0_61, %result1_62, %result2_63 = torch.aten.native_layer_norm %1842, %954, %29, %29, %float1.000000e-05 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.vtensor<[1280],f16>, !torch.vtensor<[1280],f16>, !torch.float -> !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1],f32>, !torch.vtensor<[2,256,1],f32>
%1843 = torch.aten.t %20 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%1844 = torch.aten._reshape_alias %result0_61, %956, %957 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
%1845 = torch.aten.mm %1844, %1843 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16>
%1846 = torch.aten._unsafe_view %1845, %951 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%1847 = torch.aten.t %20 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%1848 = torch.aten._reshape_alias %result0_61, %956, %957 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
%1849 = torch.aten.mm %1848, %1847 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16>
%1850 = torch.aten._unsafe_view %1849, %951 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%1851 = torch.aten.t %20 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%1852 = torch.aten._reshape_alias %result0_61, %956, %957 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
%1853 = torch.aten.mm %1852, %1851 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16>
%1854 = torch.aten._unsafe_view %1853, %951 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%1855 = torch.aten._reshape_alias %1846, %969, %970 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16>
%1856 = torch.aten.permute %1855, %179 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16>
%1857 = torch.aten.clone %1856, %int0 : !torch.vtensor<[2,8,256,160],f16>, !torch.int -> !torch.vtensor<[2,8,256,160],f16>
%1858 = torch.aten._unsafe_view %1857, %974 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%1859 = torch.aten._reshape_alias %1850, %969, %970 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16>
%1860 = torch.aten.permute %1859, %179 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16>
%1861 = torch.aten.clone %1860, %int0 : !torch.vtensor<[2,8,256,160],f16>, !torch.int -> !torch.vtensor<[2,8,256,160],f16>
%1862 = torch.aten._unsafe_view %1861, %974 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%1863 = torch.aten._reshape_alias %1854, %969, %970 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16>
%1864 = torch.aten.permute %1863, %179 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16>
%1865 = torch.aten.clone %1864, %int0 : !torch.vtensor<[2,8,256,160],f16>, !torch.int -> !torch.vtensor<[2,8,256,160],f16>
%1866 = torch.aten._unsafe_view %1865, %974 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%1867 = torch.aten.transpose.int %1862, %int-1, %int-2 : !torch.vtensor<[16,256,160],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,160,256],f16>
%1868 = torch.aten.expand %1858, %974, %false : !torch.vtensor<[16,256,160],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,256,160],f16>
%1869 = torch.aten._reshape_alias %1868, %974, %986 : !torch.vtensor<[16,256,160],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%1870 = torch.aten.expand %1867, %988, %false : !torch.vtensor<[16,160,256],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,160,256],f16>
%1871 = torch.aten._reshape_alias %1870, %988, %990 : !torch.vtensor<[16,160,256],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,160,256],f16>
%1872 = torch.aten.bmm %1869, %1871 : !torch.vtensor<[16,256,160],f16>, !torch.vtensor<[16,160,256],f16> -> !torch.vtensor<[16,256,256],f16>
%1873 = torch.aten._unsafe_view %1872, %993 : !torch.vtensor<[16,256,256],f16>, !torch.list<int> -> !torch.vtensor<[16,256,256],f16>
%1874 = torch.aten.mul.Tensor %1873, %0 : !torch.vtensor<[16,256,256],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,256,256],f16>
%1875 = torch.aten._softmax %1874, %int-1, %false : !torch.vtensor<[16,256,256],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,256,256],f16>
%1876 = torch.aten.expand %1875, %993, %false : !torch.vtensor<[16,256,256],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,256,256],f16>
%1877 = torch.aten._reshape_alias %1876, %993, %998 : !torch.vtensor<[16,256,256],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,256,256],f16>
%1878 = torch.aten.expand %1866, %974, %false : !torch.vtensor<[16,256,160],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,256,160],f16>
%1879 = torch.aten._reshape_alias %1878, %974, %986 : !torch.vtensor<[16,256,160],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%1880 = torch.aten.bmm %1877, %1879 : !torch.vtensor<[16,256,256],f16>, !torch.vtensor<[16,256,160],f16> -> !torch.vtensor<[16,256,160],f16>
%1881 = torch.aten._unsafe_view %1880, %974 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%1882 = torch.aten._reshape_alias %1881, %1004, %1005 : !torch.vtensor<[16,256,160],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16>
%1883 = torch.aten.permute %1882, %179 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16>
%1884 = torch.aten.clone %1883, %int0 : !torch.vtensor<[2,256,8,160],f16>, !torch.int -> !torch.vtensor<[2,256,8,160],f16>
%1885 = torch.aten._unsafe_view %1884, %951 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%1886 = torch.aten.t %20 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%1887 = torch.aten.view %1885, %956 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
%1888 = torch.aten.addmm %29, %1887, %1886, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[512,1280],f16>
%1889 = torch.aten.view %1888, %951 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%1890 = torch.aten.add.Tensor %1889, %1842, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16>
%result0_64, %result1_65, %result2_66 = torch.aten.native_layer_norm %1890, %954, %29, %29, %float1.000000e-05 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.vtensor<[1280],f16>, !torch.vtensor<[1280],f16>, !torch.float -> !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1],f32>, !torch.vtensor<[2,256,1],f32>
%1891 = torch.aten.t %20 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%1892 = torch.aten._reshape_alias %result0_64, %956, %957 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
%1893 = torch.aten.mm %1892, %1891 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16>
%1894 = torch.aten._unsafe_view %1893, %951 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%1895 = torch.aten.t %19 : !torch.vtensor<[1280,768],f16> -> !torch.vtensor<[768,1280],f16>
%1896 = torch.aten._reshape_alias %arg2, %228, %229 : !torch.vtensor<[2,77,768],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[154,768],f16>
%1897 = torch.aten.mm %1896, %1895 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,1280],f16> -> !torch.vtensor<[154,1280],f16>
%1898 = torch.aten._unsafe_view %1897, %1022 : !torch.vtensor<[154,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,77,1280],f16>
%1899 = torch.aten.t %19 : !torch.vtensor<[1280,768],f16> -> !torch.vtensor<[768,1280],f16>
%1900 = torch.aten._reshape_alias %arg2, %228, %229 : !torch.vtensor<[2,77,768],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[154,768],f16>
%1901 = torch.aten.mm %1900, %1899 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,1280],f16> -> !torch.vtensor<[154,1280],f16>
%1902 = torch.aten._unsafe_view %1901, %1022 : !torch.vtensor<[154,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,77,1280],f16>
%1903 = torch.aten._reshape_alias %1894, %969, %970 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16>
%1904 = torch.aten.permute %1903, %179 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16>
%1905 = torch.aten.clone %1904, %int0 : !torch.vtensor<[2,8,256,160],f16>, !torch.int -> !torch.vtensor<[2,8,256,160],f16>
%1906 = torch.aten._unsafe_view %1905, %974 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%1907 = torch.aten._reshape_alias %1898, %1032, %1033 : !torch.vtensor<[2,77,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,77,8,160],f16>
%1908 = torch.aten.permute %1907, %179 : !torch.vtensor<[2,77,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,160],f16>
%1909 = torch.aten.clone %1908, %int0 : !torch.vtensor<[2,8,77,160],f16>, !torch.int -> !torch.vtensor<[2,8,77,160],f16>
%1910 = torch.aten._unsafe_view %1909, %1037 : !torch.vtensor<[2,8,77,160],f16>, !torch.list<int> -> !torch.vtensor<[16,77,160],f16>
%1911 = torch.aten._reshape_alias %1902, %1032, %1033 : !torch.vtensor<[2,77,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,77,8,160],f16>
%1912 = torch.aten.permute %1911, %179 : !torch.vtensor<[2,77,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,160],f16>
%1913 = torch.aten.clone %1912, %int0 : !torch.vtensor<[2,8,77,160],f16>, !torch.int -> !torch.vtensor<[2,8,77,160],f16>
%1914 = torch.aten._unsafe_view %1913, %1037 : !torch.vtensor<[2,8,77,160],f16>, !torch.list<int> -> !torch.vtensor<[16,77,160],f16>
%1915 = torch.aten.transpose.int %1910, %int-1, %int-2 : !torch.vtensor<[16,77,160],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,160,77],f16>
%1916 = torch.aten.expand %1906, %974, %false : !torch.vtensor<[16,256,160],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,256,160],f16>
%1917 = torch.aten._reshape_alias %1916, %974, %986 : !torch.vtensor<[16,256,160],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%1918 = torch.aten.expand %1915, %1046, %false : !torch.vtensor<[16,160,77],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,160,77],f16>
%1919 = torch.aten._reshape_alias %1918, %1046, %1048 : !torch.vtensor<[16,160,77],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,160,77],f16>
%1920 = torch.aten.bmm %1917, %1919 : !torch.vtensor<[16,256,160],f16>, !torch.vtensor<[16,160,77],f16> -> !torch.vtensor<[16,256,77],f16>
%1921 = torch.aten._unsafe_view %1920, %1051 : !torch.vtensor<[16,256,77],f16>, !torch.list<int> -> !torch.vtensor<[16,256,77],f16>
%1922 = torch.aten.mul.Tensor %1921, %0 : !torch.vtensor<[16,256,77],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,256,77],f16>
%1923 = torch.aten._softmax %1922, %int-1, %false : !torch.vtensor<[16,256,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,256,77],f16>
%1924 = torch.aten.expand %1923, %1051, %false : !torch.vtensor<[16,256,77],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,256,77],f16>
%1925 = torch.aten._reshape_alias %1924, %1051, %1056 : !torch.vtensor<[16,256,77],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,256,77],f16>
%1926 = torch.aten.expand %1914, %1037, %false : !torch.vtensor<[16,77,160],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,77,160],f16>
%1927 = torch.aten._reshape_alias %1926, %1037, %1059 : !torch.vtensor<[16,77,160],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,77,160],f16>
%1928 = torch.aten.bmm %1925, %1927 : !torch.vtensor<[16,256,77],f16>, !torch.vtensor<[16,77,160],f16> -> !torch.vtensor<[16,256,160],f16>
%1929 = torch.aten._unsafe_view %1928, %974 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%1930 = torch.aten._reshape_alias %1929, %1004, %1005 : !torch.vtensor<[16,256,160],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16>
%1931 = torch.aten.permute %1930, %179 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16>
%1932 = torch.aten.clone %1931, %int0 : !torch.vtensor<[2,256,8,160],f16>, !torch.int -> !torch.vtensor<[2,256,8,160],f16>
%1933 = torch.aten._unsafe_view %1932, %951 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%1934 = torch.aten.t %20 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%1935 = torch.aten.view %1933, %956 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
%1936 = torch.aten.addmm %29, %1935, %1934, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[512,1280],f16>
%1937 = torch.aten.view %1936, %951 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%1938 = torch.aten.add.Tensor %1937, %1890, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16>
%result0_67, %result1_68, %result2_69 = torch.aten.native_layer_norm %1938, %954, %29, %29, %float1.000000e-05 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.vtensor<[1280],f16>, !torch.vtensor<[1280],f16>, !torch.float -> !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1],f32>, !torch.vtensor<[2,256,1],f32>
%1939 = torch.aten.t %21 : !torch.vtensor<[10240,1280],f16> -> !torch.vtensor<[1280,10240],f16>
%1940 = torch.aten.view %result0_67, %956 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
%1941 = torch.aten.addmm %22, %1940, %1939, %int1, %int1 : !torch.vtensor<[10240],f16>, !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,10240],f16>, !torch.int, !torch.int -> !torch.vtensor<[512,10240],f16>
%1942 = torch.aten.view %1941, %1075 : !torch.vtensor<[512,10240],f16>, !torch.list<int> -> !torch.vtensor<[2,256,10240],f16>
%1943 = torch.aten.slice.Tensor %1942, %int-1, %int0, %int5120, %int1 : !torch.vtensor<[2,256,10240],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,256,5120],f16>
%1944 = torch.aten.slice.Tensor %1942, %int-1, %int5120, %int10240, %int1 : !torch.vtensor<[2,256,10240],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,256,5120],f16>
%1945 = torch.aten.gelu %1944, %str_0 : !torch.vtensor<[2,256,5120],f16>, !torch.str -> !torch.vtensor<[2,256,5120],f16>
%1946 = torch.aten.mul.Tensor %1943, %1945 : !torch.vtensor<[2,256,5120],f16>, !torch.vtensor<[2,256,5120],f16> -> !torch.vtensor<[2,256,5120],f16>
%1947 = torch.aten.t %23 : !torch.vtensor<[1280,5120],f16> -> !torch.vtensor<[5120,1280],f16>
%1948 = torch.aten.view %1946, %1082 : !torch.vtensor<[2,256,5120],f16>, !torch.list<int> -> !torch.vtensor<[512,5120],f16>
%1949 = torch.aten.addmm %29, %1948, %1947, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[512,5120],f16>, !torch.vtensor<[5120,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[512,1280],f16>
%1950 = torch.aten.view %1949, %951 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%1951 = torch.aten.add.Tensor %1950, %1938, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16>
%1952 = torch.aten._reshape_alias %1951, %1087, %1088 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,16,16,1280],f16>
%1953 = torch.aten.permute %1952, %300 : !torch.vtensor<[2,16,16,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f16>
%1954 = torch.aten._convolution %1953, %24, %29, %87, %88, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[1280,1280,1,1],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,16,16],f16>
%1955 = torch.aten.add.Tensor %1954, %1822, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
%1956 = torch.prim.ListConstruct %1955, %1092 : (!torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f16>) -> !torch.list<vtensor>
%1957 = torch.aten.cat %1956, %int1 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[2,2560,16,16],f16>
%1958 = torch.aten.clone %1957, %int0 : !torch.vtensor<[2,2560,16,16],f16>, !torch.int -> !torch.vtensor<[2,2560,16,16],f16>
%1959 = torch.aten.view %1958, %1774 : !torch.vtensor<[2,2560,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,32,80,256],f16>
%1960 = torch.aten._to_copy %1959, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,32,80,256],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,32,80,256],f32>
%1961 = torch.aten.var.correction %1960, %93, %int0, %true : !torch.vtensor<[2,32,80,256],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>
%1962 = torch.aten.mean.dim %1960, %93, %true, %none : !torch.vtensor<[2,32,80,256],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%1963 = torch.aten.add.Tensor %1961, %5, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%1964 = torch.aten.rsqrt %1963 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%1965 = torch.aten.sub.Tensor %1959, %1962, %int1 : !torch.vtensor<[2,32,80,256],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,80,256],f32>
%1966 = torch.aten.mul.Tensor %1965, %1964 : !torch.vtensor<[2,32,80,256],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,80,256],f32>
%1967 = torch.aten.view %1966, %1783 : !torch.vtensor<[2,32,80,256],f32>, !torch.list<int> -> !torch.vtensor<[2,2560,16,16],f32>
%1968 = torch.aten.unsqueeze %52, %int-1 : !torch.vtensor<[2560],f16>, !torch.int -> !torch.vtensor<[2560,1],f16>
%1969 = torch.aten.unsqueeze %1968, %int-1 : !torch.vtensor<[2560,1],f16>, !torch.int -> !torch.vtensor<[2560,1,1],f16>
%1970 = torch.aten.mul.Tensor %1967, %1969 : !torch.vtensor<[2,2560,16,16],f32>, !torch.vtensor<[2560,1,1],f16> -> !torch.vtensor<[2,2560,16,16],f32>
%1971 = torch.aten.unsqueeze %52, %int-1 : !torch.vtensor<[2560],f16>, !torch.int -> !torch.vtensor<[2560,1],f16>
%1972 = torch.aten.unsqueeze %1971, %int-1 : !torch.vtensor<[2560,1],f16>, !torch.int -> !torch.vtensor<[2560,1,1],f16>
%1973 = torch.aten.add.Tensor %1970, %1972, %int1 : !torch.vtensor<[2,2560,16,16],f32>, !torch.vtensor<[2560,1,1],f16>, !torch.int -> !torch.vtensor<[2,2560,16,16],f32>
%1974 = torch.aten._to_copy %1973, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,2560,16,16],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,2560,16,16],f16>
%1975 = torch.aten.silu %1974 : !torch.vtensor<[2,2560,16,16],f16> -> !torch.vtensor<[2,2560,16,16],f16>
%1976 = torch.aten._convolution %1975, %15, %29, %87, %87, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,2560,16,16],f16>, !torch.vtensor<[1280,2560,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,16,16],f16>
%1977 = torch.aten.silu %86 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%1978 = torch.aten.t %20 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%1979 = torch.aten.addmm %29, %1977, %1978, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
%1980 = torch.aten.slice.Tensor %1979, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,1280],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
%1981 = torch.aten.slice.Tensor %1980, %int1, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,1280],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
%1982 = torch.aten.unsqueeze %1981, %int2 : !torch.vtensor<[2,1280],f16>, !torch.int -> !torch.vtensor<[2,1280,1],f16>
%1983 = torch.aten.unsqueeze %1982, %int3 : !torch.vtensor<[2,1280,1],f16>, !torch.int -> !torch.vtensor<[2,1280,1,1],f16>
%1984 = torch.aten.add.Tensor %1976, %1983, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
%1985 = torch.aten.view %1984, %909 : !torch.vtensor<[2,1280,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,32,40,256],f16>
%1986 = torch.aten._to_copy %1985, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,32,40,256],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,32,40,256],f32>
%1987 = torch.aten.var.correction %1986, %93, %int0, %true : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>
%1988 = torch.aten.mean.dim %1986, %93, %true, %none : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%1989 = torch.aten.add.Tensor %1987, %5, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%1990 = torch.aten.rsqrt %1989 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%1991 = torch.aten.sub.Tensor %1985, %1988, %int1 : !torch.vtensor<[2,32,40,256],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,256],f32>
%1992 = torch.aten.mul.Tensor %1991, %1990 : !torch.vtensor<[2,32,40,256],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,256],f32>
%1993 = torch.aten.view %1992, %918 : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f32>
%1994 = torch.aten.unsqueeze %29, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%1995 = torch.aten.unsqueeze %1994, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%1996 = torch.aten.mul.Tensor %1993, %1995 : !torch.vtensor<[2,1280,16,16],f32>, !torch.vtensor<[1280,1,1],f16> -> !torch.vtensor<[2,1280,16,16],f32>
%1997 = torch.aten.unsqueeze %29, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%1998 = torch.aten.unsqueeze %1997, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%1999 = torch.aten.add.Tensor %1996, %1998, %int1 : !torch.vtensor<[2,1280,16,16],f32>, !torch.vtensor<[1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f32>
%2000 = torch.aten._to_copy %1999, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,1280,16,16],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,16,16],f16>
%2001 = torch.aten.silu %2000 : !torch.vtensor<[2,1280,16,16],f16> -> !torch.vtensor<[2,1280,16,16],f16>
%2002 = torch.aten._convolution %2001, %25, %29, %87, %87, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,16,16],f16>
%2003 = torch.aten._convolution %1957, %16, %29, %87, %88, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,2560,16,16],f16>, !torch.vtensor<[1280,2560,1,1],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,16,16],f16>
%2004 = torch.aten.add.Tensor %2003, %2002, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
%2005 = torch.aten.div.Tensor %2004, %4 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[2,1280,16,16],f16>
%2006 = torch.aten.clone %2005, %int0 : !torch.vtensor<[2,1280,16,16],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
%2007 = torch.aten.view %2006, %909 : !torch.vtensor<[2,1280,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,32,40,256],f16>
%2008 = torch.aten._to_copy %2007, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,32,40,256],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,32,40,256],f32>
%2009 = torch.aten.var.correction %2008, %93, %int0, %true : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>
%2010 = torch.aten.mean.dim %2008, %93, %true, %none : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%2011 = torch.aten.add.Tensor %2009, %3, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%2012 = torch.aten.rsqrt %2011 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%2013 = torch.aten.sub.Tensor %2007, %2010, %int1 : !torch.vtensor<[2,32,40,256],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,256],f32>
%2014 = torch.aten.mul.Tensor %2013, %2012 : !torch.vtensor<[2,32,40,256],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,256],f32>
%2015 = torch.aten.view %2014, %918 : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f32>
%2016 = torch.aten.unsqueeze %29, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%2017 = torch.aten.unsqueeze %2016, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%2018 = torch.aten.mul.Tensor %2015, %2017 : !torch.vtensor<[2,1280,16,16],f32>, !torch.vtensor<[1280,1,1],f16> -> !torch.vtensor<[2,1280,16,16],f32>
%2019 = torch.aten.unsqueeze %29, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%2020 = torch.aten.unsqueeze %2019, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%2021 = torch.aten.add.Tensor %2018, %2020, %int1 : !torch.vtensor<[2,1280,16,16],f32>, !torch.vtensor<[1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f32>
%2022 = torch.aten._to_copy %2021, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,1280,16,16],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,16,16],f16>
%2023 = torch.aten._convolution %2022, %24, %29, %87, %88, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[1280,1280,1,1],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,16,16],f16>
%2024 = torch.aten.permute %2023, %156 : !torch.vtensor<[2,1280,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,16,16,1280],f16>
%2025 = torch.aten._reshape_alias %2024, %951, %952 : !torch.vtensor<[2,16,16,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%result0_70, %result1_71, %result2_72 = torch.aten.native_layer_norm %2025, %954, %29, %29, %float1.000000e-05 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.vtensor<[1280],f16>, !torch.vtensor<[1280],f16>, !torch.float -> !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1],f32>, !torch.vtensor<[2,256,1],f32>
%2026 = torch.aten.t %20 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%2027 = torch.aten._reshape_alias %result0_70, %956, %957 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
%2028 = torch.aten.mm %2027, %2026 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16>
%2029 = torch.aten._unsafe_view %2028, %951 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%2030 = torch.aten.t %20 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%2031 = torch.aten._reshape_alias %result0_70, %956, %957 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
%2032 = torch.aten.mm %2031, %2030 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16>
%2033 = torch.aten._unsafe_view %2032, %951 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%2034 = torch.aten.t %20 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%2035 = torch.aten._reshape_alias %result0_70, %956, %957 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
%2036 = torch.aten.mm %2035, %2034 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16>
%2037 = torch.aten._unsafe_view %2036, %951 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%2038 = torch.aten._reshape_alias %2029, %969, %970 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16>
%2039 = torch.aten.permute %2038, %179 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16>
%2040 = torch.aten.clone %2039, %int0 : !torch.vtensor<[2,8,256,160],f16>, !torch.int -> !torch.vtensor<[2,8,256,160],f16>
%2041 = torch.aten._unsafe_view %2040, %974 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%2042 = torch.aten._reshape_alias %2033, %969, %970 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16>
%2043 = torch.aten.permute %2042, %179 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16>
%2044 = torch.aten.clone %2043, %int0 : !torch.vtensor<[2,8,256,160],f16>, !torch.int -> !torch.vtensor<[2,8,256,160],f16>
%2045 = torch.aten._unsafe_view %2044, %974 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%2046 = torch.aten._reshape_alias %2037, %969, %970 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16>
%2047 = torch.aten.permute %2046, %179 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16>
%2048 = torch.aten.clone %2047, %int0 : !torch.vtensor<[2,8,256,160],f16>, !torch.int -> !torch.vtensor<[2,8,256,160],f16>
%2049 = torch.aten._unsafe_view %2048, %974 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%2050 = torch.aten.transpose.int %2045, %int-1, %int-2 : !torch.vtensor<[16,256,160],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,160,256],f16>
%2051 = torch.aten.expand %2041, %974, %false : !torch.vtensor<[16,256,160],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,256,160],f16>
%2052 = torch.aten._reshape_alias %2051, %974, %986 : !torch.vtensor<[16,256,160],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%2053 = torch.aten.expand %2050, %988, %false : !torch.vtensor<[16,160,256],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,160,256],f16>
%2054 = torch.aten._reshape_alias %2053, %988, %990 : !torch.vtensor<[16,160,256],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,160,256],f16>
%2055 = torch.aten.bmm %2052, %2054 : !torch.vtensor<[16,256,160],f16>, !torch.vtensor<[16,160,256],f16> -> !torch.vtensor<[16,256,256],f16>
%2056 = torch.aten._unsafe_view %2055, %993 : !torch.vtensor<[16,256,256],f16>, !torch.list<int> -> !torch.vtensor<[16,256,256],f16>
%2057 = torch.aten.mul.Tensor %2056, %0 : !torch.vtensor<[16,256,256],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,256,256],f16>
%2058 = torch.aten._softmax %2057, %int-1, %false : !torch.vtensor<[16,256,256],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,256,256],f16>
%2059 = torch.aten.expand %2058, %993, %false : !torch.vtensor<[16,256,256],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,256,256],f16>
%2060 = torch.aten._reshape_alias %2059, %993, %998 : !torch.vtensor<[16,256,256],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,256,256],f16>
%2061 = torch.aten.expand %2049, %974, %false : !torch.vtensor<[16,256,160],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,256,160],f16>
%2062 = torch.aten._reshape_alias %2061, %974, %986 : !torch.vtensor<[16,256,160],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%2063 = torch.aten.bmm %2060, %2062 : !torch.vtensor<[16,256,256],f16>, !torch.vtensor<[16,256,160],f16> -> !torch.vtensor<[16,256,160],f16>
%2064 = torch.aten._unsafe_view %2063, %974 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%2065 = torch.aten._reshape_alias %2064, %1004, %1005 : !torch.vtensor<[16,256,160],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16>
%2066 = torch.aten.permute %2065, %179 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16>
%2067 = torch.aten.clone %2066, %int0 : !torch.vtensor<[2,256,8,160],f16>, !torch.int -> !torch.vtensor<[2,256,8,160],f16>
%2068 = torch.aten._unsafe_view %2067, %951 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%2069 = torch.aten.t %20 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%2070 = torch.aten.view %2068, %956 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
%2071 = torch.aten.addmm %29, %2070, %2069, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[512,1280],f16>
%2072 = torch.aten.view %2071, %951 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%2073 = torch.aten.add.Tensor %2072, %2025, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16>
%result0_73, %result1_74, %result2_75 = torch.aten.native_layer_norm %2073, %954, %29, %29, %float1.000000e-05 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.vtensor<[1280],f16>, !torch.vtensor<[1280],f16>, !torch.float -> !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1],f32>, !torch.vtensor<[2,256,1],f32>
%2074 = torch.aten.t %20 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%2075 = torch.aten._reshape_alias %result0_73, %956, %957 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
%2076 = torch.aten.mm %2075, %2074 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16>
%2077 = torch.aten._unsafe_view %2076, %951 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%2078 = torch.aten.t %19 : !torch.vtensor<[1280,768],f16> -> !torch.vtensor<[768,1280],f16>
%2079 = torch.aten._reshape_alias %arg2, %228, %229 : !torch.vtensor<[2,77,768],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[154,768],f16>
%2080 = torch.aten.mm %2079, %2078 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,1280],f16> -> !torch.vtensor<[154,1280],f16>
%2081 = torch.aten._unsafe_view %2080, %1022 : !torch.vtensor<[154,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,77,1280],f16>
%2082 = torch.aten.t %19 : !torch.vtensor<[1280,768],f16> -> !torch.vtensor<[768,1280],f16>
%2083 = torch.aten._reshape_alias %arg2, %228, %229 : !torch.vtensor<[2,77,768],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[154,768],f16>
%2084 = torch.aten.mm %2083, %2082 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,1280],f16> -> !torch.vtensor<[154,1280],f16>
%2085 = torch.aten._unsafe_view %2084, %1022 : !torch.vtensor<[154,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,77,1280],f16>
%2086 = torch.aten._reshape_alias %2077, %969, %970 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16>
%2087 = torch.aten.permute %2086, %179 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16>
%2088 = torch.aten.clone %2087, %int0 : !torch.vtensor<[2,8,256,160],f16>, !torch.int -> !torch.vtensor<[2,8,256,160],f16>
%2089 = torch.aten._unsafe_view %2088, %974 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%2090 = torch.aten._reshape_alias %2081, %1032, %1033 : !torch.vtensor<[2,77,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,77,8,160],f16>
%2091 = torch.aten.permute %2090, %179 : !torch.vtensor<[2,77,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,160],f16>
%2092 = torch.aten.clone %2091, %int0 : !torch.vtensor<[2,8,77,160],f16>, !torch.int -> !torch.vtensor<[2,8,77,160],f16>
%2093 = torch.aten._unsafe_view %2092, %1037 : !torch.vtensor<[2,8,77,160],f16>, !torch.list<int> -> !torch.vtensor<[16,77,160],f16>
%2094 = torch.aten._reshape_alias %2085, %1032, %1033 : !torch.vtensor<[2,77,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,77,8,160],f16>
%2095 = torch.aten.permute %2094, %179 : !torch.vtensor<[2,77,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,160],f16>
%2096 = torch.aten.clone %2095, %int0 : !torch.vtensor<[2,8,77,160],f16>, !torch.int -> !torch.vtensor<[2,8,77,160],f16>
%2097 = torch.aten._unsafe_view %2096, %1037 : !torch.vtensor<[2,8,77,160],f16>, !torch.list<int> -> !torch.vtensor<[16,77,160],f16>
%2098 = torch.aten.transpose.int %2093, %int-1, %int-2 : !torch.vtensor<[16,77,160],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,160,77],f16>
%2099 = torch.aten.expand %2089, %974, %false : !torch.vtensor<[16,256,160],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,256,160],f16>
%2100 = torch.aten._reshape_alias %2099, %974, %986 : !torch.vtensor<[16,256,160],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%2101 = torch.aten.expand %2098, %1046, %false : !torch.vtensor<[16,160,77],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,160,77],f16>
%2102 = torch.aten._reshape_alias %2101, %1046, %1048 : !torch.vtensor<[16,160,77],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,160,77],f16>
%2103 = torch.aten.bmm %2100, %2102 : !torch.vtensor<[16,256,160],f16>, !torch.vtensor<[16,160,77],f16> -> !torch.vtensor<[16,256,77],f16>
%2104 = torch.aten._unsafe_view %2103, %1051 : !torch.vtensor<[16,256,77],f16>, !torch.list<int> -> !torch.vtensor<[16,256,77],f16>
%2105 = torch.aten.mul.Tensor %2104, %0 : !torch.vtensor<[16,256,77],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,256,77],f16>
%2106 = torch.aten._softmax %2105, %int-1, %false : !torch.vtensor<[16,256,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,256,77],f16>
%2107 = torch.aten.expand %2106, %1051, %false : !torch.vtensor<[16,256,77],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,256,77],f16>
%2108 = torch.aten._reshape_alias %2107, %1051, %1056 : !torch.vtensor<[16,256,77],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,256,77],f16>
%2109 = torch.aten.expand %2097, %1037, %false : !torch.vtensor<[16,77,160],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,77,160],f16>
%2110 = torch.aten._reshape_alias %2109, %1037, %1059 : !torch.vtensor<[16,77,160],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,77,160],f16>
%2111 = torch.aten.bmm %2108, %2110 : !torch.vtensor<[16,256,77],f16>, !torch.vtensor<[16,77,160],f16> -> !torch.vtensor<[16,256,160],f16>
%2112 = torch.aten._unsafe_view %2111, %974 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%2113 = torch.aten._reshape_alias %2112, %1004, %1005 : !torch.vtensor<[16,256,160],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16>
%2114 = torch.aten.permute %2113, %179 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16>
%2115 = torch.aten.clone %2114, %int0 : !torch.vtensor<[2,256,8,160],f16>, !torch.int -> !torch.vtensor<[2,256,8,160],f16>
%2116 = torch.aten._unsafe_view %2115, %951 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%2117 = torch.aten.t %20 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%2118 = torch.aten.view %2116, %956 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
%2119 = torch.aten.addmm %29, %2118, %2117, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[512,1280],f16>
%2120 = torch.aten.view %2119, %951 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%2121 = torch.aten.add.Tensor %2120, %2073, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16>
%result0_76, %result1_77, %result2_78 = torch.aten.native_layer_norm %2121, %954, %29, %29, %float1.000000e-05 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.vtensor<[1280],f16>, !torch.vtensor<[1280],f16>, !torch.float -> !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1],f32>, !torch.vtensor<[2,256,1],f32>
%2122 = torch.aten.t %21 : !torch.vtensor<[10240,1280],f16> -> !torch.vtensor<[1280,10240],f16>
%2123 = torch.aten.view %result0_76, %956 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
%2124 = torch.aten.addmm %22, %2123, %2122, %int1, %int1 : !torch.vtensor<[10240],f16>, !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,10240],f16>, !torch.int, !torch.int -> !torch.vtensor<[512,10240],f16>
%2125 = torch.aten.view %2124, %1075 : !torch.vtensor<[512,10240],f16>, !torch.list<int> -> !torch.vtensor<[2,256,10240],f16>
%2126 = torch.aten.slice.Tensor %2125, %int-1, %int0, %int5120, %int1 : !torch.vtensor<[2,256,10240],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,256,5120],f16>
%2127 = torch.aten.slice.Tensor %2125, %int-1, %int5120, %int10240, %int1 : !torch.vtensor<[2,256,10240],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,256,5120],f16>
%2128 = torch.aten.gelu %2127, %str_0 : !torch.vtensor<[2,256,5120],f16>, !torch.str -> !torch.vtensor<[2,256,5120],f16>
%2129 = torch.aten.mul.Tensor %2126, %2128 : !torch.vtensor<[2,256,5120],f16>, !torch.vtensor<[2,256,5120],f16> -> !torch.vtensor<[2,256,5120],f16>
%2130 = torch.aten.t %23 : !torch.vtensor<[1280,5120],f16> -> !torch.vtensor<[5120,1280],f16>
%2131 = torch.aten.view %2129, %1082 : !torch.vtensor<[2,256,5120],f16>, !torch.list<int> -> !torch.vtensor<[512,5120],f16>
%2132 = torch.aten.addmm %29, %2131, %2130, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[512,5120],f16>, !torch.vtensor<[5120,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[512,1280],f16>
%2133 = torch.aten.view %2132, %951 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%2134 = torch.aten.add.Tensor %2133, %2121, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16>
%2135 = torch.aten._reshape_alias %2134, %1087, %1088 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,16,16,1280],f16>
%2136 = torch.aten.permute %2135, %300 : !torch.vtensor<[2,16,16,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f16>
%2137 = torch.aten._convolution %2136, %24, %29, %87, %88, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[1280,1280,1,1],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,16,16],f16>
%2138 = torch.aten.add.Tensor %2137, %2005, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
%2139 = torch.prim.ListConstruct %2138, %879 : (!torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,640,16,16],f16>) -> !torch.list<vtensor>
%2140 = torch.aten.cat %2139, %int1 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[2,1920,16,16],f16>
%2141 = torch.aten.clone %2140, %int0 : !torch.vtensor<[2,1920,16,16],f16>, !torch.int -> !torch.vtensor<[2,1920,16,16],f16>
%2142 = torch.prim.ListConstruct %int2, %int32, %int60, %int256 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2143 = torch.aten.view %2141, %2142 : !torch.vtensor<[2,1920,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,32,60,256],f16>
%2144 = torch.aten._to_copy %2143, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,32,60,256],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,32,60,256],f32>
%2145 = torch.aten.var.correction %2144, %93, %int0, %true : !torch.vtensor<[2,32,60,256],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>
%2146 = torch.aten.mean.dim %2144, %93, %true, %none : !torch.vtensor<[2,32,60,256],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%2147 = torch.aten.add.Tensor %2145, %5, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%2148 = torch.aten.rsqrt %2147 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%2149 = torch.aten.sub.Tensor %2143, %2146, %int1 : !torch.vtensor<[2,32,60,256],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,60,256],f32>
%2150 = torch.aten.mul.Tensor %2149, %2148 : !torch.vtensor<[2,32,60,256],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,60,256],f32>
%2151 = torch.prim.ListConstruct %int2, %int1920, %int16, %int16 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2152 = torch.aten.view %2150, %2151 : !torch.vtensor<[2,32,60,256],f32>, !torch.list<int> -> !torch.vtensor<[2,1920,16,16],f32>
%2153 = torch.aten.unsqueeze %26, %int-1 : !torch.vtensor<[1920],f16>, !torch.int -> !torch.vtensor<[1920,1],f16>
%2154 = torch.aten.unsqueeze %2153, %int-1 : !torch.vtensor<[1920,1],f16>, !torch.int -> !torch.vtensor<[1920,1,1],f16>
%2155 = torch.aten.mul.Tensor %2152, %2154 : !torch.vtensor<[2,1920,16,16],f32>, !torch.vtensor<[1920,1,1],f16> -> !torch.vtensor<[2,1920,16,16],f32>
%2156 = torch.aten.unsqueeze %26, %int-1 : !torch.vtensor<[1920],f16>, !torch.int -> !torch.vtensor<[1920,1],f16>
%2157 = torch.aten.unsqueeze %2156, %int-1 : !torch.vtensor<[1920,1],f16>, !torch.int -> !torch.vtensor<[1920,1,1],f16>
%2158 = torch.aten.add.Tensor %2155, %2157, %int1 : !torch.vtensor<[2,1920,16,16],f32>, !torch.vtensor<[1920,1,1],f16>, !torch.int -> !torch.vtensor<[2,1920,16,16],f32>
%2159 = torch.aten._to_copy %2158, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,1920,16,16],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1920,16,16],f16>
%2160 = torch.aten.silu %2159 : !torch.vtensor<[2,1920,16,16],f16> -> !torch.vtensor<[2,1920,16,16],f16>
%2161 = torch.aten._convolution %2160, %17, %29, %87, %87, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,1920,16,16],f16>, !torch.vtensor<[1280,1920,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,16,16],f16>
%2162 = torch.aten.silu %86 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%2163 = torch.aten.t %20 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%2164 = torch.aten.addmm %29, %2162, %2163, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
%2165 = torch.aten.slice.Tensor %2164, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,1280],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
%2166 = torch.aten.slice.Tensor %2165, %int1, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,1280],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
%2167 = torch.aten.unsqueeze %2166, %int2 : !torch.vtensor<[2,1280],f16>, !torch.int -> !torch.vtensor<[2,1280,1],f16>
%2168 = torch.aten.unsqueeze %2167, %int3 : !torch.vtensor<[2,1280,1],f16>, !torch.int -> !torch.vtensor<[2,1280,1,1],f16>
%2169 = torch.aten.add.Tensor %2161, %2168, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
%2170 = torch.aten.view %2169, %909 : !torch.vtensor<[2,1280,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,32,40,256],f16>
%2171 = torch.aten._to_copy %2170, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,32,40,256],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,32,40,256],f32>
%2172 = torch.aten.var.correction %2171, %93, %int0, %true : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>
%2173 = torch.aten.mean.dim %2171, %93, %true, %none : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%2174 = torch.aten.add.Tensor %2172, %5, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%2175 = torch.aten.rsqrt %2174 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%2176 = torch.aten.sub.Tensor %2170, %2173, %int1 : !torch.vtensor<[2,32,40,256],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,256],f32>
%2177 = torch.aten.mul.Tensor %2176, %2175 : !torch.vtensor<[2,32,40,256],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,256],f32>
%2178 = torch.aten.view %2177, %918 : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f32>
%2179 = torch.aten.unsqueeze %29, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%2180 = torch.aten.unsqueeze %2179, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%2181 = torch.aten.mul.Tensor %2178, %2180 : !torch.vtensor<[2,1280,16,16],f32>, !torch.vtensor<[1280,1,1],f16> -> !torch.vtensor<[2,1280,16,16],f32>
%2182 = torch.aten.unsqueeze %29, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%2183 = torch.aten.unsqueeze %2182, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%2184 = torch.aten.add.Tensor %2181, %2183, %int1 : !torch.vtensor<[2,1280,16,16],f32>, !torch.vtensor<[1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f32>
%2185 = torch.aten._to_copy %2184, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,1280,16,16],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,16,16],f16>
%2186 = torch.aten.silu %2185 : !torch.vtensor<[2,1280,16,16],f16> -> !torch.vtensor<[2,1280,16,16],f16>
%2187 = torch.aten._convolution %2186, %25, %29, %87, %87, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,16,16],f16>
%2188 = torch.aten._convolution %2140, %18, %29, %87, %88, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,1920,16,16],f16>, !torch.vtensor<[1280,1920,1,1],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,16,16],f16>
%2189 = torch.aten.add.Tensor %2188, %2187, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
%2190 = torch.aten.div.Tensor %2189, %4 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[2,1280,16,16],f16>
%2191 = torch.aten.clone %2190, %int0 : !torch.vtensor<[2,1280,16,16],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
%2192 = torch.aten.view %2191, %909 : !torch.vtensor<[2,1280,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,32,40,256],f16>
%2193 = torch.aten._to_copy %2192, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,32,40,256],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,32,40,256],f32>
%2194 = torch.aten.var.correction %2193, %93, %int0, %true : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>
%2195 = torch.aten.mean.dim %2193, %93, %true, %none : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%2196 = torch.aten.add.Tensor %2194, %3, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%2197 = torch.aten.rsqrt %2196 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%2198 = torch.aten.sub.Tensor %2192, %2195, %int1 : !torch.vtensor<[2,32,40,256],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,256],f32>
%2199 = torch.aten.mul.Tensor %2198, %2197 : !torch.vtensor<[2,32,40,256],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,256],f32>
%2200 = torch.aten.view %2199, %918 : !torch.vtensor<[2,32,40,256],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f32>
%2201 = torch.aten.unsqueeze %29, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%2202 = torch.aten.unsqueeze %2201, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%2203 = torch.aten.mul.Tensor %2200, %2202 : !torch.vtensor<[2,1280,16,16],f32>, !torch.vtensor<[1280,1,1],f16> -> !torch.vtensor<[2,1280,16,16],f32>
%2204 = torch.aten.unsqueeze %29, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%2205 = torch.aten.unsqueeze %2204, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%2206 = torch.aten.add.Tensor %2203, %2205, %int1 : !torch.vtensor<[2,1280,16,16],f32>, !torch.vtensor<[1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f32>
%2207 = torch.aten._to_copy %2206, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,1280,16,16],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,16,16],f16>
%2208 = torch.aten._convolution %2207, %24, %29, %87, %88, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[1280,1280,1,1],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,16,16],f16>
%2209 = torch.aten.permute %2208, %156 : !torch.vtensor<[2,1280,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,16,16,1280],f16>
%2210 = torch.aten._reshape_alias %2209, %951, %952 : !torch.vtensor<[2,16,16,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%result0_79, %result1_80, %result2_81 = torch.aten.native_layer_norm %2210, %954, %29, %29, %float1.000000e-05 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.vtensor<[1280],f16>, !torch.vtensor<[1280],f16>, !torch.float -> !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1],f32>, !torch.vtensor<[2,256,1],f32>
%2211 = torch.aten.t %20 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%2212 = torch.aten._reshape_alias %result0_79, %956, %957 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
%2213 = torch.aten.mm %2212, %2211 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16>
%2214 = torch.aten._unsafe_view %2213, %951 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%2215 = torch.aten.t %20 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%2216 = torch.aten._reshape_alias %result0_79, %956, %957 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
%2217 = torch.aten.mm %2216, %2215 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16>
%2218 = torch.aten._unsafe_view %2217, %951 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%2219 = torch.aten.t %20 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%2220 = torch.aten._reshape_alias %result0_79, %956, %957 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
%2221 = torch.aten.mm %2220, %2219 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16>
%2222 = torch.aten._unsafe_view %2221, %951 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%2223 = torch.aten._reshape_alias %2214, %969, %970 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16>
%2224 = torch.aten.permute %2223, %179 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16>
%2225 = torch.aten.clone %2224, %int0 : !torch.vtensor<[2,8,256,160],f16>, !torch.int -> !torch.vtensor<[2,8,256,160],f16>
%2226 = torch.aten._unsafe_view %2225, %974 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%2227 = torch.aten._reshape_alias %2218, %969, %970 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16>
%2228 = torch.aten.permute %2227, %179 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16>
%2229 = torch.aten.clone %2228, %int0 : !torch.vtensor<[2,8,256,160],f16>, !torch.int -> !torch.vtensor<[2,8,256,160],f16>
%2230 = torch.aten._unsafe_view %2229, %974 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%2231 = torch.aten._reshape_alias %2222, %969, %970 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16>
%2232 = torch.aten.permute %2231, %179 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16>
%2233 = torch.aten.clone %2232, %int0 : !torch.vtensor<[2,8,256,160],f16>, !torch.int -> !torch.vtensor<[2,8,256,160],f16>
%2234 = torch.aten._unsafe_view %2233, %974 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%2235 = torch.aten.transpose.int %2230, %int-1, %int-2 : !torch.vtensor<[16,256,160],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,160,256],f16>
%2236 = torch.aten.expand %2226, %974, %false : !torch.vtensor<[16,256,160],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,256,160],f16>
%2237 = torch.aten._reshape_alias %2236, %974, %986 : !torch.vtensor<[16,256,160],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%2238 = torch.aten.expand %2235, %988, %false : !torch.vtensor<[16,160,256],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,160,256],f16>
%2239 = torch.aten._reshape_alias %2238, %988, %990 : !torch.vtensor<[16,160,256],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,160,256],f16>
%2240 = torch.aten.bmm %2237, %2239 : !torch.vtensor<[16,256,160],f16>, !torch.vtensor<[16,160,256],f16> -> !torch.vtensor<[16,256,256],f16>
%2241 = torch.aten._unsafe_view %2240, %993 : !torch.vtensor<[16,256,256],f16>, !torch.list<int> -> !torch.vtensor<[16,256,256],f16>
%2242 = torch.aten.mul.Tensor %2241, %0 : !torch.vtensor<[16,256,256],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,256,256],f16>
%2243 = torch.aten._softmax %2242, %int-1, %false : !torch.vtensor<[16,256,256],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,256,256],f16>
%2244 = torch.aten.expand %2243, %993, %false : !torch.vtensor<[16,256,256],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,256,256],f16>
%2245 = torch.aten._reshape_alias %2244, %993, %998 : !torch.vtensor<[16,256,256],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,256,256],f16>
%2246 = torch.aten.expand %2234, %974, %false : !torch.vtensor<[16,256,160],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,256,160],f16>
%2247 = torch.aten._reshape_alias %2246, %974, %986 : !torch.vtensor<[16,256,160],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%2248 = torch.aten.bmm %2245, %2247 : !torch.vtensor<[16,256,256],f16>, !torch.vtensor<[16,256,160],f16> -> !torch.vtensor<[16,256,160],f16>
%2249 = torch.aten._unsafe_view %2248, %974 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%2250 = torch.aten._reshape_alias %2249, %1004, %1005 : !torch.vtensor<[16,256,160],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16>
%2251 = torch.aten.permute %2250, %179 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16>
%2252 = torch.aten.clone %2251, %int0 : !torch.vtensor<[2,256,8,160],f16>, !torch.int -> !torch.vtensor<[2,256,8,160],f16>
%2253 = torch.aten._unsafe_view %2252, %951 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%2254 = torch.aten.t %20 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%2255 = torch.aten.view %2253, %956 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
%2256 = torch.aten.addmm %29, %2255, %2254, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[512,1280],f16>
%2257 = torch.aten.view %2256, %951 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%2258 = torch.aten.add.Tensor %2257, %2210, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16>
%result0_82, %result1_83, %result2_84 = torch.aten.native_layer_norm %2258, %954, %29, %29, %float1.000000e-05 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.vtensor<[1280],f16>, !torch.vtensor<[1280],f16>, !torch.float -> !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1],f32>, !torch.vtensor<[2,256,1],f32>
%2259 = torch.aten.t %20 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%2260 = torch.aten._reshape_alias %result0_82, %956, %957 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
%2261 = torch.aten.mm %2260, %2259 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16>
%2262 = torch.aten._unsafe_view %2261, %951 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%2263 = torch.aten.t %19 : !torch.vtensor<[1280,768],f16> -> !torch.vtensor<[768,1280],f16>
%2264 = torch.aten._reshape_alias %arg2, %228, %229 : !torch.vtensor<[2,77,768],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[154,768],f16>
%2265 = torch.aten.mm %2264, %2263 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,1280],f16> -> !torch.vtensor<[154,1280],f16>
%2266 = torch.aten._unsafe_view %2265, %1022 : !torch.vtensor<[154,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,77,1280],f16>
%2267 = torch.aten.t %19 : !torch.vtensor<[1280,768],f16> -> !torch.vtensor<[768,1280],f16>
%2268 = torch.aten._reshape_alias %arg2, %228, %229 : !torch.vtensor<[2,77,768],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[154,768],f16>
%2269 = torch.aten.mm %2268, %2267 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,1280],f16> -> !torch.vtensor<[154,1280],f16>
%2270 = torch.aten._unsafe_view %2269, %1022 : !torch.vtensor<[154,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,77,1280],f16>
%2271 = torch.aten._reshape_alias %2262, %969, %970 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16>
%2272 = torch.aten.permute %2271, %179 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16>
%2273 = torch.aten.clone %2272, %int0 : !torch.vtensor<[2,8,256,160],f16>, !torch.int -> !torch.vtensor<[2,8,256,160],f16>
%2274 = torch.aten._unsafe_view %2273, %974 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%2275 = torch.aten._reshape_alias %2266, %1032, %1033 : !torch.vtensor<[2,77,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,77,8,160],f16>
%2276 = torch.aten.permute %2275, %179 : !torch.vtensor<[2,77,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,160],f16>
%2277 = torch.aten.clone %2276, %int0 : !torch.vtensor<[2,8,77,160],f16>, !torch.int -> !torch.vtensor<[2,8,77,160],f16>
%2278 = torch.aten._unsafe_view %2277, %1037 : !torch.vtensor<[2,8,77,160],f16>, !torch.list<int> -> !torch.vtensor<[16,77,160],f16>
%2279 = torch.aten._reshape_alias %2270, %1032, %1033 : !torch.vtensor<[2,77,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,77,8,160],f16>
%2280 = torch.aten.permute %2279, %179 : !torch.vtensor<[2,77,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,160],f16>
%2281 = torch.aten.clone %2280, %int0 : !torch.vtensor<[2,8,77,160],f16>, !torch.int -> !torch.vtensor<[2,8,77,160],f16>
%2282 = torch.aten._unsafe_view %2281, %1037 : !torch.vtensor<[2,8,77,160],f16>, !torch.list<int> -> !torch.vtensor<[16,77,160],f16>
%2283 = torch.aten.transpose.int %2278, %int-1, %int-2 : !torch.vtensor<[16,77,160],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,160,77],f16>
%2284 = torch.aten.expand %2274, %974, %false : !torch.vtensor<[16,256,160],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,256,160],f16>
%2285 = torch.aten._reshape_alias %2284, %974, %986 : !torch.vtensor<[16,256,160],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%2286 = torch.aten.expand %2283, %1046, %false : !torch.vtensor<[16,160,77],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,160,77],f16>
%2287 = torch.aten._reshape_alias %2286, %1046, %1048 : !torch.vtensor<[16,160,77],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,160,77],f16>
%2288 = torch.aten.bmm %2285, %2287 : !torch.vtensor<[16,256,160],f16>, !torch.vtensor<[16,160,77],f16> -> !torch.vtensor<[16,256,77],f16>
%2289 = torch.aten._unsafe_view %2288, %1051 : !torch.vtensor<[16,256,77],f16>, !torch.list<int> -> !torch.vtensor<[16,256,77],f16>
%2290 = torch.aten.mul.Tensor %2289, %0 : !torch.vtensor<[16,256,77],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,256,77],f16>
%2291 = torch.aten._softmax %2290, %int-1, %false : !torch.vtensor<[16,256,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,256,77],f16>
%2292 = torch.aten.expand %2291, %1051, %false : !torch.vtensor<[16,256,77],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,256,77],f16>
%2293 = torch.aten._reshape_alias %2292, %1051, %1056 : !torch.vtensor<[16,256,77],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,256,77],f16>
%2294 = torch.aten.expand %2282, %1037, %false : !torch.vtensor<[16,77,160],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,77,160],f16>
%2295 = torch.aten._reshape_alias %2294, %1037, %1059 : !torch.vtensor<[16,77,160],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,77,160],f16>
%2296 = torch.aten.bmm %2293, %2295 : !torch.vtensor<[16,256,77],f16>, !torch.vtensor<[16,77,160],f16> -> !torch.vtensor<[16,256,160],f16>
%2297 = torch.aten._unsafe_view %2296, %974 : !torch.vtensor<[16,256,160],f16>, !torch.list<int> -> !torch.vtensor<[16,256,160],f16>
%2298 = torch.aten._reshape_alias %2297, %1004, %1005 : !torch.vtensor<[16,256,160],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,8,256,160],f16>
%2299 = torch.aten.permute %2298, %179 : !torch.vtensor<[2,8,256,160],f16>, !torch.list<int> -> !torch.vtensor<[2,256,8,160],f16>
%2300 = torch.aten.clone %2299, %int0 : !torch.vtensor<[2,256,8,160],f16>, !torch.int -> !torch.vtensor<[2,256,8,160],f16>
%2301 = torch.aten._unsafe_view %2300, %951 : !torch.vtensor<[2,256,8,160],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%2302 = torch.aten.t %20 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%2303 = torch.aten.view %2301, %956 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
%2304 = torch.aten.addmm %29, %2303, %2302, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[512,1280],f16>
%2305 = torch.aten.view %2304, %951 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%2306 = torch.aten.add.Tensor %2305, %2258, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16>
%result0_85, %result1_86, %result2_87 = torch.aten.native_layer_norm %2306, %954, %29, %29, %float1.000000e-05 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.vtensor<[1280],f16>, !torch.vtensor<[1280],f16>, !torch.float -> !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1],f32>, !torch.vtensor<[2,256,1],f32>
%2307 = torch.aten.t %21 : !torch.vtensor<[10240,1280],f16> -> !torch.vtensor<[1280,10240],f16>
%2308 = torch.aten.view %result0_85, %956 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16>
%2309 = torch.aten.addmm %22, %2308, %2307, %int1, %int1 : !torch.vtensor<[10240],f16>, !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,10240],f16>, !torch.int, !torch.int -> !torch.vtensor<[512,10240],f16>
%2310 = torch.aten.view %2309, %1075 : !torch.vtensor<[512,10240],f16>, !torch.list<int> -> !torch.vtensor<[2,256,10240],f16>
%2311 = torch.aten.slice.Tensor %2310, %int-1, %int0, %int5120, %int1 : !torch.vtensor<[2,256,10240],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,256,5120],f16>
%2312 = torch.aten.slice.Tensor %2310, %int-1, %int5120, %int10240, %int1 : !torch.vtensor<[2,256,10240],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,256,5120],f16>
%2313 = torch.aten.gelu %2312, %str_0 : !torch.vtensor<[2,256,5120],f16>, !torch.str -> !torch.vtensor<[2,256,5120],f16>
%2314 = torch.aten.mul.Tensor %2311, %2313 : !torch.vtensor<[2,256,5120],f16>, !torch.vtensor<[2,256,5120],f16> -> !torch.vtensor<[2,256,5120],f16>
%2315 = torch.aten.t %23 : !torch.vtensor<[1280,5120],f16> -> !torch.vtensor<[5120,1280],f16>
%2316 = torch.aten.view %2314, %1082 : !torch.vtensor<[2,256,5120],f16>, !torch.list<int> -> !torch.vtensor<[512,5120],f16>
%2317 = torch.aten.addmm %29, %2316, %2315, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[512,5120],f16>, !torch.vtensor<[5120,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[512,1280],f16>
%2318 = torch.aten.view %2317, %951 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16>
%2319 = torch.aten.add.Tensor %2318, %2306, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16>
%2320 = torch.aten._reshape_alias %2319, %1087, %1088 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,16,16,1280],f16>
%2321 = torch.aten.permute %2320, %300 : !torch.vtensor<[2,16,16,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f16>
%2322 = torch.aten._convolution %2321, %24, %29, %87, %88, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[1280,1280,1,1],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,16,16],f16>
%2323 = torch.aten.add.Tensor %2322, %2190, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16>
%2324 = torch.aten.upsample_nearest2d.vec %2323, %none, %1768 : !torch.vtensor<[2,1280,16,16],f16>, !torch.none, !torch.list<float> -> !torch.vtensor<[2,1280,32,32],f16>
%2325 = torch.aten._convolution %2324, %25, %29, %87, %87, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,1280,32,32],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,32,32],f16>
%2326 = torch.prim.ListConstruct %2325, %878 : (!torch.vtensor<[2,1280,32,32],f16>, !torch.vtensor<[2,640,32,32],f16>) -> !torch.list<vtensor>
%2327 = torch.aten.cat %2326, %int1 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[2,1920,32,32],f16>
%2328 = torch.aten.clone %2327, %int0 : !torch.vtensor<[2,1920,32,32],f16>, !torch.int -> !torch.vtensor<[2,1920,32,32],f16>
%2329 = torch.prim.ListConstruct %int2, %int32, %int60, %int1024 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2330 = torch.aten.view %2328, %2329 : !torch.vtensor<[2,1920,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,32,60,1024],f16>
%2331 = torch.aten._to_copy %2330, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,32,60,1024],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,32,60,1024],f32>
%2332 = torch.aten.var.correction %2331, %93, %int0, %true : !torch.vtensor<[2,32,60,1024],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>
%2333 = torch.aten.mean.dim %2331, %93, %true, %none : !torch.vtensor<[2,32,60,1024],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%2334 = torch.aten.add.Tensor %2332, %5, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%2335 = torch.aten.rsqrt %2334 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%2336 = torch.aten.sub.Tensor %2330, %2333, %int1 : !torch.vtensor<[2,32,60,1024],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,60,1024],f32>
%2337 = torch.aten.mul.Tensor %2336, %2335 : !torch.vtensor<[2,32,60,1024],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,60,1024],f32>
%2338 = torch.prim.ListConstruct %int2, %int1920, %int32, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2339 = torch.aten.view %2337, %2338 : !torch.vtensor<[2,32,60,1024],f32>, !torch.list<int> -> !torch.vtensor<[2,1920,32,32],f32>
%2340 = torch.aten.unsqueeze %26, %int-1 : !torch.vtensor<[1920],f16>, !torch.int -> !torch.vtensor<[1920,1],f16>
%2341 = torch.aten.unsqueeze %2340, %int-1 : !torch.vtensor<[1920,1],f16>, !torch.int -> !torch.vtensor<[1920,1,1],f16>
%2342 = torch.aten.mul.Tensor %2339, %2341 : !torch.vtensor<[2,1920,32,32],f32>, !torch.vtensor<[1920,1,1],f16> -> !torch.vtensor<[2,1920,32,32],f32>
%2343 = torch.aten.unsqueeze %26, %int-1 : !torch.vtensor<[1920],f16>, !torch.int -> !torch.vtensor<[1920,1],f16>
%2344 = torch.aten.unsqueeze %2343, %int-1 : !torch.vtensor<[1920,1],f16>, !torch.int -> !torch.vtensor<[1920,1,1],f16>
%2345 = torch.aten.add.Tensor %2342, %2344, %int1 : !torch.vtensor<[2,1920,32,32],f32>, !torch.vtensor<[1920,1,1],f16>, !torch.int -> !torch.vtensor<[2,1920,32,32],f32>
%2346 = torch.aten._to_copy %2345, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,1920,32,32],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1920,32,32],f16>
%2347 = torch.aten.silu %2346 : !torch.vtensor<[2,1920,32,32],f16> -> !torch.vtensor<[2,1920,32,32],f16>
%2348 = torch.aten._convolution %2347, %27, %45, %87, %87, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,1920,32,32],f16>, !torch.vtensor<[640,1920,3,3],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,640,32,32],f16>
%2349 = torch.aten.silu %86 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%2350 = torch.aten.t %33 : !torch.vtensor<[640,1280],f16> -> !torch.vtensor<[1280,640],f16>
%2351 = torch.aten.addmm %45, %2349, %2350, %int1, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[2,640],f16>
%2352 = torch.aten.slice.Tensor %2351, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,640],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,640],f16>
%2353 = torch.aten.slice.Tensor %2352, %int1, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,640],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,640],f16>
%2354 = torch.aten.unsqueeze %2353, %int2 : !torch.vtensor<[2,640],f16>, !torch.int -> !torch.vtensor<[2,640,1],f16>
%2355 = torch.aten.unsqueeze %2354, %int3 : !torch.vtensor<[2,640,1],f16>, !torch.int -> !torch.vtensor<[2,640,1,1],f16>
%2356 = torch.aten.add.Tensor %2348, %2355, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,1,1],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
%2357 = torch.aten.view %2356, %515 : !torch.vtensor<[2,640,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,32,20,1024],f16>
%2358 = torch.aten._to_copy %2357, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,32,20,1024],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,32,20,1024],f32>
%2359 = torch.aten.var.correction %2358, %93, %int0, %true : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>
%2360 = torch.aten.mean.dim %2358, %93, %true, %none : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%2361 = torch.aten.add.Tensor %2359, %5, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%2362 = torch.aten.rsqrt %2361 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%2363 = torch.aten.sub.Tensor %2357, %2360, %int1 : !torch.vtensor<[2,32,20,1024],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,20,1024],f32>
%2364 = torch.aten.mul.Tensor %2363, %2362 : !torch.vtensor<[2,32,20,1024],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,20,1024],f32>
%2365 = torch.aten.view %2364, %524 : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f32>
%2366 = torch.aten.unsqueeze %45, %int-1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640,1],f16>
%2367 = torch.aten.unsqueeze %2366, %int-1 : !torch.vtensor<[640,1],f16>, !torch.int -> !torch.vtensor<[640,1,1],f16>
%2368 = torch.aten.mul.Tensor %2365, %2367 : !torch.vtensor<[2,640,32,32],f32>, !torch.vtensor<[640,1,1],f16> -> !torch.vtensor<[2,640,32,32],f32>
%2369 = torch.aten.unsqueeze %45, %int-1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640,1],f16>
%2370 = torch.aten.unsqueeze %2369, %int-1 : !torch.vtensor<[640,1],f16>, !torch.int -> !torch.vtensor<[640,1,1],f16>
%2371 = torch.aten.add.Tensor %2368, %2370, %int1 : !torch.vtensor<[2,640,32,32],f32>, !torch.vtensor<[640,1,1],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f32>
%2372 = torch.aten._to_copy %2371, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,640,32,32],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,640,32,32],f16>
%2373 = torch.aten.silu %2372 : !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16>
%2374 = torch.aten._convolution %2373, %41, %45, %87, %87, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[640,640,3,3],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,640,32,32],f16>
%2375 = torch.aten._convolution %2327, %28, %45, %87, %88, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,1920,32,32],f16>, !torch.vtensor<[640,1920,1,1],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,640,32,32],f16>
%2376 = torch.aten.add.Tensor %2375, %2374, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
%2377 = torch.aten.div.Tensor %2376, %4 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[2,640,32,32],f16>
%2378 = torch.aten.clone %2377, %int0 : !torch.vtensor<[2,640,32,32],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
%2379 = torch.aten.view %2378, %515 : !torch.vtensor<[2,640,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,32,20,1024],f16>
%2380 = torch.aten._to_copy %2379, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,32,20,1024],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,32,20,1024],f32>
%2381 = torch.aten.var.correction %2380, %93, %int0, %true : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>
%2382 = torch.aten.mean.dim %2380, %93, %true, %none : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%2383 = torch.aten.add.Tensor %2381, %3, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%2384 = torch.aten.rsqrt %2383 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%2385 = torch.aten.sub.Tensor %2379, %2382, %int1 : !torch.vtensor<[2,32,20,1024],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,20,1024],f32>
%2386 = torch.aten.mul.Tensor %2385, %2384 : !torch.vtensor<[2,32,20,1024],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,20,1024],f32>
%2387 = torch.aten.view %2386, %524 : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f32>
%2388 = torch.aten.unsqueeze %45, %int-1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640,1],f16>
%2389 = torch.aten.unsqueeze %2388, %int-1 : !torch.vtensor<[640,1],f16>, !torch.int -> !torch.vtensor<[640,1,1],f16>
%2390 = torch.aten.mul.Tensor %2387, %2389 : !torch.vtensor<[2,640,32,32],f32>, !torch.vtensor<[640,1,1],f16> -> !torch.vtensor<[2,640,32,32],f32>
%2391 = torch.aten.unsqueeze %45, %int-1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640,1],f16>
%2392 = torch.aten.unsqueeze %2391, %int-1 : !torch.vtensor<[640,1],f16>, !torch.int -> !torch.vtensor<[640,1,1],f16>
%2393 = torch.aten.add.Tensor %2390, %2392, %int1 : !torch.vtensor<[2,640,32,32],f32>, !torch.vtensor<[640,1,1],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f32>
%2394 = torch.aten._to_copy %2393, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,640,32,32],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,640,32,32],f16>
%2395 = torch.aten._convolution %2394, %40, %45, %87, %88, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[640,640,1,1],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,640,32,32],f16>
%2396 = torch.aten.permute %2395, %156 : !torch.vtensor<[2,640,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,32,32,640],f16>
%2397 = torch.aten._reshape_alias %2396, %557, %558 : !torch.vtensor<[2,32,32,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%result0_88, %result1_89, %result2_90 = torch.aten.native_layer_norm %2397, %560, %45, %45, %float1.000000e-05 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.vtensor<[640],f16>, !torch.vtensor<[640],f16>, !torch.float -> !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,1],f32>, !torch.vtensor<[2,1024,1],f32>
%2398 = torch.aten.t %36 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
%2399 = torch.aten._reshape_alias %result0_88, %562, %563 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
%2400 = torch.aten.mm %2399, %2398 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16>
%2401 = torch.aten._unsafe_view %2400, %557 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%2402 = torch.aten.t %36 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
%2403 = torch.aten._reshape_alias %result0_88, %562, %563 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
%2404 = torch.aten.mm %2403, %2402 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16>
%2405 = torch.aten._unsafe_view %2404, %557 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%2406 = torch.aten.t %36 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
%2407 = torch.aten._reshape_alias %result0_88, %562, %563 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
%2408 = torch.aten.mm %2407, %2406 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16>
%2409 = torch.aten._unsafe_view %2408, %557 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%2410 = torch.aten._reshape_alias %2401, %575, %576 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16>
%2411 = torch.aten.permute %2410, %179 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16>
%2412 = torch.aten.clone %2411, %int0 : !torch.vtensor<[2,8,1024,80],f16>, !torch.int -> !torch.vtensor<[2,8,1024,80],f16>
%2413 = torch.aten._unsafe_view %2412, %580 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%2414 = torch.aten._reshape_alias %2405, %575, %576 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16>
%2415 = torch.aten.permute %2414, %179 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16>
%2416 = torch.aten.clone %2415, %int0 : !torch.vtensor<[2,8,1024,80],f16>, !torch.int -> !torch.vtensor<[2,8,1024,80],f16>
%2417 = torch.aten._unsafe_view %2416, %580 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%2418 = torch.aten._reshape_alias %2409, %575, %576 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16>
%2419 = torch.aten.permute %2418, %179 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16>
%2420 = torch.aten.clone %2419, %int0 : !torch.vtensor<[2,8,1024,80],f16>, !torch.int -> !torch.vtensor<[2,8,1024,80],f16>
%2421 = torch.aten._unsafe_view %2420, %580 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%2422 = torch.aten.transpose.int %2417, %int-1, %int-2 : !torch.vtensor<[16,1024,80],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,80,1024],f16>
%2423 = torch.aten.expand %2413, %580, %false : !torch.vtensor<[16,1024,80],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,1024,80],f16>
%2424 = torch.aten._reshape_alias %2423, %580, %592 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%2425 = torch.aten.expand %2422, %594, %false : !torch.vtensor<[16,80,1024],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,80,1024],f16>
%2426 = torch.aten._reshape_alias %2425, %594, %596 : !torch.vtensor<[16,80,1024],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,80,1024],f16>
%2427 = torch.aten.bmm %2424, %2426 : !torch.vtensor<[16,1024,80],f16>, !torch.vtensor<[16,80,1024],f16> -> !torch.vtensor<[16,1024,1024],f16>
%2428 = torch.aten._unsafe_view %2427, %599 : !torch.vtensor<[16,1024,1024],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,1024],f16>
%2429 = torch.aten.mul.Tensor %2428, %1 : !torch.vtensor<[16,1024,1024],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,1024,1024],f16>
%2430 = torch.aten._softmax %2429, %int-1, %false : !torch.vtensor<[16,1024,1024],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,1024,1024],f16>
%2431 = torch.aten.expand %2430, %599, %false : !torch.vtensor<[16,1024,1024],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,1024,1024],f16>
%2432 = torch.aten._reshape_alias %2431, %599, %604 : !torch.vtensor<[16,1024,1024],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,1024,1024],f16>
%2433 = torch.aten.expand %2421, %580, %false : !torch.vtensor<[16,1024,80],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,1024,80],f16>
%2434 = torch.aten._reshape_alias %2433, %580, %592 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%2435 = torch.aten.bmm %2432, %2434 : !torch.vtensor<[16,1024,1024],f16>, !torch.vtensor<[16,1024,80],f16> -> !torch.vtensor<[16,1024,80],f16>
%2436 = torch.aten._unsafe_view %2435, %580 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%2437 = torch.aten._reshape_alias %2436, %610, %611 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16>
%2438 = torch.aten.permute %2437, %179 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16>
%2439 = torch.aten.clone %2438, %int0 : !torch.vtensor<[2,1024,8,80],f16>, !torch.int -> !torch.vtensor<[2,1024,8,80],f16>
%2440 = torch.aten._unsafe_view %2439, %557 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%2441 = torch.aten.t %36 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
%2442 = torch.aten.view %2440, %562 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
%2443 = torch.aten.addmm %45, %2442, %2441, %int1, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[2048,640],f16>
%2444 = torch.aten.view %2443, %557 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%2445 = torch.aten.add.Tensor %2444, %2397, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16>
%result0_91, %result1_92, %result2_93 = torch.aten.native_layer_norm %2445, %560, %45, %45, %float1.000000e-05 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.vtensor<[640],f16>, !torch.vtensor<[640],f16>, !torch.float -> !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,1],f32>, !torch.vtensor<[2,1024,1],f32>
%2446 = torch.aten.t %36 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
%2447 = torch.aten._reshape_alias %result0_91, %562, %563 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
%2448 = torch.aten.mm %2447, %2446 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16>
%2449 = torch.aten._unsafe_view %2448, %557 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%2450 = torch.aten.t %35 : !torch.vtensor<[640,768],f16> -> !torch.vtensor<[768,640],f16>
%2451 = torch.aten._reshape_alias %arg2, %228, %229 : !torch.vtensor<[2,77,768],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[154,768],f16>
%2452 = torch.aten.mm %2451, %2450 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,640],f16> -> !torch.vtensor<[154,640],f16>
%2453 = torch.aten._unsafe_view %2452, %628 : !torch.vtensor<[154,640],f16>, !torch.list<int> -> !torch.vtensor<[2,77,640],f16>
%2454 = torch.aten.t %35 : !torch.vtensor<[640,768],f16> -> !torch.vtensor<[768,640],f16>
%2455 = torch.aten._reshape_alias %arg2, %228, %229 : !torch.vtensor<[2,77,768],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[154,768],f16>
%2456 = torch.aten.mm %2455, %2454 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,640],f16> -> !torch.vtensor<[154,640],f16>
%2457 = torch.aten._unsafe_view %2456, %628 : !torch.vtensor<[154,640],f16>, !torch.list<int> -> !torch.vtensor<[2,77,640],f16>
%2458 = torch.aten._reshape_alias %2449, %575, %576 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16>
%2459 = torch.aten.permute %2458, %179 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16>
%2460 = torch.aten.clone %2459, %int0 : !torch.vtensor<[2,8,1024,80],f16>, !torch.int -> !torch.vtensor<[2,8,1024,80],f16>
%2461 = torch.aten._unsafe_view %2460, %580 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%2462 = torch.aten._reshape_alias %2453, %638, %639 : !torch.vtensor<[2,77,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,77,8,80],f16>
%2463 = torch.aten.permute %2462, %179 : !torch.vtensor<[2,77,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,80],f16>
%2464 = torch.aten.clone %2463, %int0 : !torch.vtensor<[2,8,77,80],f16>, !torch.int -> !torch.vtensor<[2,8,77,80],f16>
%2465 = torch.aten._unsafe_view %2464, %643 : !torch.vtensor<[2,8,77,80],f16>, !torch.list<int> -> !torch.vtensor<[16,77,80],f16>
%2466 = torch.aten._reshape_alias %2457, %638, %639 : !torch.vtensor<[2,77,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,77,8,80],f16>
%2467 = torch.aten.permute %2466, %179 : !torch.vtensor<[2,77,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,80],f16>
%2468 = torch.aten.clone %2467, %int0 : !torch.vtensor<[2,8,77,80],f16>, !torch.int -> !torch.vtensor<[2,8,77,80],f16>
%2469 = torch.aten._unsafe_view %2468, %643 : !torch.vtensor<[2,8,77,80],f16>, !torch.list<int> -> !torch.vtensor<[16,77,80],f16>
%2470 = torch.aten.transpose.int %2465, %int-1, %int-2 : !torch.vtensor<[16,77,80],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,80,77],f16>
%2471 = torch.aten.expand %2461, %580, %false : !torch.vtensor<[16,1024,80],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,1024,80],f16>
%2472 = torch.aten._reshape_alias %2471, %580, %592 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%2473 = torch.aten.expand %2470, %652, %false : !torch.vtensor<[16,80,77],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,80,77],f16>
%2474 = torch.aten._reshape_alias %2473, %652, %654 : !torch.vtensor<[16,80,77],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,80,77],f16>
%2475 = torch.aten.bmm %2472, %2474 : !torch.vtensor<[16,1024,80],f16>, !torch.vtensor<[16,80,77],f16> -> !torch.vtensor<[16,1024,77],f16>
%2476 = torch.aten._unsafe_view %2475, %657 : !torch.vtensor<[16,1024,77],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,77],f16>
%2477 = torch.aten.mul.Tensor %2476, %1 : !torch.vtensor<[16,1024,77],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,1024,77],f16>
%2478 = torch.aten._softmax %2477, %int-1, %false : !torch.vtensor<[16,1024,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,1024,77],f16>
%2479 = torch.aten.expand %2478, %657, %false : !torch.vtensor<[16,1024,77],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,1024,77],f16>
%2480 = torch.aten._reshape_alias %2479, %657, %662 : !torch.vtensor<[16,1024,77],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,1024,77],f16>
%2481 = torch.aten.expand %2469, %643, %false : !torch.vtensor<[16,77,80],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,77,80],f16>
%2482 = torch.aten._reshape_alias %2481, %643, %665 : !torch.vtensor<[16,77,80],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,77,80],f16>
%2483 = torch.aten.bmm %2480, %2482 : !torch.vtensor<[16,1024,77],f16>, !torch.vtensor<[16,77,80],f16> -> !torch.vtensor<[16,1024,80],f16>
%2484 = torch.aten._unsafe_view %2483, %580 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%2485 = torch.aten._reshape_alias %2484, %610, %611 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16>
%2486 = torch.aten.permute %2485, %179 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16>
%2487 = torch.aten.clone %2486, %int0 : !torch.vtensor<[2,1024,8,80],f16>, !torch.int -> !torch.vtensor<[2,1024,8,80],f16>
%2488 = torch.aten._unsafe_view %2487, %557 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%2489 = torch.aten.t %36 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
%2490 = torch.aten.view %2488, %562 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
%2491 = torch.aten.addmm %45, %2490, %2489, %int1, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[2048,640],f16>
%2492 = torch.aten.view %2491, %557 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%2493 = torch.aten.add.Tensor %2492, %2445, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16>
%result0_94, %result1_95, %result2_96 = torch.aten.native_layer_norm %2493, %560, %45, %45, %float1.000000e-05 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.vtensor<[640],f16>, !torch.vtensor<[640],f16>, !torch.float -> !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,1],f32>, !torch.vtensor<[2,1024,1],f32>
%2494 = torch.aten.t %37 : !torch.vtensor<[5120,640],f16> -> !torch.vtensor<[640,5120],f16>
%2495 = torch.aten.view %result0_94, %562 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
%2496 = torch.aten.addmm %38, %2495, %2494, %int1, %int1 : !torch.vtensor<[5120],f16>, !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,5120],f16>, !torch.int, !torch.int -> !torch.vtensor<[2048,5120],f16>
%2497 = torch.aten.view %2496, %681 : !torch.vtensor<[2048,5120],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,5120],f16>
%2498 = torch.aten.slice.Tensor %2497, %int-1, %int0, %int2560, %int1 : !torch.vtensor<[2,1024,5120],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1024,2560],f16>
%2499 = torch.aten.slice.Tensor %2497, %int-1, %int2560, %int5120, %int1 : !torch.vtensor<[2,1024,5120],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1024,2560],f16>
%2500 = torch.aten.gelu %2499, %str_0 : !torch.vtensor<[2,1024,2560],f16>, !torch.str -> !torch.vtensor<[2,1024,2560],f16>
%2501 = torch.aten.mul.Tensor %2498, %2500 : !torch.vtensor<[2,1024,2560],f16>, !torch.vtensor<[2,1024,2560],f16> -> !torch.vtensor<[2,1024,2560],f16>
%2502 = torch.aten.t %39 : !torch.vtensor<[640,2560],f16> -> !torch.vtensor<[2560,640],f16>
%2503 = torch.aten.view %2501, %688 : !torch.vtensor<[2,1024,2560],f16>, !torch.list<int> -> !torch.vtensor<[2048,2560],f16>
%2504 = torch.aten.addmm %45, %2503, %2502, %int1, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2048,2560],f16>, !torch.vtensor<[2560,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[2048,640],f16>
%2505 = torch.aten.view %2504, %557 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%2506 = torch.aten.add.Tensor %2505, %2493, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16>
%2507 = torch.aten._reshape_alias %2506, %693, %694 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,32,640],f16>
%2508 = torch.aten.permute %2507, %300 : !torch.vtensor<[2,32,32,640],f16>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f16>
%2509 = torch.aten._convolution %2508, %40, %45, %87, %88, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[640,640,1,1],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,640,32,32],f16>
%2510 = torch.aten.add.Tensor %2509, %2377, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
%2511 = torch.prim.ListConstruct %2510, %698 : (!torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f16>) -> !torch.list<vtensor>
%2512 = torch.aten.cat %2511, %int1 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[2,1280,32,32],f16>
%2513 = torch.aten.clone %2512, %int0 : !torch.vtensor<[2,1280,32,32],f16>, !torch.int -> !torch.vtensor<[2,1280,32,32],f16>
%2514 = torch.prim.ListConstruct %int2, %int32, %int40, %int1024 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2515 = torch.aten.view %2513, %2514 : !torch.vtensor<[2,1280,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,32,40,1024],f16>
%2516 = torch.aten._to_copy %2515, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,32,40,1024],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,32,40,1024],f32>
%2517 = torch.aten.var.correction %2516, %93, %int0, %true : !torch.vtensor<[2,32,40,1024],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>
%2518 = torch.aten.mean.dim %2516, %93, %true, %none : !torch.vtensor<[2,32,40,1024],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%2519 = torch.aten.add.Tensor %2517, %5, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%2520 = torch.aten.rsqrt %2519 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%2521 = torch.aten.sub.Tensor %2515, %2518, %int1 : !torch.vtensor<[2,32,40,1024],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,1024],f32>
%2522 = torch.aten.mul.Tensor %2521, %2520 : !torch.vtensor<[2,32,40,1024],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,1024],f32>
%2523 = torch.prim.ListConstruct %int2, %int1280, %int32, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2524 = torch.aten.view %2522, %2523 : !torch.vtensor<[2,32,40,1024],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,32,32],f32>
%2525 = torch.aten.unsqueeze %29, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%2526 = torch.aten.unsqueeze %2525, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%2527 = torch.aten.mul.Tensor %2524, %2526 : !torch.vtensor<[2,1280,32,32],f32>, !torch.vtensor<[1280,1,1],f16> -> !torch.vtensor<[2,1280,32,32],f32>
%2528 = torch.aten.unsqueeze %29, %int-1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280,1],f16>
%2529 = torch.aten.unsqueeze %2528, %int-1 : !torch.vtensor<[1280,1],f16>, !torch.int -> !torch.vtensor<[1280,1,1],f16>
%2530 = torch.aten.add.Tensor %2527, %2529, %int1 : !torch.vtensor<[2,1280,32,32],f32>, !torch.vtensor<[1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,32,32],f32>
%2531 = torch.aten._to_copy %2530, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,1280,32,32],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,32,32],f16>
%2532 = torch.aten.silu %2531 : !torch.vtensor<[2,1280,32,32],f16> -> !torch.vtensor<[2,1280,32,32],f16>
%2533 = torch.aten._convolution %2532, %30, %45, %87, %87, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,1280,32,32],f16>, !torch.vtensor<[640,1280,3,3],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,640,32,32],f16>
%2534 = torch.aten.silu %86 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%2535 = torch.aten.t %33 : !torch.vtensor<[640,1280],f16> -> !torch.vtensor<[1280,640],f16>
%2536 = torch.aten.addmm %45, %2534, %2535, %int1, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[2,640],f16>
%2537 = torch.aten.slice.Tensor %2536, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,640],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,640],f16>
%2538 = torch.aten.slice.Tensor %2537, %int1, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,640],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,640],f16>
%2539 = torch.aten.unsqueeze %2538, %int2 : !torch.vtensor<[2,640],f16>, !torch.int -> !torch.vtensor<[2,640,1],f16>
%2540 = torch.aten.unsqueeze %2539, %int3 : !torch.vtensor<[2,640,1],f16>, !torch.int -> !torch.vtensor<[2,640,1,1],f16>
%2541 = torch.aten.add.Tensor %2533, %2540, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,1,1],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
%2542 = torch.aten.view %2541, %515 : !torch.vtensor<[2,640,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,32,20,1024],f16>
%2543 = torch.aten._to_copy %2542, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,32,20,1024],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,32,20,1024],f32>
%2544 = torch.aten.var.correction %2543, %93, %int0, %true : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>
%2545 = torch.aten.mean.dim %2543, %93, %true, %none : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%2546 = torch.aten.add.Tensor %2544, %5, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%2547 = torch.aten.rsqrt %2546 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%2548 = torch.aten.sub.Tensor %2542, %2545, %int1 : !torch.vtensor<[2,32,20,1024],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,20,1024],f32>
%2549 = torch.aten.mul.Tensor %2548, %2547 : !torch.vtensor<[2,32,20,1024],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,20,1024],f32>
%2550 = torch.aten.view %2549, %524 : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f32>
%2551 = torch.aten.unsqueeze %45, %int-1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640,1],f16>
%2552 = torch.aten.unsqueeze %2551, %int-1 : !torch.vtensor<[640,1],f16>, !torch.int -> !torch.vtensor<[640,1,1],f16>
%2553 = torch.aten.mul.Tensor %2550, %2552 : !torch.vtensor<[2,640,32,32],f32>, !torch.vtensor<[640,1,1],f16> -> !torch.vtensor<[2,640,32,32],f32>
%2554 = torch.aten.unsqueeze %45, %int-1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640,1],f16>
%2555 = torch.aten.unsqueeze %2554, %int-1 : !torch.vtensor<[640,1],f16>, !torch.int -> !torch.vtensor<[640,1,1],f16>
%2556 = torch.aten.add.Tensor %2553, %2555, %int1 : !torch.vtensor<[2,640,32,32],f32>, !torch.vtensor<[640,1,1],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f32>
%2557 = torch.aten._to_copy %2556, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,640,32,32],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,640,32,32],f16>
%2558 = torch.aten.silu %2557 : !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16>
%2559 = torch.aten._convolution %2558, %41, %45, %87, %87, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[640,640,3,3],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,640,32,32],f16>
%2560 = torch.aten._convolution %2512, %31, %45, %87, %88, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,1280,32,32],f16>, !torch.vtensor<[640,1280,1,1],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,640,32,32],f16>
%2561 = torch.aten.add.Tensor %2560, %2559, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
%2562 = torch.aten.div.Tensor %2561, %4 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[2,640,32,32],f16>
%2563 = torch.aten.clone %2562, %int0 : !torch.vtensor<[2,640,32,32],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
%2564 = torch.aten.view %2563, %515 : !torch.vtensor<[2,640,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,32,20,1024],f16>
%2565 = torch.aten._to_copy %2564, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,32,20,1024],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,32,20,1024],f32>
%2566 = torch.aten.var.correction %2565, %93, %int0, %true : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>
%2567 = torch.aten.mean.dim %2565, %93, %true, %none : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%2568 = torch.aten.add.Tensor %2566, %3, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%2569 = torch.aten.rsqrt %2568 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%2570 = torch.aten.sub.Tensor %2564, %2567, %int1 : !torch.vtensor<[2,32,20,1024],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,20,1024],f32>
%2571 = torch.aten.mul.Tensor %2570, %2569 : !torch.vtensor<[2,32,20,1024],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,20,1024],f32>
%2572 = torch.aten.view %2571, %524 : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f32>
%2573 = torch.aten.unsqueeze %45, %int-1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640,1],f16>
%2574 = torch.aten.unsqueeze %2573, %int-1 : !torch.vtensor<[640,1],f16>, !torch.int -> !torch.vtensor<[640,1,1],f16>
%2575 = torch.aten.mul.Tensor %2572, %2574 : !torch.vtensor<[2,640,32,32],f32>, !torch.vtensor<[640,1,1],f16> -> !torch.vtensor<[2,640,32,32],f32>
%2576 = torch.aten.unsqueeze %45, %int-1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640,1],f16>
%2577 = torch.aten.unsqueeze %2576, %int-1 : !torch.vtensor<[640,1],f16>, !torch.int -> !torch.vtensor<[640,1,1],f16>
%2578 = torch.aten.add.Tensor %2575, %2577, %int1 : !torch.vtensor<[2,640,32,32],f32>, !torch.vtensor<[640,1,1],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f32>
%2579 = torch.aten._to_copy %2578, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,640,32,32],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,640,32,32],f16>
%2580 = torch.aten._convolution %2579, %40, %45, %87, %88, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[640,640,1,1],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,640,32,32],f16>
%2581 = torch.aten.permute %2580, %156 : !torch.vtensor<[2,640,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,32,32,640],f16>
%2582 = torch.aten._reshape_alias %2581, %557, %558 : !torch.vtensor<[2,32,32,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%result0_97, %result1_98, %result2_99 = torch.aten.native_layer_norm %2582, %560, %45, %45, %float1.000000e-05 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.vtensor<[640],f16>, !torch.vtensor<[640],f16>, !torch.float -> !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,1],f32>, !torch.vtensor<[2,1024,1],f32>
%2583 = torch.aten.t %36 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
%2584 = torch.aten._reshape_alias %result0_97, %562, %563 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
%2585 = torch.aten.mm %2584, %2583 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16>
%2586 = torch.aten._unsafe_view %2585, %557 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%2587 = torch.aten.t %36 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
%2588 = torch.aten._reshape_alias %result0_97, %562, %563 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
%2589 = torch.aten.mm %2588, %2587 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16>
%2590 = torch.aten._unsafe_view %2589, %557 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%2591 = torch.aten.t %36 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
%2592 = torch.aten._reshape_alias %result0_97, %562, %563 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
%2593 = torch.aten.mm %2592, %2591 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16>
%2594 = torch.aten._unsafe_view %2593, %557 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%2595 = torch.aten._reshape_alias %2586, %575, %576 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16>
%2596 = torch.aten.permute %2595, %179 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16>
%2597 = torch.aten.clone %2596, %int0 : !torch.vtensor<[2,8,1024,80],f16>, !torch.int -> !torch.vtensor<[2,8,1024,80],f16>
%2598 = torch.aten._unsafe_view %2597, %580 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%2599 = torch.aten._reshape_alias %2590, %575, %576 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16>
%2600 = torch.aten.permute %2599, %179 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16>
%2601 = torch.aten.clone %2600, %int0 : !torch.vtensor<[2,8,1024,80],f16>, !torch.int -> !torch.vtensor<[2,8,1024,80],f16>
%2602 = torch.aten._unsafe_view %2601, %580 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%2603 = torch.aten._reshape_alias %2594, %575, %576 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16>
%2604 = torch.aten.permute %2603, %179 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16>
%2605 = torch.aten.clone %2604, %int0 : !torch.vtensor<[2,8,1024,80],f16>, !torch.int -> !torch.vtensor<[2,8,1024,80],f16>
%2606 = torch.aten._unsafe_view %2605, %580 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%2607 = torch.aten.transpose.int %2602, %int-1, %int-2 : !torch.vtensor<[16,1024,80],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,80,1024],f16>
%2608 = torch.aten.expand %2598, %580, %false : !torch.vtensor<[16,1024,80],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,1024,80],f16>
%2609 = torch.aten._reshape_alias %2608, %580, %592 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%2610 = torch.aten.expand %2607, %594, %false : !torch.vtensor<[16,80,1024],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,80,1024],f16>
%2611 = torch.aten._reshape_alias %2610, %594, %596 : !torch.vtensor<[16,80,1024],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,80,1024],f16>
%2612 = torch.aten.bmm %2609, %2611 : !torch.vtensor<[16,1024,80],f16>, !torch.vtensor<[16,80,1024],f16> -> !torch.vtensor<[16,1024,1024],f16>
%2613 = torch.aten._unsafe_view %2612, %599 : !torch.vtensor<[16,1024,1024],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,1024],f16>
%2614 = torch.aten.mul.Tensor %2613, %1 : !torch.vtensor<[16,1024,1024],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,1024,1024],f16>
%2615 = torch.aten._softmax %2614, %int-1, %false : !torch.vtensor<[16,1024,1024],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,1024,1024],f16>
%2616 = torch.aten.expand %2615, %599, %false : !torch.vtensor<[16,1024,1024],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,1024,1024],f16>
%2617 = torch.aten._reshape_alias %2616, %599, %604 : !torch.vtensor<[16,1024,1024],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,1024,1024],f16>
%2618 = torch.aten.expand %2606, %580, %false : !torch.vtensor<[16,1024,80],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,1024,80],f16>
%2619 = torch.aten._reshape_alias %2618, %580, %592 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%2620 = torch.aten.bmm %2617, %2619 : !torch.vtensor<[16,1024,1024],f16>, !torch.vtensor<[16,1024,80],f16> -> !torch.vtensor<[16,1024,80],f16>
%2621 = torch.aten._unsafe_view %2620, %580 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%2622 = torch.aten._reshape_alias %2621, %610, %611 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16>
%2623 = torch.aten.permute %2622, %179 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16>
%2624 = torch.aten.clone %2623, %int0 : !torch.vtensor<[2,1024,8,80],f16>, !torch.int -> !torch.vtensor<[2,1024,8,80],f16>
%2625 = torch.aten._unsafe_view %2624, %557 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%2626 = torch.aten.t %36 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
%2627 = torch.aten.view %2625, %562 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
%2628 = torch.aten.addmm %45, %2627, %2626, %int1, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[2048,640],f16>
%2629 = torch.aten.view %2628, %557 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%2630 = torch.aten.add.Tensor %2629, %2582, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16>
%result0_100, %result1_101, %result2_102 = torch.aten.native_layer_norm %2630, %560, %45, %45, %float1.000000e-05 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.vtensor<[640],f16>, !torch.vtensor<[640],f16>, !torch.float -> !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,1],f32>, !torch.vtensor<[2,1024,1],f32>
%2631 = torch.aten.t %36 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
%2632 = torch.aten._reshape_alias %result0_100, %562, %563 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
%2633 = torch.aten.mm %2632, %2631 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16>
%2634 = torch.aten._unsafe_view %2633, %557 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%2635 = torch.aten.t %35 : !torch.vtensor<[640,768],f16> -> !torch.vtensor<[768,640],f16>
%2636 = torch.aten._reshape_alias %arg2, %228, %229 : !torch.vtensor<[2,77,768],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[154,768],f16>
%2637 = torch.aten.mm %2636, %2635 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,640],f16> -> !torch.vtensor<[154,640],f16>
%2638 = torch.aten._unsafe_view %2637, %628 : !torch.vtensor<[154,640],f16>, !torch.list<int> -> !torch.vtensor<[2,77,640],f16>
%2639 = torch.aten.t %35 : !torch.vtensor<[640,768],f16> -> !torch.vtensor<[768,640],f16>
%2640 = torch.aten._reshape_alias %arg2, %228, %229 : !torch.vtensor<[2,77,768],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[154,768],f16>
%2641 = torch.aten.mm %2640, %2639 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,640],f16> -> !torch.vtensor<[154,640],f16>
%2642 = torch.aten._unsafe_view %2641, %628 : !torch.vtensor<[154,640],f16>, !torch.list<int> -> !torch.vtensor<[2,77,640],f16>
%2643 = torch.aten._reshape_alias %2634, %575, %576 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16>
%2644 = torch.aten.permute %2643, %179 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16>
%2645 = torch.aten.clone %2644, %int0 : !torch.vtensor<[2,8,1024,80],f16>, !torch.int -> !torch.vtensor<[2,8,1024,80],f16>
%2646 = torch.aten._unsafe_view %2645, %580 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%2647 = torch.aten._reshape_alias %2638, %638, %639 : !torch.vtensor<[2,77,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,77,8,80],f16>
%2648 = torch.aten.permute %2647, %179 : !torch.vtensor<[2,77,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,80],f16>
%2649 = torch.aten.clone %2648, %int0 : !torch.vtensor<[2,8,77,80],f16>, !torch.int -> !torch.vtensor<[2,8,77,80],f16>
%2650 = torch.aten._unsafe_view %2649, %643 : !torch.vtensor<[2,8,77,80],f16>, !torch.list<int> -> !torch.vtensor<[16,77,80],f16>
%2651 = torch.aten._reshape_alias %2642, %638, %639 : !torch.vtensor<[2,77,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,77,8,80],f16>
%2652 = torch.aten.permute %2651, %179 : !torch.vtensor<[2,77,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,80],f16>
%2653 = torch.aten.clone %2652, %int0 : !torch.vtensor<[2,8,77,80],f16>, !torch.int -> !torch.vtensor<[2,8,77,80],f16>
%2654 = torch.aten._unsafe_view %2653, %643 : !torch.vtensor<[2,8,77,80],f16>, !torch.list<int> -> !torch.vtensor<[16,77,80],f16>
%2655 = torch.aten.transpose.int %2650, %int-1, %int-2 : !torch.vtensor<[16,77,80],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,80,77],f16>
%2656 = torch.aten.expand %2646, %580, %false : !torch.vtensor<[16,1024,80],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,1024,80],f16>
%2657 = torch.aten._reshape_alias %2656, %580, %592 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%2658 = torch.aten.expand %2655, %652, %false : !torch.vtensor<[16,80,77],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,80,77],f16>
%2659 = torch.aten._reshape_alias %2658, %652, %654 : !torch.vtensor<[16,80,77],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,80,77],f16>
%2660 = torch.aten.bmm %2657, %2659 : !torch.vtensor<[16,1024,80],f16>, !torch.vtensor<[16,80,77],f16> -> !torch.vtensor<[16,1024,77],f16>
%2661 = torch.aten._unsafe_view %2660, %657 : !torch.vtensor<[16,1024,77],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,77],f16>
%2662 = torch.aten.mul.Tensor %2661, %1 : !torch.vtensor<[16,1024,77],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,1024,77],f16>
%2663 = torch.aten._softmax %2662, %int-1, %false : !torch.vtensor<[16,1024,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,1024,77],f16>
%2664 = torch.aten.expand %2663, %657, %false : !torch.vtensor<[16,1024,77],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,1024,77],f16>
%2665 = torch.aten._reshape_alias %2664, %657, %662 : !torch.vtensor<[16,1024,77],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,1024,77],f16>
%2666 = torch.aten.expand %2654, %643, %false : !torch.vtensor<[16,77,80],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,77,80],f16>
%2667 = torch.aten._reshape_alias %2666, %643, %665 : !torch.vtensor<[16,77,80],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,77,80],f16>
%2668 = torch.aten.bmm %2665, %2667 : !torch.vtensor<[16,1024,77],f16>, !torch.vtensor<[16,77,80],f16> -> !torch.vtensor<[16,1024,80],f16>
%2669 = torch.aten._unsafe_view %2668, %580 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%2670 = torch.aten._reshape_alias %2669, %610, %611 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16>
%2671 = torch.aten.permute %2670, %179 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16>
%2672 = torch.aten.clone %2671, %int0 : !torch.vtensor<[2,1024,8,80],f16>, !torch.int -> !torch.vtensor<[2,1024,8,80],f16>
%2673 = torch.aten._unsafe_view %2672, %557 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%2674 = torch.aten.t %36 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
%2675 = torch.aten.view %2673, %562 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
%2676 = torch.aten.addmm %45, %2675, %2674, %int1, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[2048,640],f16>
%2677 = torch.aten.view %2676, %557 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%2678 = torch.aten.add.Tensor %2677, %2630, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16>
%result0_103, %result1_104, %result2_105 = torch.aten.native_layer_norm %2678, %560, %45, %45, %float1.000000e-05 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.vtensor<[640],f16>, !torch.vtensor<[640],f16>, !torch.float -> !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,1],f32>, !torch.vtensor<[2,1024,1],f32>
%2679 = torch.aten.t %37 : !torch.vtensor<[5120,640],f16> -> !torch.vtensor<[640,5120],f16>
%2680 = torch.aten.view %result0_103, %562 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
%2681 = torch.aten.addmm %38, %2680, %2679, %int1, %int1 : !torch.vtensor<[5120],f16>, !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,5120],f16>, !torch.int, !torch.int -> !torch.vtensor<[2048,5120],f16>
%2682 = torch.aten.view %2681, %681 : !torch.vtensor<[2048,5120],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,5120],f16>
%2683 = torch.aten.slice.Tensor %2682, %int-1, %int0, %int2560, %int1 : !torch.vtensor<[2,1024,5120],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1024,2560],f16>
%2684 = torch.aten.slice.Tensor %2682, %int-1, %int2560, %int5120, %int1 : !torch.vtensor<[2,1024,5120],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1024,2560],f16>
%2685 = torch.aten.gelu %2684, %str_0 : !torch.vtensor<[2,1024,2560],f16>, !torch.str -> !torch.vtensor<[2,1024,2560],f16>
%2686 = torch.aten.mul.Tensor %2683, %2685 : !torch.vtensor<[2,1024,2560],f16>, !torch.vtensor<[2,1024,2560],f16> -> !torch.vtensor<[2,1024,2560],f16>
%2687 = torch.aten.t %39 : !torch.vtensor<[640,2560],f16> -> !torch.vtensor<[2560,640],f16>
%2688 = torch.aten.view %2686, %688 : !torch.vtensor<[2,1024,2560],f16>, !torch.list<int> -> !torch.vtensor<[2048,2560],f16>
%2689 = torch.aten.addmm %45, %2688, %2687, %int1, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2048,2560],f16>, !torch.vtensor<[2560,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[2048,640],f16>
%2690 = torch.aten.view %2689, %557 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%2691 = torch.aten.add.Tensor %2690, %2678, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16>
%2692 = torch.aten._reshape_alias %2691, %693, %694 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,32,640],f16>
%2693 = torch.aten.permute %2692, %300 : !torch.vtensor<[2,32,32,640],f16>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f16>
%2694 = torch.aten._convolution %2693, %40, %45, %87, %88, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[640,640,1,1],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,640,32,32],f16>
%2695 = torch.aten.add.Tensor %2694, %2562, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
%2696 = torch.prim.ListConstruct %2695, %485 : (!torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,320,32,32],f16>) -> !torch.list<vtensor>
%2697 = torch.aten.cat %2696, %int1 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[2,960,32,32],f16>
%2698 = torch.aten.clone %2697, %int0 : !torch.vtensor<[2,960,32,32],f16>, !torch.int -> !torch.vtensor<[2,960,32,32],f16>
%2699 = torch.prim.ListConstruct %int2, %int32, %int30, %int1024 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2700 = torch.aten.view %2698, %2699 : !torch.vtensor<[2,960,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,32,30,1024],f16>
%2701 = torch.aten._to_copy %2700, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,32,30,1024],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,32,30,1024],f32>
%2702 = torch.aten.var.correction %2701, %93, %int0, %true : !torch.vtensor<[2,32,30,1024],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>
%2703 = torch.aten.mean.dim %2701, %93, %true, %none : !torch.vtensor<[2,32,30,1024],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%2704 = torch.aten.add.Tensor %2702, %5, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%2705 = torch.aten.rsqrt %2704 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%2706 = torch.aten.sub.Tensor %2700, %2703, %int1 : !torch.vtensor<[2,32,30,1024],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,30,1024],f32>
%2707 = torch.aten.mul.Tensor %2706, %2705 : !torch.vtensor<[2,32,30,1024],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,30,1024],f32>
%2708 = torch.prim.ListConstruct %int2, %int960, %int32, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2709 = torch.aten.view %2707, %2708 : !torch.vtensor<[2,32,30,1024],f32>, !torch.list<int> -> !torch.vtensor<[2,960,32,32],f32>
%2710 = torch.aten.unsqueeze %42, %int-1 : !torch.vtensor<[960],f16>, !torch.int -> !torch.vtensor<[960,1],f16>
%2711 = torch.aten.unsqueeze %2710, %int-1 : !torch.vtensor<[960,1],f16>, !torch.int -> !torch.vtensor<[960,1,1],f16>
%2712 = torch.aten.mul.Tensor %2709, %2711 : !torch.vtensor<[2,960,32,32],f32>, !torch.vtensor<[960,1,1],f16> -> !torch.vtensor<[2,960,32,32],f32>
%2713 = torch.aten.unsqueeze %42, %int-1 : !torch.vtensor<[960],f16>, !torch.int -> !torch.vtensor<[960,1],f16>
%2714 = torch.aten.unsqueeze %2713, %int-1 : !torch.vtensor<[960,1],f16>, !torch.int -> !torch.vtensor<[960,1,1],f16>
%2715 = torch.aten.add.Tensor %2712, %2714, %int1 : !torch.vtensor<[2,960,32,32],f32>, !torch.vtensor<[960,1,1],f16>, !torch.int -> !torch.vtensor<[2,960,32,32],f32>
%2716 = torch.aten._to_copy %2715, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,960,32,32],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,960,32,32],f16>
%2717 = torch.aten.silu %2716 : !torch.vtensor<[2,960,32,32],f16> -> !torch.vtensor<[2,960,32,32],f16>
%2718 = torch.aten._convolution %2717, %32, %45, %87, %87, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,960,32,32],f16>, !torch.vtensor<[640,960,3,3],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,640,32,32],f16>
%2719 = torch.aten.silu %86 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%2720 = torch.aten.t %33 : !torch.vtensor<[640,1280],f16> -> !torch.vtensor<[1280,640],f16>
%2721 = torch.aten.addmm %45, %2719, %2720, %int1, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[2,640],f16>
%2722 = torch.aten.slice.Tensor %2721, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,640],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,640],f16>
%2723 = torch.aten.slice.Tensor %2722, %int1, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,640],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,640],f16>
%2724 = torch.aten.unsqueeze %2723, %int2 : !torch.vtensor<[2,640],f16>, !torch.int -> !torch.vtensor<[2,640,1],f16>
%2725 = torch.aten.unsqueeze %2724, %int3 : !torch.vtensor<[2,640,1],f16>, !torch.int -> !torch.vtensor<[2,640,1,1],f16>
%2726 = torch.aten.add.Tensor %2718, %2725, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,1,1],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
%2727 = torch.aten.view %2726, %515 : !torch.vtensor<[2,640,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,32,20,1024],f16>
%2728 = torch.aten._to_copy %2727, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,32,20,1024],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,32,20,1024],f32>
%2729 = torch.aten.var.correction %2728, %93, %int0, %true : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>
%2730 = torch.aten.mean.dim %2728, %93, %true, %none : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%2731 = torch.aten.add.Tensor %2729, %5, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%2732 = torch.aten.rsqrt %2731 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%2733 = torch.aten.sub.Tensor %2727, %2730, %int1 : !torch.vtensor<[2,32,20,1024],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,20,1024],f32>
%2734 = torch.aten.mul.Tensor %2733, %2732 : !torch.vtensor<[2,32,20,1024],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,20,1024],f32>
%2735 = torch.aten.view %2734, %524 : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f32>
%2736 = torch.aten.unsqueeze %45, %int-1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640,1],f16>
%2737 = torch.aten.unsqueeze %2736, %int-1 : !torch.vtensor<[640,1],f16>, !torch.int -> !torch.vtensor<[640,1,1],f16>
%2738 = torch.aten.mul.Tensor %2735, %2737 : !torch.vtensor<[2,640,32,32],f32>, !torch.vtensor<[640,1,1],f16> -> !torch.vtensor<[2,640,32,32],f32>
%2739 = torch.aten.unsqueeze %45, %int-1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640,1],f16>
%2740 = torch.aten.unsqueeze %2739, %int-1 : !torch.vtensor<[640,1],f16>, !torch.int -> !torch.vtensor<[640,1,1],f16>
%2741 = torch.aten.add.Tensor %2738, %2740, %int1 : !torch.vtensor<[2,640,32,32],f32>, !torch.vtensor<[640,1,1],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f32>
%2742 = torch.aten._to_copy %2741, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,640,32,32],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,640,32,32],f16>
%2743 = torch.aten.silu %2742 : !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16>
%2744 = torch.aten._convolution %2743, %41, %45, %87, %87, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[640,640,3,3],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,640,32,32],f16>
%2745 = torch.aten._convolution %2697, %34, %45, %87, %88, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,960,32,32],f16>, !torch.vtensor<[640,960,1,1],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,640,32,32],f16>
%2746 = torch.aten.add.Tensor %2745, %2744, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
%2747 = torch.aten.div.Tensor %2746, %4 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[2,640,32,32],f16>
%2748 = torch.aten.clone %2747, %int0 : !torch.vtensor<[2,640,32,32],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
%2749 = torch.aten.view %2748, %515 : !torch.vtensor<[2,640,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,32,20,1024],f16>
%2750 = torch.aten._to_copy %2749, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,32,20,1024],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,32,20,1024],f32>
%2751 = torch.aten.var.correction %2750, %93, %int0, %true : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>
%2752 = torch.aten.mean.dim %2750, %93, %true, %none : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%2753 = torch.aten.add.Tensor %2751, %3, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%2754 = torch.aten.rsqrt %2753 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%2755 = torch.aten.sub.Tensor %2749, %2752, %int1 : !torch.vtensor<[2,32,20,1024],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,20,1024],f32>
%2756 = torch.aten.mul.Tensor %2755, %2754 : !torch.vtensor<[2,32,20,1024],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,20,1024],f32>
%2757 = torch.aten.view %2756, %524 : !torch.vtensor<[2,32,20,1024],f32>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f32>
%2758 = torch.aten.unsqueeze %45, %int-1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640,1],f16>
%2759 = torch.aten.unsqueeze %2758, %int-1 : !torch.vtensor<[640,1],f16>, !torch.int -> !torch.vtensor<[640,1,1],f16>
%2760 = torch.aten.mul.Tensor %2757, %2759 : !torch.vtensor<[2,640,32,32],f32>, !torch.vtensor<[640,1,1],f16> -> !torch.vtensor<[2,640,32,32],f32>
%2761 = torch.aten.unsqueeze %45, %int-1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640,1],f16>
%2762 = torch.aten.unsqueeze %2761, %int-1 : !torch.vtensor<[640,1],f16>, !torch.int -> !torch.vtensor<[640,1,1],f16>
%2763 = torch.aten.add.Tensor %2760, %2762, %int1 : !torch.vtensor<[2,640,32,32],f32>, !torch.vtensor<[640,1,1],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f32>
%2764 = torch.aten._to_copy %2763, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,640,32,32],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,640,32,32],f16>
%2765 = torch.aten._convolution %2764, %40, %45, %87, %88, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[640,640,1,1],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,640,32,32],f16>
%2766 = torch.aten.permute %2765, %156 : !torch.vtensor<[2,640,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,32,32,640],f16>
%2767 = torch.aten._reshape_alias %2766, %557, %558 : !torch.vtensor<[2,32,32,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%result0_106, %result1_107, %result2_108 = torch.aten.native_layer_norm %2767, %560, %45, %45, %float1.000000e-05 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.vtensor<[640],f16>, !torch.vtensor<[640],f16>, !torch.float -> !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,1],f32>, !torch.vtensor<[2,1024,1],f32>
%2768 = torch.aten.t %36 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
%2769 = torch.aten._reshape_alias %result0_106, %562, %563 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
%2770 = torch.aten.mm %2769, %2768 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16>
%2771 = torch.aten._unsafe_view %2770, %557 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%2772 = torch.aten.t %36 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
%2773 = torch.aten._reshape_alias %result0_106, %562, %563 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
%2774 = torch.aten.mm %2773, %2772 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16>
%2775 = torch.aten._unsafe_view %2774, %557 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%2776 = torch.aten.t %36 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
%2777 = torch.aten._reshape_alias %result0_106, %562, %563 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
%2778 = torch.aten.mm %2777, %2776 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16>
%2779 = torch.aten._unsafe_view %2778, %557 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%2780 = torch.aten._reshape_alias %2771, %575, %576 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16>
%2781 = torch.aten.permute %2780, %179 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16>
%2782 = torch.aten.clone %2781, %int0 : !torch.vtensor<[2,8,1024,80],f16>, !torch.int -> !torch.vtensor<[2,8,1024,80],f16>
%2783 = torch.aten._unsafe_view %2782, %580 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%2784 = torch.aten._reshape_alias %2775, %575, %576 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16>
%2785 = torch.aten.permute %2784, %179 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16>
%2786 = torch.aten.clone %2785, %int0 : !torch.vtensor<[2,8,1024,80],f16>, !torch.int -> !torch.vtensor<[2,8,1024,80],f16>
%2787 = torch.aten._unsafe_view %2786, %580 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%2788 = torch.aten._reshape_alias %2779, %575, %576 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16>
%2789 = torch.aten.permute %2788, %179 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16>
%2790 = torch.aten.clone %2789, %int0 : !torch.vtensor<[2,8,1024,80],f16>, !torch.int -> !torch.vtensor<[2,8,1024,80],f16>
%2791 = torch.aten._unsafe_view %2790, %580 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%2792 = torch.aten.transpose.int %2787, %int-1, %int-2 : !torch.vtensor<[16,1024,80],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,80,1024],f16>
%2793 = torch.aten.expand %2783, %580, %false : !torch.vtensor<[16,1024,80],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,1024,80],f16>
%2794 = torch.aten._reshape_alias %2793, %580, %592 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%2795 = torch.aten.expand %2792, %594, %false : !torch.vtensor<[16,80,1024],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,80,1024],f16>
%2796 = torch.aten._reshape_alias %2795, %594, %596 : !torch.vtensor<[16,80,1024],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,80,1024],f16>
%2797 = torch.aten.bmm %2794, %2796 : !torch.vtensor<[16,1024,80],f16>, !torch.vtensor<[16,80,1024],f16> -> !torch.vtensor<[16,1024,1024],f16>
%2798 = torch.aten._unsafe_view %2797, %599 : !torch.vtensor<[16,1024,1024],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,1024],f16>
%2799 = torch.aten.mul.Tensor %2798, %1 : !torch.vtensor<[16,1024,1024],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,1024,1024],f16>
%2800 = torch.aten._softmax %2799, %int-1, %false : !torch.vtensor<[16,1024,1024],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,1024,1024],f16>
%2801 = torch.aten.expand %2800, %599, %false : !torch.vtensor<[16,1024,1024],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,1024,1024],f16>
%2802 = torch.aten._reshape_alias %2801, %599, %604 : !torch.vtensor<[16,1024,1024],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,1024,1024],f16>
%2803 = torch.aten.expand %2791, %580, %false : !torch.vtensor<[16,1024,80],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,1024,80],f16>
%2804 = torch.aten._reshape_alias %2803, %580, %592 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%2805 = torch.aten.bmm %2802, %2804 : !torch.vtensor<[16,1024,1024],f16>, !torch.vtensor<[16,1024,80],f16> -> !torch.vtensor<[16,1024,80],f16>
%2806 = torch.aten._unsafe_view %2805, %580 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%2807 = torch.aten._reshape_alias %2806, %610, %611 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16>
%2808 = torch.aten.permute %2807, %179 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16>
%2809 = torch.aten.clone %2808, %int0 : !torch.vtensor<[2,1024,8,80],f16>, !torch.int -> !torch.vtensor<[2,1024,8,80],f16>
%2810 = torch.aten._unsafe_view %2809, %557 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%2811 = torch.aten.t %36 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
%2812 = torch.aten.view %2810, %562 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
%2813 = torch.aten.addmm %45, %2812, %2811, %int1, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[2048,640],f16>
%2814 = torch.aten.view %2813, %557 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%2815 = torch.aten.add.Tensor %2814, %2767, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16>
%result0_109, %result1_110, %result2_111 = torch.aten.native_layer_norm %2815, %560, %45, %45, %float1.000000e-05 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.vtensor<[640],f16>, !torch.vtensor<[640],f16>, !torch.float -> !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,1],f32>, !torch.vtensor<[2,1024,1],f32>
%2816 = torch.aten.t %36 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
%2817 = torch.aten._reshape_alias %result0_109, %562, %563 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
%2818 = torch.aten.mm %2817, %2816 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16>
%2819 = torch.aten._unsafe_view %2818, %557 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%2820 = torch.aten.t %35 : !torch.vtensor<[640,768],f16> -> !torch.vtensor<[768,640],f16>
%2821 = torch.aten._reshape_alias %arg2, %228, %229 : !torch.vtensor<[2,77,768],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[154,768],f16>
%2822 = torch.aten.mm %2821, %2820 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,640],f16> -> !torch.vtensor<[154,640],f16>
%2823 = torch.aten._unsafe_view %2822, %628 : !torch.vtensor<[154,640],f16>, !torch.list<int> -> !torch.vtensor<[2,77,640],f16>
%2824 = torch.aten.t %35 : !torch.vtensor<[640,768],f16> -> !torch.vtensor<[768,640],f16>
%2825 = torch.aten._reshape_alias %arg2, %228, %229 : !torch.vtensor<[2,77,768],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[154,768],f16>
%2826 = torch.aten.mm %2825, %2824 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,640],f16> -> !torch.vtensor<[154,640],f16>
%2827 = torch.aten._unsafe_view %2826, %628 : !torch.vtensor<[154,640],f16>, !torch.list<int> -> !torch.vtensor<[2,77,640],f16>
%2828 = torch.aten._reshape_alias %2819, %575, %576 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16>
%2829 = torch.aten.permute %2828, %179 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16>
%2830 = torch.aten.clone %2829, %int0 : !torch.vtensor<[2,8,1024,80],f16>, !torch.int -> !torch.vtensor<[2,8,1024,80],f16>
%2831 = torch.aten._unsafe_view %2830, %580 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%2832 = torch.aten._reshape_alias %2823, %638, %639 : !torch.vtensor<[2,77,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,77,8,80],f16>
%2833 = torch.aten.permute %2832, %179 : !torch.vtensor<[2,77,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,80],f16>
%2834 = torch.aten.clone %2833, %int0 : !torch.vtensor<[2,8,77,80],f16>, !torch.int -> !torch.vtensor<[2,8,77,80],f16>
%2835 = torch.aten._unsafe_view %2834, %643 : !torch.vtensor<[2,8,77,80],f16>, !torch.list<int> -> !torch.vtensor<[16,77,80],f16>
%2836 = torch.aten._reshape_alias %2827, %638, %639 : !torch.vtensor<[2,77,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,77,8,80],f16>
%2837 = torch.aten.permute %2836, %179 : !torch.vtensor<[2,77,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,80],f16>
%2838 = torch.aten.clone %2837, %int0 : !torch.vtensor<[2,8,77,80],f16>, !torch.int -> !torch.vtensor<[2,8,77,80],f16>
%2839 = torch.aten._unsafe_view %2838, %643 : !torch.vtensor<[2,8,77,80],f16>, !torch.list<int> -> !torch.vtensor<[16,77,80],f16>
%2840 = torch.aten.transpose.int %2835, %int-1, %int-2 : !torch.vtensor<[16,77,80],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,80,77],f16>
%2841 = torch.aten.expand %2831, %580, %false : !torch.vtensor<[16,1024,80],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,1024,80],f16>
%2842 = torch.aten._reshape_alias %2841, %580, %592 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%2843 = torch.aten.expand %2840, %652, %false : !torch.vtensor<[16,80,77],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,80,77],f16>
%2844 = torch.aten._reshape_alias %2843, %652, %654 : !torch.vtensor<[16,80,77],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,80,77],f16>
%2845 = torch.aten.bmm %2842, %2844 : !torch.vtensor<[16,1024,80],f16>, !torch.vtensor<[16,80,77],f16> -> !torch.vtensor<[16,1024,77],f16>
%2846 = torch.aten._unsafe_view %2845, %657 : !torch.vtensor<[16,1024,77],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,77],f16>
%2847 = torch.aten.mul.Tensor %2846, %1 : !torch.vtensor<[16,1024,77],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,1024,77],f16>
%2848 = torch.aten._softmax %2847, %int-1, %false : !torch.vtensor<[16,1024,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,1024,77],f16>
%2849 = torch.aten.expand %2848, %657, %false : !torch.vtensor<[16,1024,77],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,1024,77],f16>
%2850 = torch.aten._reshape_alias %2849, %657, %662 : !torch.vtensor<[16,1024,77],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,1024,77],f16>
%2851 = torch.aten.expand %2839, %643, %false : !torch.vtensor<[16,77,80],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,77,80],f16>
%2852 = torch.aten._reshape_alias %2851, %643, %665 : !torch.vtensor<[16,77,80],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,77,80],f16>
%2853 = torch.aten.bmm %2850, %2852 : !torch.vtensor<[16,1024,77],f16>, !torch.vtensor<[16,77,80],f16> -> !torch.vtensor<[16,1024,80],f16>
%2854 = torch.aten._unsafe_view %2853, %580 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[16,1024,80],f16>
%2855 = torch.aten._reshape_alias %2854, %610, %611 : !torch.vtensor<[16,1024,80],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,8,1024,80],f16>
%2856 = torch.aten.permute %2855, %179 : !torch.vtensor<[2,8,1024,80],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,8,80],f16>
%2857 = torch.aten.clone %2856, %int0 : !torch.vtensor<[2,1024,8,80],f16>, !torch.int -> !torch.vtensor<[2,1024,8,80],f16>
%2858 = torch.aten._unsafe_view %2857, %557 : !torch.vtensor<[2,1024,8,80],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%2859 = torch.aten.t %36 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
%2860 = torch.aten.view %2858, %562 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
%2861 = torch.aten.addmm %45, %2860, %2859, %int1, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[2048,640],f16>
%2862 = torch.aten.view %2861, %557 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%2863 = torch.aten.add.Tensor %2862, %2815, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16>
%result0_112, %result1_113, %result2_114 = torch.aten.native_layer_norm %2863, %560, %45, %45, %float1.000000e-05 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.vtensor<[640],f16>, !torch.vtensor<[640],f16>, !torch.float -> !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,1],f32>, !torch.vtensor<[2,1024,1],f32>
%2864 = torch.aten.t %37 : !torch.vtensor<[5120,640],f16> -> !torch.vtensor<[640,5120],f16>
%2865 = torch.aten.view %result0_112, %562 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16>
%2866 = torch.aten.addmm %38, %2865, %2864, %int1, %int1 : !torch.vtensor<[5120],f16>, !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,5120],f16>, !torch.int, !torch.int -> !torch.vtensor<[2048,5120],f16>
%2867 = torch.aten.view %2866, %681 : !torch.vtensor<[2048,5120],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,5120],f16>
%2868 = torch.aten.slice.Tensor %2867, %int-1, %int0, %int2560, %int1 : !torch.vtensor<[2,1024,5120],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1024,2560],f16>
%2869 = torch.aten.slice.Tensor %2867, %int-1, %int2560, %int5120, %int1 : !torch.vtensor<[2,1024,5120],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1024,2560],f16>
%2870 = torch.aten.gelu %2869, %str_0 : !torch.vtensor<[2,1024,2560],f16>, !torch.str -> !torch.vtensor<[2,1024,2560],f16>
%2871 = torch.aten.mul.Tensor %2868, %2870 : !torch.vtensor<[2,1024,2560],f16>, !torch.vtensor<[2,1024,2560],f16> -> !torch.vtensor<[2,1024,2560],f16>
%2872 = torch.aten.t %39 : !torch.vtensor<[640,2560],f16> -> !torch.vtensor<[2560,640],f16>
%2873 = torch.aten.view %2871, %688 : !torch.vtensor<[2,1024,2560],f16>, !torch.list<int> -> !torch.vtensor<[2048,2560],f16>
%2874 = torch.aten.addmm %45, %2873, %2872, %int1, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2048,2560],f16>, !torch.vtensor<[2560,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[2048,640],f16>
%2875 = torch.aten.view %2874, %557 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16>
%2876 = torch.aten.add.Tensor %2875, %2863, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16>
%2877 = torch.aten._reshape_alias %2876, %693, %694 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,32,640],f16>
%2878 = torch.aten.permute %2877, %300 : !torch.vtensor<[2,32,32,640],f16>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f16>
%2879 = torch.aten._convolution %2878, %40, %45, %87, %88, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[640,640,1,1],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,640,32,32],f16>
%2880 = torch.aten.add.Tensor %2879, %2747, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f16>
%2881 = torch.aten.upsample_nearest2d.vec %2880, %none, %1768 : !torch.vtensor<[2,640,32,32],f16>, !torch.none, !torch.list<float> -> !torch.vtensor<[2,640,64,64],f16>
%2882 = torch.aten._convolution %2881, %41, %45, %87, %87, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,640,64,64],f16>, !torch.vtensor<[640,640,3,3],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,640,64,64],f16>
%2883 = torch.prim.ListConstruct %2882, %483 : (!torch.vtensor<[2,640,64,64],f16>, !torch.vtensor<[2,320,64,64],f16>) -> !torch.list<vtensor>
%2884 = torch.aten.cat %2883, %int1 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[2,960,64,64],f16>
%2885 = torch.aten.clone %2884, %int0 : !torch.vtensor<[2,960,64,64],f16>, !torch.int -> !torch.vtensor<[2,960,64,64],f16>
%2886 = torch.prim.ListConstruct %int2, %int32, %int30, %int4096 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2887 = torch.aten.view %2885, %2886 : !torch.vtensor<[2,960,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,32,30,4096],f16>
%2888 = torch.aten._to_copy %2887, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,32,30,4096],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,32,30,4096],f32>
%2889 = torch.aten.var.correction %2888, %93, %int0, %true : !torch.vtensor<[2,32,30,4096],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>
%2890 = torch.aten.mean.dim %2888, %93, %true, %none : !torch.vtensor<[2,32,30,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%2891 = torch.aten.add.Tensor %2889, %5, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%2892 = torch.aten.rsqrt %2891 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%2893 = torch.aten.sub.Tensor %2887, %2890, %int1 : !torch.vtensor<[2,32,30,4096],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,30,4096],f32>
%2894 = torch.aten.mul.Tensor %2893, %2892 : !torch.vtensor<[2,32,30,4096],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,30,4096],f32>
%2895 = torch.prim.ListConstruct %int2, %int960, %int64, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2896 = torch.aten.view %2894, %2895 : !torch.vtensor<[2,32,30,4096],f32>, !torch.list<int> -> !torch.vtensor<[2,960,64,64],f32>
%2897 = torch.aten.unsqueeze %42, %int-1 : !torch.vtensor<[960],f16>, !torch.int -> !torch.vtensor<[960,1],f16>
%2898 = torch.aten.unsqueeze %2897, %int-1 : !torch.vtensor<[960,1],f16>, !torch.int -> !torch.vtensor<[960,1,1],f16>
%2899 = torch.aten.mul.Tensor %2896, %2898 : !torch.vtensor<[2,960,64,64],f32>, !torch.vtensor<[960,1,1],f16> -> !torch.vtensor<[2,960,64,64],f32>
%2900 = torch.aten.unsqueeze %42, %int-1 : !torch.vtensor<[960],f16>, !torch.int -> !torch.vtensor<[960,1],f16>
%2901 = torch.aten.unsqueeze %2900, %int-1 : !torch.vtensor<[960,1],f16>, !torch.int -> !torch.vtensor<[960,1,1],f16>
%2902 = torch.aten.add.Tensor %2899, %2901, %int1 : !torch.vtensor<[2,960,64,64],f32>, !torch.vtensor<[960,1,1],f16>, !torch.int -> !torch.vtensor<[2,960,64,64],f32>
%2903 = torch.aten._to_copy %2902, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,960,64,64],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,960,64,64],f16>
%2904 = torch.aten.silu %2903 : !torch.vtensor<[2,960,64,64],f16> -> !torch.vtensor<[2,960,64,64],f16>
%2905 = torch.aten._convolution %2904, %43, %55, %87, %87, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,960,64,64],f16>, !torch.vtensor<[320,960,3,3],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,320,64,64],f16>
%2906 = torch.aten.silu %86 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%2907 = torch.aten.t %53 : !torch.vtensor<[320,1280],f16> -> !torch.vtensor<[1280,320],f16>
%2908 = torch.aten.addmm %55, %2906, %2907, %int1, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[2,320],f16>
%2909 = torch.aten.slice.Tensor %2908, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,320],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,320],f16>
%2910 = torch.aten.slice.Tensor %2909, %int1, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,320],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,320],f16>
%2911 = torch.aten.unsqueeze %2910, %int2 : !torch.vtensor<[2,320],f16>, !torch.int -> !torch.vtensor<[2,320,1],f16>
%2912 = torch.aten.unsqueeze %2911, %int3 : !torch.vtensor<[2,320,1],f16>, !torch.int -> !torch.vtensor<[2,320,1,1],f16>
%2913 = torch.aten.add.Tensor %2905, %2912, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,1,1],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f16>
%2914 = torch.aten.view %2913, %90 : !torch.vtensor<[2,320,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,32,10,4096],f16>
%2915 = torch.aten._to_copy %2914, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,32,10,4096],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,32,10,4096],f32>
%2916 = torch.aten.var.correction %2915, %93, %int0, %true : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>
%2917 = torch.aten.mean.dim %2915, %93, %true, %none : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%2918 = torch.aten.add.Tensor %2916, %5, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%2919 = torch.aten.rsqrt %2918 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%2920 = torch.aten.sub.Tensor %2914, %2917, %int1 : !torch.vtensor<[2,32,10,4096],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,10,4096],f32>
%2921 = torch.aten.mul.Tensor %2920, %2919 : !torch.vtensor<[2,32,10,4096],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,10,4096],f32>
%2922 = torch.aten.view %2921, %100 : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f32>
%2923 = torch.aten.unsqueeze %55, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16>
%2924 = torch.aten.unsqueeze %2923, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16>
%2925 = torch.aten.mul.Tensor %2922, %2924 : !torch.vtensor<[2,320,64,64],f32>, !torch.vtensor<[320,1,1],f16> -> !torch.vtensor<[2,320,64,64],f32>
%2926 = torch.aten.unsqueeze %55, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16>
%2927 = torch.aten.unsqueeze %2926, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16>
%2928 = torch.aten.add.Tensor %2925, %2927, %int1 : !torch.vtensor<[2,320,64,64],f32>, !torch.vtensor<[320,1,1],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f32>
%2929 = torch.aten._to_copy %2928, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,320,64,64],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,320,64,64],f16>
%2930 = torch.aten.silu %2929 : !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16>
%2931 = torch.aten._convolution %2930, %47, %55, %87, %87, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[320,320,3,3],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,320,64,64],f16>
%2932 = torch.aten._convolution %2884, %44, %55, %87, %88, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,960,64,64],f16>, !torch.vtensor<[320,960,1,1],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,320,64,64],f16>
%2933 = torch.aten.add.Tensor %2932, %2931, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f16>
%2934 = torch.aten.div.Tensor %2933, %4 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[2,320,64,64],f16>
%2935 = torch.aten.clone %2934, %int0 : !torch.vtensor<[2,320,64,64],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f16>
%2936 = torch.aten.view %2935, %90 : !torch.vtensor<[2,320,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,32,10,4096],f16>
%2937 = torch.aten._to_copy %2936, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,32,10,4096],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,32,10,4096],f32>
%2938 = torch.aten.var.correction %2937, %93, %int0, %true : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>
%2939 = torch.aten.mean.dim %2937, %93, %true, %none : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%2940 = torch.aten.add.Tensor %2938, %3, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%2941 = torch.aten.rsqrt %2940 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%2942 = torch.aten.sub.Tensor %2936, %2939, %int1 : !torch.vtensor<[2,32,10,4096],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,10,4096],f32>
%2943 = torch.aten.mul.Tensor %2942, %2941 : !torch.vtensor<[2,32,10,4096],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,10,4096],f32>
%2944 = torch.aten.view %2943, %100 : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f32>
%2945 = torch.aten.unsqueeze %55, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16>
%2946 = torch.aten.unsqueeze %2945, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16>
%2947 = torch.aten.mul.Tensor %2944, %2946 : !torch.vtensor<[2,320,64,64],f32>, !torch.vtensor<[320,1,1],f16> -> !torch.vtensor<[2,320,64,64],f32>
%2948 = torch.aten.unsqueeze %55, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16>
%2949 = torch.aten.unsqueeze %2948, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16>
%2950 = torch.aten.add.Tensor %2947, %2949, %int1 : !torch.vtensor<[2,320,64,64],f32>, !torch.vtensor<[320,1,1],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f32>
%2951 = torch.aten._to_copy %2950, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,320,64,64],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,320,64,64],f16>
%2952 = torch.aten._convolution %2951, %54, %55, %87, %88, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[320,320,1,1],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,320,64,64],f16>
%2953 = torch.aten.permute %2952, %156 : !torch.vtensor<[2,320,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,64,64,320],f16>
%2954 = torch.aten._reshape_alias %2953, %158, %159 : !torch.vtensor<[2,64,64,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%result0_115, %result1_116, %result2_117 = torch.aten.native_layer_norm %2954, %161, %55, %55, %float1.000000e-05 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.vtensor<[320],f16>, !torch.vtensor<[320],f16>, !torch.float -> !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,1],f32>, !torch.vtensor<[2,4096,1],f32>
%2955 = torch.aten.t %50 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
%2956 = torch.aten._reshape_alias %result0_115, %163, %164 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
%2957 = torch.aten.mm %2956, %2955 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16>
%2958 = torch.aten._unsafe_view %2957, %158 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%2959 = torch.aten.t %50 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
%2960 = torch.aten._reshape_alias %result0_115, %163, %164 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
%2961 = torch.aten.mm %2960, %2959 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16>
%2962 = torch.aten._unsafe_view %2961, %158 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%2963 = torch.aten.t %50 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
%2964 = torch.aten._reshape_alias %result0_115, %163, %164 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
%2965 = torch.aten.mm %2964, %2963 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16>
%2966 = torch.aten._unsafe_view %2965, %158 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%2967 = torch.aten._reshape_alias %2958, %176, %177 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16>
%2968 = torch.aten.permute %2967, %179 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16>
%2969 = torch.aten.clone %2968, %int0 : !torch.vtensor<[2,8,4096,40],f16>, !torch.int -> !torch.vtensor<[2,8,4096,40],f16>
%2970 = torch.aten._unsafe_view %2969, %182 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%2971 = torch.aten._reshape_alias %2962, %176, %177 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16>
%2972 = torch.aten.permute %2971, %179 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16>
%2973 = torch.aten.clone %2972, %int0 : !torch.vtensor<[2,8,4096,40],f16>, !torch.int -> !torch.vtensor<[2,8,4096,40],f16>
%2974 = torch.aten._unsafe_view %2973, %182 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%2975 = torch.aten._reshape_alias %2966, %176, %177 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16>
%2976 = torch.aten.permute %2975, %179 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16>
%2977 = torch.aten.clone %2976, %int0 : !torch.vtensor<[2,8,4096,40],f16>, !torch.int -> !torch.vtensor<[2,8,4096,40],f16>
%2978 = torch.aten._unsafe_view %2977, %182 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%2979 = torch.aten.transpose.int %2974, %int-1, %int-2 : !torch.vtensor<[16,4096,40],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,40,4096],f16>
%2980 = torch.aten.expand %2970, %182, %false : !torch.vtensor<[16,4096,40],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,4096,40],f16>
%2981 = torch.aten._reshape_alias %2980, %182, %194 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%2982 = torch.aten.expand %2979, %196, %false : !torch.vtensor<[16,40,4096],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,40,4096],f16>
%2983 = torch.aten._reshape_alias %2982, %196, %198 : !torch.vtensor<[16,40,4096],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,40,4096],f16>
%2984 = torch.aten.bmm %2981, %2983 : !torch.vtensor<[16,4096,40],f16>, !torch.vtensor<[16,40,4096],f16> -> !torch.vtensor<[16,4096,4096],f16>
%2985 = torch.aten._unsafe_view %2984, %201 : !torch.vtensor<[16,4096,4096],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,4096],f16>
%2986 = torch.aten.mul.Tensor %2985, %2 : !torch.vtensor<[16,4096,4096],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,4096,4096],f16>
%2987 = torch.aten._softmax %2986, %int-1, %false : !torch.vtensor<[16,4096,4096],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,4096,4096],f16>
%2988 = torch.aten.expand %2987, %201, %false : !torch.vtensor<[16,4096,4096],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,4096,4096],f16>
%2989 = torch.aten._reshape_alias %2988, %201, %206 : !torch.vtensor<[16,4096,4096],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,4096,4096],f16>
%2990 = torch.aten.expand %2978, %182, %false : !torch.vtensor<[16,4096,40],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,4096,40],f16>
%2991 = torch.aten._reshape_alias %2990, %182, %194 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%2992 = torch.aten.bmm %2989, %2991 : !torch.vtensor<[16,4096,4096],f16>, !torch.vtensor<[16,4096,40],f16> -> !torch.vtensor<[16,4096,40],f16>
%2993 = torch.aten._unsafe_view %2992, %182 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%2994 = torch.aten._reshape_alias %2993, %212, %213 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16>
%2995 = torch.aten.permute %2994, %179 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16>
%2996 = torch.aten.clone %2995, %int0 : !torch.vtensor<[2,4096,8,40],f16>, !torch.int -> !torch.vtensor<[2,4096,8,40],f16>
%2997 = torch.aten._unsafe_view %2996, %158 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%2998 = torch.aten.t %50 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
%2999 = torch.aten.view %2997, %163 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
%3000 = torch.aten.addmm %55, %2999, %2998, %int1, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[8192,320],f16>
%3001 = torch.aten.view %3000, %158 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%3002 = torch.aten.add.Tensor %3001, %2954, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16>
%result0_118, %result1_119, %result2_120 = torch.aten.native_layer_norm %3002, %161, %55, %55, %float1.000000e-05 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.vtensor<[320],f16>, !torch.vtensor<[320],f16>, !torch.float -> !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,1],f32>, !torch.vtensor<[2,4096,1],f32>
%3003 = torch.aten.t %50 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
%3004 = torch.aten._reshape_alias %result0_118, %163, %164 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
%3005 = torch.aten.mm %3004, %3003 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16>
%3006 = torch.aten._unsafe_view %3005, %158 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%3007 = torch.aten.t %49 : !torch.vtensor<[320,768],f16> -> !torch.vtensor<[768,320],f16>
%3008 = torch.aten._reshape_alias %arg2, %228, %229 : !torch.vtensor<[2,77,768],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[154,768],f16>
%3009 = torch.aten.mm %3008, %3007 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,320],f16> -> !torch.vtensor<[154,320],f16>
%3010 = torch.aten._unsafe_view %3009, %232 : !torch.vtensor<[154,320],f16>, !torch.list<int> -> !torch.vtensor<[2,77,320],f16>
%3011 = torch.aten.t %49 : !torch.vtensor<[320,768],f16> -> !torch.vtensor<[768,320],f16>
%3012 = torch.aten._reshape_alias %arg2, %228, %229 : !torch.vtensor<[2,77,768],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[154,768],f16>
%3013 = torch.aten.mm %3012, %3011 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,320],f16> -> !torch.vtensor<[154,320],f16>
%3014 = torch.aten._unsafe_view %3013, %232 : !torch.vtensor<[154,320],f16>, !torch.list<int> -> !torch.vtensor<[2,77,320],f16>
%3015 = torch.aten._reshape_alias %3006, %176, %177 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16>
%3016 = torch.aten.permute %3015, %179 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16>
%3017 = torch.aten.clone %3016, %int0 : !torch.vtensor<[2,8,4096,40],f16>, !torch.int -> !torch.vtensor<[2,8,4096,40],f16>
%3018 = torch.aten._unsafe_view %3017, %182 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%3019 = torch.aten._reshape_alias %3010, %242, %243 : !torch.vtensor<[2,77,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,77,8,40],f16>
%3020 = torch.aten.permute %3019, %179 : !torch.vtensor<[2,77,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,40],f16>
%3021 = torch.aten.clone %3020, %int0 : !torch.vtensor<[2,8,77,40],f16>, !torch.int -> !torch.vtensor<[2,8,77,40],f16>
%3022 = torch.aten._unsafe_view %3021, %247 : !torch.vtensor<[2,8,77,40],f16>, !torch.list<int> -> !torch.vtensor<[16,77,40],f16>
%3023 = torch.aten._reshape_alias %3014, %242, %243 : !torch.vtensor<[2,77,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,77,8,40],f16>
%3024 = torch.aten.permute %3023, %179 : !torch.vtensor<[2,77,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,40],f16>
%3025 = torch.aten.clone %3024, %int0 : !torch.vtensor<[2,8,77,40],f16>, !torch.int -> !torch.vtensor<[2,8,77,40],f16>
%3026 = torch.aten._unsafe_view %3025, %247 : !torch.vtensor<[2,8,77,40],f16>, !torch.list<int> -> !torch.vtensor<[16,77,40],f16>
%3027 = torch.aten.transpose.int %3022, %int-1, %int-2 : !torch.vtensor<[16,77,40],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,40,77],f16>
%3028 = torch.aten.expand %3018, %182, %false : !torch.vtensor<[16,4096,40],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,4096,40],f16>
%3029 = torch.aten._reshape_alias %3028, %182, %194 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%3030 = torch.aten.expand %3027, %256, %false : !torch.vtensor<[16,40,77],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,40,77],f16>
%3031 = torch.aten._reshape_alias %3030, %256, %258 : !torch.vtensor<[16,40,77],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,40,77],f16>
%3032 = torch.aten.bmm %3029, %3031 : !torch.vtensor<[16,4096,40],f16>, !torch.vtensor<[16,40,77],f16> -> !torch.vtensor<[16,4096,77],f16>
%3033 = torch.aten._unsafe_view %3032, %261 : !torch.vtensor<[16,4096,77],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,77],f16>
%3034 = torch.aten.mul.Tensor %3033, %2 : !torch.vtensor<[16,4096,77],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,4096,77],f16>
%3035 = torch.aten._softmax %3034, %int-1, %false : !torch.vtensor<[16,4096,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,4096,77],f16>
%3036 = torch.aten.expand %3035, %261, %false : !torch.vtensor<[16,4096,77],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,4096,77],f16>
%3037 = torch.aten._reshape_alias %3036, %261, %266 : !torch.vtensor<[16,4096,77],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,4096,77],f16>
%3038 = torch.aten.expand %3026, %247, %false : !torch.vtensor<[16,77,40],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,77,40],f16>
%3039 = torch.aten._reshape_alias %3038, %247, %269 : !torch.vtensor<[16,77,40],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,77,40],f16>
%3040 = torch.aten.bmm %3037, %3039 : !torch.vtensor<[16,4096,77],f16>, !torch.vtensor<[16,77,40],f16> -> !torch.vtensor<[16,4096,40],f16>
%3041 = torch.aten._unsafe_view %3040, %182 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%3042 = torch.aten._reshape_alias %3041, %212, %213 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16>
%3043 = torch.aten.permute %3042, %179 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16>
%3044 = torch.aten.clone %3043, %int0 : !torch.vtensor<[2,4096,8,40],f16>, !torch.int -> !torch.vtensor<[2,4096,8,40],f16>
%3045 = torch.aten._unsafe_view %3044, %158 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%3046 = torch.aten.t %50 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
%3047 = torch.aten.view %3045, %163 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
%3048 = torch.aten.addmm %55, %3047, %3046, %int1, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[8192,320],f16>
%3049 = torch.aten.view %3048, %158 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%3050 = torch.aten.add.Tensor %3049, %3002, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16>
%result0_121, %result1_122, %result2_123 = torch.aten.native_layer_norm %3050, %161, %55, %55, %float1.000000e-05 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.vtensor<[320],f16>, !torch.vtensor<[320],f16>, !torch.float -> !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,1],f32>, !torch.vtensor<[2,4096,1],f32>
%3051 = torch.aten.t %51 : !torch.vtensor<[2560,320],f16> -> !torch.vtensor<[320,2560],f16>
%3052 = torch.aten.view %result0_121, %163 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
%3053 = torch.aten.addmm %52, %3052, %3051, %int1, %int1 : !torch.vtensor<[2560],f16>, !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,2560],f16>, !torch.int, !torch.int -> !torch.vtensor<[8192,2560],f16>
%3054 = torch.aten.view %3053, %285 : !torch.vtensor<[8192,2560],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,2560],f16>
%3055 = torch.aten.slice.Tensor %3054, %int-1, %int0, %int1280, %int1 : !torch.vtensor<[2,4096,2560],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,4096,1280],f16>
%3056 = torch.aten.slice.Tensor %3054, %int-1, %int1280, %int2560, %int1 : !torch.vtensor<[2,4096,2560],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,4096,1280],f16>
%3057 = torch.aten.gelu %3056, %str_0 : !torch.vtensor<[2,4096,1280],f16>, !torch.str -> !torch.vtensor<[2,4096,1280],f16>
%3058 = torch.aten.mul.Tensor %3055, %3057 : !torch.vtensor<[2,4096,1280],f16>, !torch.vtensor<[2,4096,1280],f16> -> !torch.vtensor<[2,4096,1280],f16>
%3059 = torch.aten.t %53 : !torch.vtensor<[320,1280],f16> -> !torch.vtensor<[1280,320],f16>
%3060 = torch.aten.view %3058, %292 : !torch.vtensor<[2,4096,1280],f16>, !torch.list<int> -> !torch.vtensor<[8192,1280],f16>
%3061 = torch.aten.addmm %55, %3060, %3059, %int1, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[8192,1280],f16>, !torch.vtensor<[1280,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[8192,320],f16>
%3062 = torch.aten.view %3061, %158 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%3063 = torch.aten.add.Tensor %3062, %3050, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16>
%3064 = torch.aten._reshape_alias %3063, %297, %298 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,64,64,320],f16>
%3065 = torch.aten.permute %3064, %300 : !torch.vtensor<[2,64,64,320],f16>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f16>
%3066 = torch.aten._convolution %3065, %54, %55, %87, %88, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[320,320,1,1],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,320,64,64],f16>
%3067 = torch.aten.add.Tensor %3066, %2934, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f16>
%3068 = torch.prim.ListConstruct %3067, %303 : (!torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f16>) -> !torch.list<vtensor>
%3069 = torch.aten.cat %3068, %int1 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[2,640,64,64],f16>
%3070 = torch.aten.clone %3069, %int0 : !torch.vtensor<[2,640,64,64],f16>, !torch.int -> !torch.vtensor<[2,640,64,64],f16>
%3071 = torch.prim.ListConstruct %int2, %int32, %int20, %int4096 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3072 = torch.aten.view %3070, %3071 : !torch.vtensor<[2,640,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,32,20,4096],f16>
%3073 = torch.aten._to_copy %3072, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,32,20,4096],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,32,20,4096],f32>
%3074 = torch.aten.var.correction %3073, %93, %int0, %true : !torch.vtensor<[2,32,20,4096],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>
%3075 = torch.aten.mean.dim %3073, %93, %true, %none : !torch.vtensor<[2,32,20,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%3076 = torch.aten.add.Tensor %3074, %5, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%3077 = torch.aten.rsqrt %3076 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%3078 = torch.aten.sub.Tensor %3072, %3075, %int1 : !torch.vtensor<[2,32,20,4096],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,20,4096],f32>
%3079 = torch.aten.mul.Tensor %3078, %3077 : !torch.vtensor<[2,32,20,4096],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,20,4096],f32>
%3080 = torch.prim.ListConstruct %int2, %int640, %int64, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3081 = torch.aten.view %3079, %3080 : !torch.vtensor<[2,32,20,4096],f32>, !torch.list<int> -> !torch.vtensor<[2,640,64,64],f32>
%3082 = torch.aten.unsqueeze %45, %int-1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640,1],f16>
%3083 = torch.aten.unsqueeze %3082, %int-1 : !torch.vtensor<[640,1],f16>, !torch.int -> !torch.vtensor<[640,1,1],f16>
%3084 = torch.aten.mul.Tensor %3081, %3083 : !torch.vtensor<[2,640,64,64],f32>, !torch.vtensor<[640,1,1],f16> -> !torch.vtensor<[2,640,64,64],f32>
%3085 = torch.aten.unsqueeze %45, %int-1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640,1],f16>
%3086 = torch.aten.unsqueeze %3085, %int-1 : !torch.vtensor<[640,1],f16>, !torch.int -> !torch.vtensor<[640,1,1],f16>
%3087 = torch.aten.add.Tensor %3084, %3086, %int1 : !torch.vtensor<[2,640,64,64],f32>, !torch.vtensor<[640,1,1],f16>, !torch.int -> !torch.vtensor<[2,640,64,64],f32>
%3088 = torch.aten._to_copy %3087, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,640,64,64],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,640,64,64],f16>
%3089 = torch.aten.silu %3088 : !torch.vtensor<[2,640,64,64],f16> -> !torch.vtensor<[2,640,64,64],f16>
%3090 = torch.aten._convolution %3089, %46, %55, %87, %87, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,640,64,64],f16>, !torch.vtensor<[320,640,3,3],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,320,64,64],f16>
%3091 = torch.aten.silu %86 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%3092 = torch.aten.t %53 : !torch.vtensor<[320,1280],f16> -> !torch.vtensor<[1280,320],f16>
%3093 = torch.aten.addmm %55, %3091, %3092, %int1, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[2,320],f16>
%3094 = torch.aten.slice.Tensor %3093, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,320],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,320],f16>
%3095 = torch.aten.slice.Tensor %3094, %int1, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,320],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,320],f16>
%3096 = torch.aten.unsqueeze %3095, %int2 : !torch.vtensor<[2,320],f16>, !torch.int -> !torch.vtensor<[2,320,1],f16>
%3097 = torch.aten.unsqueeze %3096, %int3 : !torch.vtensor<[2,320,1],f16>, !torch.int -> !torch.vtensor<[2,320,1,1],f16>
%3098 = torch.aten.add.Tensor %3090, %3097, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,1,1],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f16>
%3099 = torch.aten.view %3098, %90 : !torch.vtensor<[2,320,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,32,10,4096],f16>
%3100 = torch.aten._to_copy %3099, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,32,10,4096],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,32,10,4096],f32>
%3101 = torch.aten.var.correction %3100, %93, %int0, %true : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>
%3102 = torch.aten.mean.dim %3100, %93, %true, %none : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%3103 = torch.aten.add.Tensor %3101, %5, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%3104 = torch.aten.rsqrt %3103 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%3105 = torch.aten.sub.Tensor %3099, %3102, %int1 : !torch.vtensor<[2,32,10,4096],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,10,4096],f32>
%3106 = torch.aten.mul.Tensor %3105, %3104 : !torch.vtensor<[2,32,10,4096],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,10,4096],f32>
%3107 = torch.aten.view %3106, %100 : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f32>
%3108 = torch.aten.unsqueeze %55, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16>
%3109 = torch.aten.unsqueeze %3108, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16>
%3110 = torch.aten.mul.Tensor %3107, %3109 : !torch.vtensor<[2,320,64,64],f32>, !torch.vtensor<[320,1,1],f16> -> !torch.vtensor<[2,320,64,64],f32>
%3111 = torch.aten.unsqueeze %55, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16>
%3112 = torch.aten.unsqueeze %3111, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16>
%3113 = torch.aten.add.Tensor %3110, %3112, %int1 : !torch.vtensor<[2,320,64,64],f32>, !torch.vtensor<[320,1,1],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f32>
%3114 = torch.aten._to_copy %3113, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,320,64,64],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,320,64,64],f16>
%3115 = torch.aten.silu %3114 : !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16>
%3116 = torch.aten._convolution %3115, %47, %55, %87, %87, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[320,320,3,3],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,320,64,64],f16>
%3117 = torch.aten._convolution %3069, %48, %55, %87, %88, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,640,64,64],f16>, !torch.vtensor<[320,640,1,1],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,320,64,64],f16>
%3118 = torch.aten.add.Tensor %3117, %3116, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f16>
%3119 = torch.aten.div.Tensor %3118, %4 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[2,320,64,64],f16>
%3120 = torch.aten.clone %3119, %int0 : !torch.vtensor<[2,320,64,64],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f16>
%3121 = torch.aten.view %3120, %90 : !torch.vtensor<[2,320,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,32,10,4096],f16>
%3122 = torch.aten._to_copy %3121, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,32,10,4096],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,32,10,4096],f32>
%3123 = torch.aten.var.correction %3122, %93, %int0, %true : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>
%3124 = torch.aten.mean.dim %3122, %93, %true, %none : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%3125 = torch.aten.add.Tensor %3123, %3, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%3126 = torch.aten.rsqrt %3125 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%3127 = torch.aten.sub.Tensor %3121, %3124, %int1 : !torch.vtensor<[2,32,10,4096],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,10,4096],f32>
%3128 = torch.aten.mul.Tensor %3127, %3126 : !torch.vtensor<[2,32,10,4096],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,10,4096],f32>
%3129 = torch.aten.view %3128, %100 : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f32>
%3130 = torch.aten.unsqueeze %55, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16>
%3131 = torch.aten.unsqueeze %3130, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16>
%3132 = torch.aten.mul.Tensor %3129, %3131 : !torch.vtensor<[2,320,64,64],f32>, !torch.vtensor<[320,1,1],f16> -> !torch.vtensor<[2,320,64,64],f32>
%3133 = torch.aten.unsqueeze %55, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16>
%3134 = torch.aten.unsqueeze %3133, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16>
%3135 = torch.aten.add.Tensor %3132, %3134, %int1 : !torch.vtensor<[2,320,64,64],f32>, !torch.vtensor<[320,1,1],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f32>
%3136 = torch.aten._to_copy %3135, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,320,64,64],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,320,64,64],f16>
%3137 = torch.aten._convolution %3136, %54, %55, %87, %88, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[320,320,1,1],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,320,64,64],f16>
%3138 = torch.aten.permute %3137, %156 : !torch.vtensor<[2,320,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,64,64,320],f16>
%3139 = torch.aten._reshape_alias %3138, %158, %159 : !torch.vtensor<[2,64,64,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%result0_124, %result1_125, %result2_126 = torch.aten.native_layer_norm %3139, %161, %55, %55, %float1.000000e-05 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.vtensor<[320],f16>, !torch.vtensor<[320],f16>, !torch.float -> !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,1],f32>, !torch.vtensor<[2,4096,1],f32>
%3140 = torch.aten.t %50 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
%3141 = torch.aten._reshape_alias %result0_124, %163, %164 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
%3142 = torch.aten.mm %3141, %3140 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16>
%3143 = torch.aten._unsafe_view %3142, %158 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%3144 = torch.aten.t %50 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
%3145 = torch.aten._reshape_alias %result0_124, %163, %164 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
%3146 = torch.aten.mm %3145, %3144 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16>
%3147 = torch.aten._unsafe_view %3146, %158 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%3148 = torch.aten.t %50 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
%3149 = torch.aten._reshape_alias %result0_124, %163, %164 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
%3150 = torch.aten.mm %3149, %3148 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16>
%3151 = torch.aten._unsafe_view %3150, %158 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%3152 = torch.aten._reshape_alias %3143, %176, %177 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16>
%3153 = torch.aten.permute %3152, %179 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16>
%3154 = torch.aten.clone %3153, %int0 : !torch.vtensor<[2,8,4096,40],f16>, !torch.int -> !torch.vtensor<[2,8,4096,40],f16>
%3155 = torch.aten._unsafe_view %3154, %182 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%3156 = torch.aten._reshape_alias %3147, %176, %177 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16>
%3157 = torch.aten.permute %3156, %179 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16>
%3158 = torch.aten.clone %3157, %int0 : !torch.vtensor<[2,8,4096,40],f16>, !torch.int -> !torch.vtensor<[2,8,4096,40],f16>
%3159 = torch.aten._unsafe_view %3158, %182 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%3160 = torch.aten._reshape_alias %3151, %176, %177 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16>
%3161 = torch.aten.permute %3160, %179 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16>
%3162 = torch.aten.clone %3161, %int0 : !torch.vtensor<[2,8,4096,40],f16>, !torch.int -> !torch.vtensor<[2,8,4096,40],f16>
%3163 = torch.aten._unsafe_view %3162, %182 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%3164 = torch.aten.transpose.int %3159, %int-1, %int-2 : !torch.vtensor<[16,4096,40],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,40,4096],f16>
%3165 = torch.aten.expand %3155, %182, %false : !torch.vtensor<[16,4096,40],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,4096,40],f16>
%3166 = torch.aten._reshape_alias %3165, %182, %194 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%3167 = torch.aten.expand %3164, %196, %false : !torch.vtensor<[16,40,4096],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,40,4096],f16>
%3168 = torch.aten._reshape_alias %3167, %196, %198 : !torch.vtensor<[16,40,4096],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,40,4096],f16>
%3169 = torch.aten.bmm %3166, %3168 : !torch.vtensor<[16,4096,40],f16>, !torch.vtensor<[16,40,4096],f16> -> !torch.vtensor<[16,4096,4096],f16>
%3170 = torch.aten._unsafe_view %3169, %201 : !torch.vtensor<[16,4096,4096],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,4096],f16>
%3171 = torch.aten.mul.Tensor %3170, %2 : !torch.vtensor<[16,4096,4096],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,4096,4096],f16>
%3172 = torch.aten._softmax %3171, %int-1, %false : !torch.vtensor<[16,4096,4096],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,4096,4096],f16>
%3173 = torch.aten.expand %3172, %201, %false : !torch.vtensor<[16,4096,4096],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,4096,4096],f16>
%3174 = torch.aten._reshape_alias %3173, %201, %206 : !torch.vtensor<[16,4096,4096],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,4096,4096],f16>
%3175 = torch.aten.expand %3163, %182, %false : !torch.vtensor<[16,4096,40],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,4096,40],f16>
%3176 = torch.aten._reshape_alias %3175, %182, %194 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%3177 = torch.aten.bmm %3174, %3176 : !torch.vtensor<[16,4096,4096],f16>, !torch.vtensor<[16,4096,40],f16> -> !torch.vtensor<[16,4096,40],f16>
%3178 = torch.aten._unsafe_view %3177, %182 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%3179 = torch.aten._reshape_alias %3178, %212, %213 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16>
%3180 = torch.aten.permute %3179, %179 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16>
%3181 = torch.aten.clone %3180, %int0 : !torch.vtensor<[2,4096,8,40],f16>, !torch.int -> !torch.vtensor<[2,4096,8,40],f16>
%3182 = torch.aten._unsafe_view %3181, %158 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%3183 = torch.aten.t %50 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
%3184 = torch.aten.view %3182, %163 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
%3185 = torch.aten.addmm %55, %3184, %3183, %int1, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[8192,320],f16>
%3186 = torch.aten.view %3185, %158 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%3187 = torch.aten.add.Tensor %3186, %3139, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16>
%result0_127, %result1_128, %result2_129 = torch.aten.native_layer_norm %3187, %161, %55, %55, %float1.000000e-05 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.vtensor<[320],f16>, !torch.vtensor<[320],f16>, !torch.float -> !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,1],f32>, !torch.vtensor<[2,4096,1],f32>
%3188 = torch.aten.t %50 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
%3189 = torch.aten._reshape_alias %result0_127, %163, %164 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
%3190 = torch.aten.mm %3189, %3188 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16>
%3191 = torch.aten._unsafe_view %3190, %158 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%3192 = torch.aten.t %49 : !torch.vtensor<[320,768],f16> -> !torch.vtensor<[768,320],f16>
%3193 = torch.aten._reshape_alias %arg2, %228, %229 : !torch.vtensor<[2,77,768],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[154,768],f16>
%3194 = torch.aten.mm %3193, %3192 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,320],f16> -> !torch.vtensor<[154,320],f16>
%3195 = torch.aten._unsafe_view %3194, %232 : !torch.vtensor<[154,320],f16>, !torch.list<int> -> !torch.vtensor<[2,77,320],f16>
%3196 = torch.aten.t %49 : !torch.vtensor<[320,768],f16> -> !torch.vtensor<[768,320],f16>
%3197 = torch.aten._reshape_alias %arg2, %228, %229 : !torch.vtensor<[2,77,768],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[154,768],f16>
%3198 = torch.aten.mm %3197, %3196 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,320],f16> -> !torch.vtensor<[154,320],f16>
%3199 = torch.aten._unsafe_view %3198, %232 : !torch.vtensor<[154,320],f16>, !torch.list<int> -> !torch.vtensor<[2,77,320],f16>
%3200 = torch.aten._reshape_alias %3191, %176, %177 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16>
%3201 = torch.aten.permute %3200, %179 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16>
%3202 = torch.aten.clone %3201, %int0 : !torch.vtensor<[2,8,4096,40],f16>, !torch.int -> !torch.vtensor<[2,8,4096,40],f16>
%3203 = torch.aten._unsafe_view %3202, %182 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%3204 = torch.aten._reshape_alias %3195, %242, %243 : !torch.vtensor<[2,77,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,77,8,40],f16>
%3205 = torch.aten.permute %3204, %179 : !torch.vtensor<[2,77,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,40],f16>
%3206 = torch.aten.clone %3205, %int0 : !torch.vtensor<[2,8,77,40],f16>, !torch.int -> !torch.vtensor<[2,8,77,40],f16>
%3207 = torch.aten._unsafe_view %3206, %247 : !torch.vtensor<[2,8,77,40],f16>, !torch.list<int> -> !torch.vtensor<[16,77,40],f16>
%3208 = torch.aten._reshape_alias %3199, %242, %243 : !torch.vtensor<[2,77,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,77,8,40],f16>
%3209 = torch.aten.permute %3208, %179 : !torch.vtensor<[2,77,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,40],f16>
%3210 = torch.aten.clone %3209, %int0 : !torch.vtensor<[2,8,77,40],f16>, !torch.int -> !torch.vtensor<[2,8,77,40],f16>
%3211 = torch.aten._unsafe_view %3210, %247 : !torch.vtensor<[2,8,77,40],f16>, !torch.list<int> -> !torch.vtensor<[16,77,40],f16>
%3212 = torch.aten.transpose.int %3207, %int-1, %int-2 : !torch.vtensor<[16,77,40],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,40,77],f16>
%3213 = torch.aten.expand %3203, %182, %false : !torch.vtensor<[16,4096,40],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,4096,40],f16>
%3214 = torch.aten._reshape_alias %3213, %182, %194 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%3215 = torch.aten.expand %3212, %256, %false : !torch.vtensor<[16,40,77],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,40,77],f16>
%3216 = torch.aten._reshape_alias %3215, %256, %258 : !torch.vtensor<[16,40,77],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,40,77],f16>
%3217 = torch.aten.bmm %3214, %3216 : !torch.vtensor<[16,4096,40],f16>, !torch.vtensor<[16,40,77],f16> -> !torch.vtensor<[16,4096,77],f16>
%3218 = torch.aten._unsafe_view %3217, %261 : !torch.vtensor<[16,4096,77],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,77],f16>
%3219 = torch.aten.mul.Tensor %3218, %2 : !torch.vtensor<[16,4096,77],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,4096,77],f16>
%3220 = torch.aten._softmax %3219, %int-1, %false : !torch.vtensor<[16,4096,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,4096,77],f16>
%3221 = torch.aten.expand %3220, %261, %false : !torch.vtensor<[16,4096,77],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,4096,77],f16>
%3222 = torch.aten._reshape_alias %3221, %261, %266 : !torch.vtensor<[16,4096,77],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,4096,77],f16>
%3223 = torch.aten.expand %3211, %247, %false : !torch.vtensor<[16,77,40],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,77,40],f16>
%3224 = torch.aten._reshape_alias %3223, %247, %269 : !torch.vtensor<[16,77,40],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,77,40],f16>
%3225 = torch.aten.bmm %3222, %3224 : !torch.vtensor<[16,4096,77],f16>, !torch.vtensor<[16,77,40],f16> -> !torch.vtensor<[16,4096,40],f16>
%3226 = torch.aten._unsafe_view %3225, %182 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%3227 = torch.aten._reshape_alias %3226, %212, %213 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16>
%3228 = torch.aten.permute %3227, %179 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16>
%3229 = torch.aten.clone %3228, %int0 : !torch.vtensor<[2,4096,8,40],f16>, !torch.int -> !torch.vtensor<[2,4096,8,40],f16>
%3230 = torch.aten._unsafe_view %3229, %158 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%3231 = torch.aten.t %50 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
%3232 = torch.aten.view %3230, %163 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
%3233 = torch.aten.addmm %55, %3232, %3231, %int1, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[8192,320],f16>
%3234 = torch.aten.view %3233, %158 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%3235 = torch.aten.add.Tensor %3234, %3187, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16>
%result0_130, %result1_131, %result2_132 = torch.aten.native_layer_norm %3235, %161, %55, %55, %float1.000000e-05 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.vtensor<[320],f16>, !torch.vtensor<[320],f16>, !torch.float -> !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,1],f32>, !torch.vtensor<[2,4096,1],f32>
%3236 = torch.aten.t %51 : !torch.vtensor<[2560,320],f16> -> !torch.vtensor<[320,2560],f16>
%3237 = torch.aten.view %result0_130, %163 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
%3238 = torch.aten.addmm %52, %3237, %3236, %int1, %int1 : !torch.vtensor<[2560],f16>, !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,2560],f16>, !torch.int, !torch.int -> !torch.vtensor<[8192,2560],f16>
%3239 = torch.aten.view %3238, %285 : !torch.vtensor<[8192,2560],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,2560],f16>
%3240 = torch.aten.slice.Tensor %3239, %int-1, %int0, %int1280, %int1 : !torch.vtensor<[2,4096,2560],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,4096,1280],f16>
%3241 = torch.aten.slice.Tensor %3239, %int-1, %int1280, %int2560, %int1 : !torch.vtensor<[2,4096,2560],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,4096,1280],f16>
%3242 = torch.aten.gelu %3241, %str_0 : !torch.vtensor<[2,4096,1280],f16>, !torch.str -> !torch.vtensor<[2,4096,1280],f16>
%3243 = torch.aten.mul.Tensor %3240, %3242 : !torch.vtensor<[2,4096,1280],f16>, !torch.vtensor<[2,4096,1280],f16> -> !torch.vtensor<[2,4096,1280],f16>
%3244 = torch.aten.t %53 : !torch.vtensor<[320,1280],f16> -> !torch.vtensor<[1280,320],f16>
%3245 = torch.aten.view %3243, %292 : !torch.vtensor<[2,4096,1280],f16>, !torch.list<int> -> !torch.vtensor<[8192,1280],f16>
%3246 = torch.aten.addmm %55, %3245, %3244, %int1, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[8192,1280],f16>, !torch.vtensor<[1280,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[8192,320],f16>
%3247 = torch.aten.view %3246, %158 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%3248 = torch.aten.add.Tensor %3247, %3235, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16>
%3249 = torch.aten._reshape_alias %3248, %297, %298 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,64,64,320],f16>
%3250 = torch.aten.permute %3249, %300 : !torch.vtensor<[2,64,64,320],f16>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f16>
%3251 = torch.aten._convolution %3250, %54, %55, %87, %88, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[320,320,1,1],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,320,64,64],f16>
%3252 = torch.aten.add.Tensor %3251, %3119, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f16>
%3253 = torch.prim.ListConstruct %3252, %89 : (!torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f16>) -> !torch.list<vtensor>
%3254 = torch.aten.cat %3253, %int1 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[2,640,64,64],f16>
%3255 = torch.aten.view %3254, %3071 : !torch.vtensor<[2,640,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,32,20,4096],f16>
%3256 = torch.aten._to_copy %3255, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,32,20,4096],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,32,20,4096],f32>
%3257 = torch.aten.var.correction %3256, %93, %int0, %true : !torch.vtensor<[2,32,20,4096],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>
%3258 = torch.aten.mean.dim %3256, %93, %true, %none : !torch.vtensor<[2,32,20,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%3259 = torch.aten.add.Tensor %3257, %5, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%3260 = torch.aten.rsqrt %3259 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%3261 = torch.aten.sub.Tensor %3255, %3258, %int1 : !torch.vtensor<[2,32,20,4096],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,20,4096],f32>
%3262 = torch.aten.mul.Tensor %3261, %3260 : !torch.vtensor<[2,32,20,4096],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,20,4096],f32>
%3263 = torch.aten.view %3262, %3080 : !torch.vtensor<[2,32,20,4096],f32>, !torch.list<int> -> !torch.vtensor<[2,640,64,64],f32>
%3264 = torch.aten.unsqueeze %45, %int-1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640,1],f16>
%3265 = torch.aten.unsqueeze %3264, %int-1 : !torch.vtensor<[640,1],f16>, !torch.int -> !torch.vtensor<[640,1,1],f16>
%3266 = torch.aten.mul.Tensor %3263, %3265 : !torch.vtensor<[2,640,64,64],f32>, !torch.vtensor<[640,1,1],f16> -> !torch.vtensor<[2,640,64,64],f32>
%3267 = torch.aten.unsqueeze %45, %int-1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640,1],f16>
%3268 = torch.aten.unsqueeze %3267, %int-1 : !torch.vtensor<[640,1],f16>, !torch.int -> !torch.vtensor<[640,1,1],f16>
%3269 = torch.aten.add.Tensor %3266, %3268, %int1 : !torch.vtensor<[2,640,64,64],f32>, !torch.vtensor<[640,1,1],f16>, !torch.int -> !torch.vtensor<[2,640,64,64],f32>
%3270 = torch.aten._to_copy %3269, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,640,64,64],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,640,64,64],f16>
%3271 = torch.aten.silu %3270 : !torch.vtensor<[2,640,64,64],f16> -> !torch.vtensor<[2,640,64,64],f16>
%3272 = torch.aten._convolution %3271, %46, %55, %87, %87, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,640,64,64],f16>, !torch.vtensor<[320,640,3,3],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,320,64,64],f16>
%3273 = torch.aten.silu %86 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%3274 = torch.aten.t %53 : !torch.vtensor<[320,1280],f16> -> !torch.vtensor<[1280,320],f16>
%3275 = torch.aten.addmm %55, %3273, %3274, %int1, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[2,320],f16>
%3276 = torch.aten.slice.Tensor %3275, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,320],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,320],f16>
%3277 = torch.aten.slice.Tensor %3276, %int1, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2,320],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,320],f16>
%3278 = torch.aten.unsqueeze %3277, %int2 : !torch.vtensor<[2,320],f16>, !torch.int -> !torch.vtensor<[2,320,1],f16>
%3279 = torch.aten.unsqueeze %3278, %int3 : !torch.vtensor<[2,320,1],f16>, !torch.int -> !torch.vtensor<[2,320,1,1],f16>
%3280 = torch.aten.add.Tensor %3272, %3279, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,1,1],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f16>
%3281 = torch.aten.view %3280, %90 : !torch.vtensor<[2,320,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,32,10,4096],f16>
%3282 = torch.aten._to_copy %3281, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,32,10,4096],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,32,10,4096],f32>
%3283 = torch.aten.var.correction %3282, %93, %int0, %true : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>
%3284 = torch.aten.mean.dim %3282, %93, %true, %none : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%3285 = torch.aten.add.Tensor %3283, %5, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%3286 = torch.aten.rsqrt %3285 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%3287 = torch.aten.sub.Tensor %3281, %3284, %int1 : !torch.vtensor<[2,32,10,4096],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,10,4096],f32>
%3288 = torch.aten.mul.Tensor %3287, %3286 : !torch.vtensor<[2,32,10,4096],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,10,4096],f32>
%3289 = torch.aten.view %3288, %100 : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f32>
%3290 = torch.aten.unsqueeze %55, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16>
%3291 = torch.aten.unsqueeze %3290, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16>
%3292 = torch.aten.mul.Tensor %3289, %3291 : !torch.vtensor<[2,320,64,64],f32>, !torch.vtensor<[320,1,1],f16> -> !torch.vtensor<[2,320,64,64],f32>
%3293 = torch.aten.unsqueeze %55, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16>
%3294 = torch.aten.unsqueeze %3293, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16>
%3295 = torch.aten.add.Tensor %3292, %3294, %int1 : !torch.vtensor<[2,320,64,64],f32>, !torch.vtensor<[320,1,1],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f32>
%3296 = torch.aten._to_copy %3295, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,320,64,64],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,320,64,64],f16>
%3297 = torch.aten.silu %3296 : !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16>
%3298 = torch.aten._convolution %3297, %47, %55, %87, %87, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[320,320,3,3],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,320,64,64],f16>
%3299 = torch.aten._convolution %3254, %48, %55, %87, %88, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,640,64,64],f16>, !torch.vtensor<[320,640,1,1],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,320,64,64],f16>
%3300 = torch.aten.add.Tensor %3299, %3298, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f16>
%3301 = torch.aten.div.Tensor %3300, %4 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[2,320,64,64],f16>
%3302 = torch.aten.view %3301, %90 : !torch.vtensor<[2,320,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,32,10,4096],f16>
%3303 = torch.aten._to_copy %3302, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,32,10,4096],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,32,10,4096],f32>
%3304 = torch.aten.var.correction %3303, %93, %int0, %true : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>
%3305 = torch.aten.mean.dim %3303, %93, %true, %none : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%3306 = torch.aten.add.Tensor %3304, %3, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%3307 = torch.aten.rsqrt %3306 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%3308 = torch.aten.sub.Tensor %3302, %3305, %int1 : !torch.vtensor<[2,32,10,4096],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,10,4096],f32>
%3309 = torch.aten.mul.Tensor %3308, %3307 : !torch.vtensor<[2,32,10,4096],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,10,4096],f32>
%3310 = torch.aten.view %3309, %100 : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f32>
%3311 = torch.aten.unsqueeze %55, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16>
%3312 = torch.aten.unsqueeze %3311, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16>
%3313 = torch.aten.mul.Tensor %3310, %3312 : !torch.vtensor<[2,320,64,64],f32>, !torch.vtensor<[320,1,1],f16> -> !torch.vtensor<[2,320,64,64],f32>
%3314 = torch.aten.unsqueeze %55, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16>
%3315 = torch.aten.unsqueeze %3314, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16>
%3316 = torch.aten.add.Tensor %3313, %3315, %int1 : !torch.vtensor<[2,320,64,64],f32>, !torch.vtensor<[320,1,1],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f32>
%3317 = torch.aten._to_copy %3316, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,320,64,64],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,320,64,64],f16>
%3318 = torch.aten._convolution %3317, %54, %55, %87, %88, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[320,320,1,1],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,320,64,64],f16>
%3319 = torch.aten.permute %3318, %156 : !torch.vtensor<[2,320,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,64,64,320],f16>
%3320 = torch.aten._reshape_alias %3319, %158, %159 : !torch.vtensor<[2,64,64,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%result0_133, %result1_134, %result2_135 = torch.aten.native_layer_norm %3320, %161, %55, %55, %float1.000000e-05 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.vtensor<[320],f16>, !torch.vtensor<[320],f16>, !torch.float -> !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,1],f32>, !torch.vtensor<[2,4096,1],f32>
%3321 = torch.aten.t %50 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
%3322 = torch.aten._reshape_alias %result0_133, %163, %164 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
%3323 = torch.aten.mm %3322, %3321 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16>
%3324 = torch.aten._unsafe_view %3323, %158 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%3325 = torch.aten.t %50 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
%3326 = torch.aten._reshape_alias %result0_133, %163, %164 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
%3327 = torch.aten.mm %3326, %3325 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16>
%3328 = torch.aten._unsafe_view %3327, %158 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%3329 = torch.aten.t %50 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
%3330 = torch.aten._reshape_alias %result0_133, %163, %164 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
%3331 = torch.aten.mm %3330, %3329 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16>
%3332 = torch.aten._unsafe_view %3331, %158 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%3333 = torch.aten._reshape_alias %3324, %176, %177 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16>
%3334 = torch.aten.permute %3333, %179 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16>
%3335 = torch.aten.clone %3334, %int0 : !torch.vtensor<[2,8,4096,40],f16>, !torch.int -> !torch.vtensor<[2,8,4096,40],f16>
%3336 = torch.aten._unsafe_view %3335, %182 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%3337 = torch.aten._reshape_alias %3328, %176, %177 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16>
%3338 = torch.aten.permute %3337, %179 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16>
%3339 = torch.aten.clone %3338, %int0 : !torch.vtensor<[2,8,4096,40],f16>, !torch.int -> !torch.vtensor<[2,8,4096,40],f16>
%3340 = torch.aten._unsafe_view %3339, %182 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%3341 = torch.aten._reshape_alias %3332, %176, %177 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16>
%3342 = torch.aten.permute %3341, %179 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16>
%3343 = torch.aten.clone %3342, %int0 : !torch.vtensor<[2,8,4096,40],f16>, !torch.int -> !torch.vtensor<[2,8,4096,40],f16>
%3344 = torch.aten._unsafe_view %3343, %182 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%3345 = torch.aten.transpose.int %3340, %int-1, %int-2 : !torch.vtensor<[16,4096,40],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,40,4096],f16>
%3346 = torch.aten.expand %3336, %182, %false : !torch.vtensor<[16,4096,40],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,4096,40],f16>
%3347 = torch.aten._reshape_alias %3346, %182, %194 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%3348 = torch.aten.expand %3345, %196, %false : !torch.vtensor<[16,40,4096],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,40,4096],f16>
%3349 = torch.aten._reshape_alias %3348, %196, %198 : !torch.vtensor<[16,40,4096],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,40,4096],f16>
%3350 = torch.aten.bmm %3347, %3349 : !torch.vtensor<[16,4096,40],f16>, !torch.vtensor<[16,40,4096],f16> -> !torch.vtensor<[16,4096,4096],f16>
%3351 = torch.aten._unsafe_view %3350, %201 : !torch.vtensor<[16,4096,4096],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,4096],f16>
%3352 = torch.aten.mul.Tensor %3351, %2 : !torch.vtensor<[16,4096,4096],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,4096,4096],f16>
%3353 = torch.aten._softmax %3352, %int-1, %false : !torch.vtensor<[16,4096,4096],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,4096,4096],f16>
%3354 = torch.aten.expand %3353, %201, %false : !torch.vtensor<[16,4096,4096],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,4096,4096],f16>
%3355 = torch.aten._reshape_alias %3354, %201, %206 : !torch.vtensor<[16,4096,4096],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,4096,4096],f16>
%3356 = torch.aten.expand %3344, %182, %false : !torch.vtensor<[16,4096,40],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,4096,40],f16>
%3357 = torch.aten._reshape_alias %3356, %182, %194 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%3358 = torch.aten.bmm %3355, %3357 : !torch.vtensor<[16,4096,4096],f16>, !torch.vtensor<[16,4096,40],f16> -> !torch.vtensor<[16,4096,40],f16>
%3359 = torch.aten._unsafe_view %3358, %182 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%3360 = torch.aten._reshape_alias %3359, %212, %213 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16>
%3361 = torch.aten.permute %3360, %179 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16>
%3362 = torch.aten.clone %3361, %int0 : !torch.vtensor<[2,4096,8,40],f16>, !torch.int -> !torch.vtensor<[2,4096,8,40],f16>
%3363 = torch.aten._unsafe_view %3362, %158 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%3364 = torch.aten.t %50 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
%3365 = torch.aten.view %3363, %163 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
%3366 = torch.aten.addmm %55, %3365, %3364, %int1, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[8192,320],f16>
%3367 = torch.aten.view %3366, %158 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%3368 = torch.aten.add.Tensor %3367, %3320, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16>
%result0_136, %result1_137, %result2_138 = torch.aten.native_layer_norm %3368, %161, %55, %55, %float1.000000e-05 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.vtensor<[320],f16>, !torch.vtensor<[320],f16>, !torch.float -> !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,1],f32>, !torch.vtensor<[2,4096,1],f32>
%3369 = torch.aten.t %50 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
%3370 = torch.aten._reshape_alias %result0_136, %163, %164 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
%3371 = torch.aten.mm %3370, %3369 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16>
%3372 = torch.aten._unsafe_view %3371, %158 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%3373 = torch.aten.t %49 : !torch.vtensor<[320,768],f16> -> !torch.vtensor<[768,320],f16>
%3374 = torch.aten._reshape_alias %arg2, %228, %229 : !torch.vtensor<[2,77,768],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[154,768],f16>
%3375 = torch.aten.mm %3374, %3373 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,320],f16> -> !torch.vtensor<[154,320],f16>
%3376 = torch.aten._unsafe_view %3375, %232 : !torch.vtensor<[154,320],f16>, !torch.list<int> -> !torch.vtensor<[2,77,320],f16>
%3377 = torch.aten.t %49 : !torch.vtensor<[320,768],f16> -> !torch.vtensor<[768,320],f16>
%3378 = torch.aten._reshape_alias %arg2, %228, %229 : !torch.vtensor<[2,77,768],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[154,768],f16>
%3379 = torch.aten.mm %3378, %3377 : !torch.vtensor<[154,768],f16>, !torch.vtensor<[768,320],f16> -> !torch.vtensor<[154,320],f16>
%3380 = torch.aten._unsafe_view %3379, %232 : !torch.vtensor<[154,320],f16>, !torch.list<int> -> !torch.vtensor<[2,77,320],f16>
%3381 = torch.aten._reshape_alias %3372, %176, %177 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16>
%3382 = torch.aten.permute %3381, %179 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16>
%3383 = torch.aten.clone %3382, %int0 : !torch.vtensor<[2,8,4096,40],f16>, !torch.int -> !torch.vtensor<[2,8,4096,40],f16>
%3384 = torch.aten._unsafe_view %3383, %182 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%3385 = torch.aten._reshape_alias %3376, %242, %243 : !torch.vtensor<[2,77,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,77,8,40],f16>
%3386 = torch.aten.permute %3385, %179 : !torch.vtensor<[2,77,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,40],f16>
%3387 = torch.aten.clone %3386, %int0 : !torch.vtensor<[2,8,77,40],f16>, !torch.int -> !torch.vtensor<[2,8,77,40],f16>
%3388 = torch.aten._unsafe_view %3387, %247 : !torch.vtensor<[2,8,77,40],f16>, !torch.list<int> -> !torch.vtensor<[16,77,40],f16>
%3389 = torch.aten._reshape_alias %3380, %242, %243 : !torch.vtensor<[2,77,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,77,8,40],f16>
%3390 = torch.aten.permute %3389, %179 : !torch.vtensor<[2,77,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,8,77,40],f16>
%3391 = torch.aten.clone %3390, %int0 : !torch.vtensor<[2,8,77,40],f16>, !torch.int -> !torch.vtensor<[2,8,77,40],f16>
%3392 = torch.aten._unsafe_view %3391, %247 : !torch.vtensor<[2,8,77,40],f16>, !torch.list<int> -> !torch.vtensor<[16,77,40],f16>
%3393 = torch.aten.transpose.int %3388, %int-1, %int-2 : !torch.vtensor<[16,77,40],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,40,77],f16>
%3394 = torch.aten.expand %3384, %182, %false : !torch.vtensor<[16,4096,40],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,4096,40],f16>
%3395 = torch.aten._reshape_alias %3394, %182, %194 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%3396 = torch.aten.expand %3393, %256, %false : !torch.vtensor<[16,40,77],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,40,77],f16>
%3397 = torch.aten._reshape_alias %3396, %256, %258 : !torch.vtensor<[16,40,77],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,40,77],f16>
%3398 = torch.aten.bmm %3395, %3397 : !torch.vtensor<[16,4096,40],f16>, !torch.vtensor<[16,40,77],f16> -> !torch.vtensor<[16,4096,77],f16>
%3399 = torch.aten._unsafe_view %3398, %261 : !torch.vtensor<[16,4096,77],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,77],f16>
%3400 = torch.aten.mul.Tensor %3399, %2 : !torch.vtensor<[16,4096,77],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[16,4096,77],f16>
%3401 = torch.aten._softmax %3400, %int-1, %false : !torch.vtensor<[16,4096,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,4096,77],f16>
%3402 = torch.aten.expand %3401, %261, %false : !torch.vtensor<[16,4096,77],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,4096,77],f16>
%3403 = torch.aten._reshape_alias %3402, %261, %266 : !torch.vtensor<[16,4096,77],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,4096,77],f16>
%3404 = torch.aten.expand %3392, %247, %false : !torch.vtensor<[16,77,40],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[16,77,40],f16>
%3405 = torch.aten._reshape_alias %3404, %247, %269 : !torch.vtensor<[16,77,40],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[16,77,40],f16>
%3406 = torch.aten.bmm %3403, %3405 : !torch.vtensor<[16,4096,77],f16>, !torch.vtensor<[16,77,40],f16> -> !torch.vtensor<[16,4096,40],f16>
%3407 = torch.aten._unsafe_view %3406, %182 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[16,4096,40],f16>
%3408 = torch.aten._reshape_alias %3407, %212, %213 : !torch.vtensor<[16,4096,40],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,8,4096,40],f16>
%3409 = torch.aten.permute %3408, %179 : !torch.vtensor<[2,8,4096,40],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,8,40],f16>
%3410 = torch.aten.clone %3409, %int0 : !torch.vtensor<[2,4096,8,40],f16>, !torch.int -> !torch.vtensor<[2,4096,8,40],f16>
%3411 = torch.aten._unsafe_view %3410, %158 : !torch.vtensor<[2,4096,8,40],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%3412 = torch.aten.t %50 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
%3413 = torch.aten.view %3411, %163 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
%3414 = torch.aten.addmm %55, %3413, %3412, %int1, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[8192,320],f16>
%3415 = torch.aten.view %3414, %158 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%3416 = torch.aten.add.Tensor %3415, %3368, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16>
%result0_139, %result1_140, %result2_141 = torch.aten.native_layer_norm %3416, %161, %55, %55, %float1.000000e-05 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.vtensor<[320],f16>, !torch.vtensor<[320],f16>, !torch.float -> !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,1],f32>, !torch.vtensor<[2,4096,1],f32>
%3417 = torch.aten.t %51 : !torch.vtensor<[2560,320],f16> -> !torch.vtensor<[320,2560],f16>
%3418 = torch.aten.view %result0_139, %163 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16>
%3419 = torch.aten.addmm %52, %3418, %3417, %int1, %int1 : !torch.vtensor<[2560],f16>, !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,2560],f16>, !torch.int, !torch.int -> !torch.vtensor<[8192,2560],f16>
%3420 = torch.aten.view %3419, %285 : !torch.vtensor<[8192,2560],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,2560],f16>
%3421 = torch.aten.slice.Tensor %3420, %int-1, %int0, %int1280, %int1 : !torch.vtensor<[2,4096,2560],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,4096,1280],f16>
%3422 = torch.aten.slice.Tensor %3420, %int-1, %int1280, %int2560, %int1 : !torch.vtensor<[2,4096,2560],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,4096,1280],f16>
%3423 = torch.aten.gelu %3422, %str_0 : !torch.vtensor<[2,4096,1280],f16>, !torch.str -> !torch.vtensor<[2,4096,1280],f16>
%3424 = torch.aten.mul.Tensor %3421, %3423 : !torch.vtensor<[2,4096,1280],f16>, !torch.vtensor<[2,4096,1280],f16> -> !torch.vtensor<[2,4096,1280],f16>
%3425 = torch.aten.t %53 : !torch.vtensor<[320,1280],f16> -> !torch.vtensor<[1280,320],f16>
%3426 = torch.aten.view %3424, %292 : !torch.vtensor<[2,4096,1280],f16>, !torch.list<int> -> !torch.vtensor<[8192,1280],f16>
%3427 = torch.aten.addmm %55, %3426, %3425, %int1, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[8192,1280],f16>, !torch.vtensor<[1280,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[8192,320],f16>
%3428 = torch.aten.view %3427, %158 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16>
%3429 = torch.aten.add.Tensor %3428, %3416, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16>
%3430 = torch.aten._reshape_alias %3429, %297, %298 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,64,64,320],f16>
%3431 = torch.aten.permute %3430, %300 : !torch.vtensor<[2,64,64,320],f16>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f16>
%3432 = torch.aten._convolution %3431, %54, %55, %87, %88, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[320,320,1,1],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,320,64,64],f16>
%3433 = torch.aten.add.Tensor %3432, %3301, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f16>
%3434 = torch.aten.clone %3433, %int0 : !torch.vtensor<[2,320,64,64],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f16>
%3435 = torch.aten.view %3434, %90 : !torch.vtensor<[2,320,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,32,10,4096],f16>
%3436 = torch.aten._to_copy %3435, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,32,10,4096],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,32,10,4096],f32>
%3437 = torch.aten.var.correction %3436, %93, %int0, %true : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>
%3438 = torch.aten.mean.dim %3436, %93, %true, %none : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f32>
%3439 = torch.aten.add.Tensor %3437, %5, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%3440 = torch.aten.rsqrt %3439 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%3441 = torch.aten.sub.Tensor %3435, %3438, %int1 : !torch.vtensor<[2,32,10,4096],f16>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,10,4096],f32>
%3442 = torch.aten.mul.Tensor %3441, %3440 : !torch.vtensor<[2,32,10,4096],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,10,4096],f32>
%3443 = torch.aten.view %3442, %100 : !torch.vtensor<[2,32,10,4096],f32>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f32>
%3444 = torch.aten.unsqueeze %55, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16>
%3445 = torch.aten.unsqueeze %3444, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16>
%3446 = torch.aten.mul.Tensor %3443, %3445 : !torch.vtensor<[2,320,64,64],f32>, !torch.vtensor<[320,1,1],f16> -> !torch.vtensor<[2,320,64,64],f32>
%3447 = torch.aten.unsqueeze %55, %int-1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320,1],f16>
%3448 = torch.aten.unsqueeze %3447, %int-1 : !torch.vtensor<[320,1],f16>, !torch.int -> !torch.vtensor<[320,1,1],f16>
%3449 = torch.aten.add.Tensor %3446, %3448, %int1 : !torch.vtensor<[2,320,64,64],f32>, !torch.vtensor<[320,1,1],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f32>
%3450 = torch.aten._to_copy %3449, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,320,64,64],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,320,64,64],f16>
%3451 = torch.aten.silu %3450 : !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16>
%3452 = torch.aten._convolution %3451, %56, %57, %87, %87, %87, %false, %88, %int1, %false, %false, %true, %true : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[4,320,3,3],f16>, !torch.vtensor<[4],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,4,64,64],f16>
return %3452 : !torch.vtensor<[2,4,64,64],f16>
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment